Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created November 24, 2022 15:20
Show Gist options
  • Save pashu123/7f554ba72a542a0a209010b5fa2c6a07 to your computer and use it in GitHub Desktop.
func.func @forward(%arg0: !torch.vtensor<[2,4,96,96],f32>, %arg1: !torch.vtensor<[2],si64>, %arg2: !torch.vtensor<[2,77,1024],f32>) -> !torch.vtensor<[2,4,96,96],f16> {
%int160 = torch.constant.int 160
%0 = torch.vtensor.literal(dense<1.250000e-01> : tensor<f64>) : !torch.vtensor<[],f64>
%1 = torch.vtensor.literal(dense<9.9999999999999995E-7> : tensor<f64>) : !torch.vtensor<[],f64>
%2 = torch.vtensor.literal(dense<1.000000e-05> : tensor<f64>) : !torch.vtensor<[],f64>
%3 = torch.vtensor.literal(dense<160> : tensor<si64>) : !torch.vtensor<[],si64>
%4 = torch.vtensor.literal(dense<-9.2103403719761836> : tensor<f64>) : !torch.vtensor<[],f64>
%5 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320xf32>) : !torch.vtensor<[1280,320],f32>
%6 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x4x3x3xf32>) : !torch.vtensor<[320,4,3,3],f32>
%7 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x3x3xf32>) : !torch.vtensor<[640,320,3,3],f32>
%8 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x1x1xf32>) : !torch.vtensor<[640,320,1,1],f32>
%9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x3x3xf32>) : !torch.vtensor<[1280,640,3,3],f32>
%10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x1x1xf32>) : !torch.vtensor<[1280,640,1,1],f32>
%11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf32>) : !torch.vtensor<[1280,2560,3,3],f32>
%12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf32>) : !torch.vtensor<[1280,2560,1,1],f32>
%13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x3x3xf32>) : !torch.vtensor<[1280,1920,3,3],f32>
%14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x1x1xf32>) : !torch.vtensor<[1280,1920,1,1],f32>
%15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf32>) : !torch.vtensor<[1280,1024],f32>
%16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf32>) : !torch.vtensor<[10240],f32>
%17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf32>) : !torch.vtensor<[10240,1280],f32>
%18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf32>) : !torch.vtensor<[1280,5120],f32>
%19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf32>) : !torch.vtensor<[1280,1280],f32>
%20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf32>) : !torch.vtensor<[1280,1280,3,3],f32>
%21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf32>) : !torch.vtensor<[1920],f32>
%22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x3x3xf32>) : !torch.vtensor<[640,1920,3,3],f32>
%23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x1x1xf32>) : !torch.vtensor<[640,1920,1,1],f32>
%24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf32>) : !torch.vtensor<[1280],f32>
%25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x3x3xf32>) : !torch.vtensor<[640,1280,3,3],f32>
%26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x1x1xf32>) : !torch.vtensor<[640,1280,1,1],f32>
%27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x3x3xf32>) : !torch.vtensor<[640,960,3,3],f32>
%28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf32>) : !torch.vtensor<[640,1280],f32>
%29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x1x1xf32>) : !torch.vtensor<[640,960,1,1],f32>
%30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf32>) : !torch.vtensor<[640,1024],f32>
%31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf32>) : !torch.vtensor<[5120],f32>
%32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf32>) : !torch.vtensor<[5120,640],f32>
%33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf32>) : !torch.vtensor<[640,2560],f32>
%34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf32>) : !torch.vtensor<[640,640],f32>
%35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf32>) : !torch.vtensor<[640,640,3,3],f32>
%36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf32>) : !torch.vtensor<[960],f32>
%37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x3x3xf32>) : !torch.vtensor<[320,960,3,3],f32>
%38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x1x1xf32>) : !torch.vtensor<[320,960,1,1],f32>
%39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf32>) : !torch.vtensor<[640],f32>
%40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xf32>) : !torch.vtensor<[320,640,3,3],f32>
%41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf32>) : !torch.vtensor<[320,320,3,3],f32>
%42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xf32>) : !torch.vtensor<[320,640,1,1],f32>
%43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf32>) : !torch.vtensor<[320,1024],f32>
%44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf32>) : !torch.vtensor<[2560],f32>
%45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf32>) : !torch.vtensor<[2560,320],f32>
%46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf32>) : !torch.vtensor<[320,1280],f32>
%47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf32>) : !torch.vtensor<[320,320],f32>
%48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf32>) : !torch.vtensor<[320],f32>
%49 = torch.vtensor.literal(dense<[-0.0333971679, 0.0151019702, -0.0098297568, -0.00283672824]> : tensor<4xf32>) : !torch.vtensor<[4],f32>
%50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4x320x3x3xf32>) : !torch.vtensor<[4,320,3,3],f32>
%int0 = torch.constant.int 0
%int6 = torch.constant.int 6
%none = torch.constant.none
%false = torch.constant.bool false
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int1 = torch.constant.int 1
%int-1 = torch.constant.int -1
%int5 = torch.constant.int 5
%true = torch.constant.bool true
%int2 = torch.constant.int 2
%int32 = torch.constant.int 32
%int10 = torch.constant.int 10
%int9216 = torch.constant.int 9216
%int2949120 = torch.constant.int 2949120
%int92160 = torch.constant.int 92160
%int3 = torch.constant.int 3
%int320 = torch.constant.int 320
%int96 = torch.constant.int 96
%int18432 = torch.constant.int 18432
%float1.000000e-05 = torch.constant.float 1.000000e-05
%int64 = torch.constant.int 64
%int589824 = torch.constant.int 589824
%int84934656 = torch.constant.int 84934656
%int154 = torch.constant.int 154
%int1024 = torch.constant.int 1024
%int77 = torch.constant.int 77
%int24640 = torch.constant.int 24640
%int4928 = torch.constant.int 4928
%int709632 = torch.constant.int 709632
%int2560 = torch.constant.int 2560
%int1280 = torch.constant.int 1280
%str = torch.constant.str "none"
%int30720 = torch.constant.int 30720
%int2304 = torch.constant.int 2304
%int737280 = torch.constant.int 737280
%int23040 = torch.constant.int 23040
%int48 = torch.constant.int 48
%int20 = torch.constant.int 20
%int1474560 = torch.constant.int 1474560
%int46080 = torch.constant.int 46080
%int640 = torch.constant.int 640
%int4608 = torch.constant.int 4608
%int147456 = torch.constant.int 147456
%int5308416 = torch.constant.int 5308416
%int49280 = torch.constant.int 49280
%int177408 = torch.constant.int 177408
%int5120 = torch.constant.int 5120
%int576 = torch.constant.int 576
%int368640 = torch.constant.int 368640
%int11520 = torch.constant.int 11520
%int24 = torch.constant.int 24
%int40 = torch.constant.int 40
%int1152 = torch.constant.int 1152
%int36864 = torch.constant.int 36864
%int331776 = torch.constant.int 331776
%int98560 = torch.constant.int 98560
%int44352 = torch.constant.int 44352
%int10240 = torch.constant.int 10240
%int144 = torch.constant.int 144
%int184320 = torch.constant.int 184320
%int5760 = torch.constant.int 5760
%int12 = torch.constant.int 12
%int288 = torch.constant.int 288
%int20736 = torch.constant.int 20736
%int11088 = torch.constant.int 11088
%int15360 = torch.constant.int 15360
%int80 = torch.constant.int 80
%float2.000000e00 = torch.constant.float 2.000000e+00
%int60 = torch.constant.int 60
%int1105920 = torch.constant.int 1105920
%int34560 = torch.constant.int 34560
%int1920 = torch.constant.int 1920
%int4423680 = torch.constant.int 4423680
%int138240 = torch.constant.int 138240
%int30 = torch.constant.int 30
%int2211840 = torch.constant.int 2211840
%int69120 = torch.constant.int 69120
%int960 = torch.constant.int 960
%int8847360 = torch.constant.int 8847360
%int276480 = torch.constant.int 276480
%int5898240 = torch.constant.int 5898240
%cpu = torch.constant.device "cpu"
%51 = torch.aten.arange.start %int0, %int160, %int6, %none, %cpu, %false : !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[160],f32>
%52 = torch.aten.mul.Tensor %51, %4 : !torch.vtensor<[160],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[160],f32>
%53 = torch.aten.div.Tensor %52, %3 : !torch.vtensor<[160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[160],f32>
%54 = torch.aten.exp %53 : !torch.vtensor<[160],f32> -> !torch.vtensor<[160],f32>
%cuda3A0 = torch.constant.device "cuda:0"
%55 = torch.aten._to_copy %54, %int6, %int0, %cuda3A0, %none, %false, %none : !torch.vtensor<[160],f32>, !torch.int, !torch.int, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[160],f32>
%56 = torch.aten.slice.Tensor %arg1, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],si64>
%57 = torch.aten.unsqueeze %56, %int1 : !torch.vtensor<[2],si64>, !torch.int -> !torch.vtensor<[2,1],si64>
%58 = torch.aten._to_copy %57, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1],si64>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1],f32>
%59 = torch.aten.unsqueeze %55, %int0 : !torch.vtensor<[160],f32>, !torch.int -> !torch.vtensor<[1,160],f32>
%60 = torch.aten.mul.Tensor %58, %59 : !torch.vtensor<[2,1],f32>, !torch.vtensor<[1,160],f32> -> !torch.vtensor<[2,160],f32>
%61 = torch.aten.cos %60 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
%62 = torch.aten.sin %60 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
%63 = torch.prim.ListConstruct %61, %62 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor>
%64 = torch.aten.cat %63, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32>
%65 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%66 = torch.aten._to_copy %5, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,320],f16>
%67 = torch.aten._to_copy %64, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320],f16>
%68 = torch.aten.t %66 : !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[320,1280],f16>
%69 = torch.aten.addmm %65, %67, %68, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,320],f16>, !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%70 = torch.aten.silu %69 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%71 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%72 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%73 = torch.aten.t %72 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%74 = torch.aten.addmm %71, %70, %73, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%75 = torch.aten._to_copy %arg0, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,4,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,4,96,96],f16>
%76 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%77 = torch.aten._to_copy %6, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,4,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,4,3,3],f16>
%78 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%79 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
%80 = torch.aten._convolution %75, %77, %76, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,4,96,96],f16>, !torch.vtensor<[320,4,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%81 = torch.aten._to_copy %80, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%82 = torch.prim.ListConstruct %int2, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%83 = torch.prim.ListConstruct %int2949120, %int92160, %int9216, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%84 = torch.aten._reshape_alias %81, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%85 = torch.prim.ListConstruct %int2, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
%result0, %result1 = torch.aten.var_mean.correction %84, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%86 = torch.aten.add.Tensor %result0, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%87 = torch.aten.rsqrt %86 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%88 = torch.aten.sub.Tensor %84, %result1, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%89 = torch.aten.mul.Tensor %88, %87 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%90 = torch.prim.ListConstruct %int2, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%91 = torch.aten.view %89, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%92 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%93 = torch.aten.unsqueeze %92, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%94 = torch.aten.unsqueeze %93, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%95 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%96 = torch.aten.unsqueeze %95, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%97 = torch.aten.unsqueeze %96, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%98 = torch.aten.mul.Tensor %91, %97 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%99 = torch.aten.add.Tensor %98, %94, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%100 = torch.aten._to_copy %99, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%101 = torch.aten.silu %100 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%102 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%103 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%104 = torch.aten._convolution %101, %103, %102, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%105 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%106 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%107 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%108 = torch.aten.t %107 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%109 = torch.aten.addmm %106, %105, %108, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%110 = torch.aten.unsqueeze %109, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%111 = torch.aten.unsqueeze %110, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%112 = torch.aten.add.Tensor %104, %111, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%113 = torch.aten._to_copy %112, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%114 = torch.aten._reshape_alias %113, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_0, %result1_1 = torch.aten.var_mean.correction %114, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%115 = torch.aten.add.Tensor %result0_0, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%116 = torch.aten.rsqrt %115 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%117 = torch.aten.sub.Tensor %114, %result1_1, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%118 = torch.aten.mul.Tensor %117, %116 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%119 = torch.aten.view %118, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%120 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%121 = torch.aten.unsqueeze %120, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%122 = torch.aten.unsqueeze %121, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%123 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%124 = torch.aten.unsqueeze %123, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%125 = torch.aten.unsqueeze %124, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%126 = torch.aten.mul.Tensor %119, %125 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%127 = torch.aten.add.Tensor %126, %122, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%128 = torch.aten._to_copy %127, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%129 = torch.aten.silu %128 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%130 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%131 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%132 = torch.aten._convolution %129, %131, %130, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%133 = torch.aten.add.Tensor %80, %132, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%134 = torch.aten._to_copy %133, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%135 = torch.aten._reshape_alias %134, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_2, %result1_3 = torch.aten.var_mean.correction %135, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%136 = torch.aten.add.Tensor %result0_2, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%137 = torch.aten.rsqrt %136 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%138 = torch.aten.sub.Tensor %135, %result1_3, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%139 = torch.aten.mul.Tensor %138, %137 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%140 = torch.aten.view %139, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%141 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%142 = torch.aten.unsqueeze %141, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%143 = torch.aten.unsqueeze %142, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%144 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%145 = torch.aten.unsqueeze %144, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%146 = torch.aten.unsqueeze %145, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%147 = torch.aten.mul.Tensor %140, %146 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%148 = torch.aten.add.Tensor %147, %143, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%149 = torch.prim.ListConstruct %int2949120, %int9216, %int96, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%150 = torch.aten._reshape_alias %148, %90, %149 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%151 = torch.prim.ListConstruct %int0, %int2, %int3, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%152 = torch.aten.permute %150, %151 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f32>
%153 = torch.prim.ListConstruct %int2, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%154 = torch.prim.ListConstruct %int2949120, %int1, %int9216 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%155 = torch.aten._reshape_alias %152, %153, %154 : !torch.vtensor<[2,96,96,320],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f32>
%156 = torch.aten.clone %155, %int0 : !torch.vtensor<[2,9216,320],f32>, !torch.int -> !torch.vtensor<[2,9216,320],f32>
%157 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%158 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%159 = torch.aten._to_copy %156, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%160 = torch.aten.t %158 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%161 = torch.prim.ListConstruct %int18432, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%162 = torch.aten.view %159, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%163 = torch.aten.addmm %157, %162, %160, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%164 = torch.aten.view %163, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%165 = torch.aten._to_copy %164, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%166 = torch.prim.ListConstruct %int320 : (!torch.int) -> !torch.list<int>
%result0_4, %result1_5, %result2 = torch.aten.native_layer_norm %165, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%167 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%168 = torch.aten._to_copy %result0_4, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%169 = torch.aten.t %167 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%170 = torch.prim.ListConstruct %int320, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%171 = torch.aten._reshape_alias %168, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%172 = torch.aten.mm %171, %169 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%173 = torch.aten._unsafe_view %172, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%174 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%175 = torch.aten._to_copy %result0_4, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%176 = torch.aten.t %174 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%177 = torch.aten._reshape_alias %175, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%178 = torch.aten.mm %177, %176 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%179 = torch.aten._unsafe_view %178, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%180 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%181 = torch.aten._to_copy %result0_4, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%182 = torch.aten.t %180 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%183 = torch.aten._reshape_alias %181, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%184 = torch.aten.mm %183, %182 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%185 = torch.aten._unsafe_view %184, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%186 = torch.prim.ListConstruct %int2, %int9216, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%187 = torch.prim.ListConstruct %int2949120, %int320, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%188 = torch.aten._reshape_alias %173, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%189 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%190 = torch.aten.permute %188, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%191 = torch.aten.clone %190, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%192 = torch.prim.ListConstruct %int10, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%193 = torch.aten._unsafe_view %191, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%194 = torch.aten._reshape_alias %179, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%195 = torch.aten.permute %194, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%196 = torch.aten.clone %195, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%197 = torch.aten._unsafe_view %196, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%198 = torch.aten._reshape_alias %185, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%199 = torch.aten.permute %198, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%200 = torch.aten.clone %199, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%201 = torch.aten._unsafe_view %200, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%202 = torch.aten.unsqueeze %193, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%203 = torch.prim.ListConstruct %int0, %int1, %int3, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%204 = torch.aten.permute %202, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%205 = torch.aten.unsqueeze %197, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%206 = torch.prim.ListConstruct %int0, %int3, %int1, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%207 = torch.aten.permute %205, %206 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,9216,64],f16>
%208 = torch.aten.permute %204, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%209 = torch.prim.ListConstruct %int589824, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%210 = torch.aten._reshape_alias %208, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%211 = torch.prim.ListConstruct %int0, %int3, %int2, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%212 = torch.aten.permute %207, %211 : !torch.vtensor<[10,1,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,9216,1],f16>
%213 = torch.prim.ListConstruct %int10, %int64, %int9216 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%214 = torch.prim.ListConstruct %int589824, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%215 = torch.aten._reshape_alias %212, %213, %214 : !torch.vtensor<[10,64,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,9216],f16>
%216 = torch.aten.bmm %210, %215 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,9216],f16> -> !torch.vtensor<[10,9216,9216],f16>
%217 = torch.prim.ListConstruct %int10, %int9216, %int1, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%218 = torch.aten.view %216, %217 : !torch.vtensor<[10,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%219 = torch.aten.permute %218, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%220 = torch.prim.ListConstruct %int10, %int9216, %int9216 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%221 = torch.aten.view %219, %220 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%222 = torch.aten.mul.Tensor %221, %0 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,9216],f16>
%223 = torch.aten._softmax %222, %int-1, %true : !torch.vtensor<[10,9216,9216],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,9216],f32>
%224 = torch.aten._to_copy %223, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,9216],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,9216],f16>
%225 = torch.aten.unsqueeze %224, %int3 : !torch.vtensor<[10,9216,9216],f16>, !torch.int -> !torch.vtensor<[10,9216,9216,1],f16>
%226 = torch.aten.permute %225, %203 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%227 = torch.aten.unsqueeze %201, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%228 = torch.aten.permute %227, %211 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,9216],f16>
%229 = torch.aten.permute %226, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%230 = torch.prim.ListConstruct %int84934656, %int9216, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%231 = torch.aten._reshape_alias %229, %220, %230 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%232 = torch.aten.permute %228, %211 : !torch.vtensor<[10,1,64,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%233 = torch.aten._reshape_alias %232, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%234 = torch.aten.bmm %231, %233 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[10,9216,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%235 = torch.prim.ListConstruct %int10, %int9216, %int1, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%236 = torch.aten.view %234, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%237 = torch.aten.permute %236, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%238 = torch.aten.view %237, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%239 = torch.prim.ListConstruct %int2, %int5, %int9216, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%240 = torch.prim.ListConstruct %int2949120, %int589824, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%241 = torch.aten._reshape_alias %238, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%242 = torch.aten.permute %241, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%243 = torch.aten.clone %242, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%244 = torch.aten._unsafe_view %243, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%245 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%246 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%247 = torch.aten.t %246 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%248 = torch.aten.view %244, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%249 = torch.aten.addmm %245, %248, %247, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%250 = torch.aten.view %249, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%251 = torch.aten.add.Tensor %250, %164, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%252 = torch.aten._to_copy %251, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_6, %result1_7, %result2_8 = torch.aten.native_layer_norm %252, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%253 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%254 = torch.aten._to_copy %result0_6, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%255 = torch.aten.t %253 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%256 = torch.aten._reshape_alias %254, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%257 = torch.aten.mm %256, %255 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%258 = torch.aten._unsafe_view %257, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%259 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%260 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%261 = torch.aten.t %259 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%262 = torch.prim.ListConstruct %int154, %int1024 : (!torch.int, !torch.int) -> !torch.list<int>
%263 = torch.prim.ListConstruct %int1024, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%264 = torch.aten._reshape_alias %260, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%265 = torch.aten.mm %264, %261 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%266 = torch.prim.ListConstruct %int2, %int77, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%267 = torch.aten._unsafe_view %265, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%268 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%269 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%270 = torch.aten.t %268 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%271 = torch.aten._reshape_alias %269, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%272 = torch.aten.mm %271, %270 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%273 = torch.aten._unsafe_view %272, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%274 = torch.aten._reshape_alias %258, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%275 = torch.aten.permute %274, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%276 = torch.aten.clone %275, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%277 = torch.aten._unsafe_view %276, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%278 = torch.prim.ListConstruct %int2, %int77, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%279 = torch.prim.ListConstruct %int24640, %int320, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%280 = torch.aten._reshape_alias %267, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%281 = torch.aten.permute %280, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%282 = torch.aten.clone %281, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%283 = torch.prim.ListConstruct %int10, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%284 = torch.aten._unsafe_view %282, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%285 = torch.aten._reshape_alias %273, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%286 = torch.aten.permute %285, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%287 = torch.aten.clone %286, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%288 = torch.aten._unsafe_view %287, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%289 = torch.aten.unsqueeze %277, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%290 = torch.aten.permute %289, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%291 = torch.aten.unsqueeze %284, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%292 = torch.aten.permute %291, %206 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,77,64],f16>
%293 = torch.aten.permute %290, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%294 = torch.aten._reshape_alias %293, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%295 = torch.aten.permute %292, %211 : !torch.vtensor<[10,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,77,1],f16>
%296 = torch.prim.ListConstruct %int10, %int64, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%297 = torch.prim.ListConstruct %int4928, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%298 = torch.aten._reshape_alias %295, %296, %297 : !torch.vtensor<[10,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,77],f16>
%299 = torch.aten.bmm %294, %298 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,77],f16> -> !torch.vtensor<[10,9216,77],f16>
%300 = torch.prim.ListConstruct %int10, %int9216, %int1, %int77 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%301 = torch.aten.view %299, %300 : !torch.vtensor<[10,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%302 = torch.aten.permute %301, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%303 = torch.prim.ListConstruct %int10, %int9216, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%304 = torch.aten.view %302, %303 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%305 = torch.aten.mul.Tensor %304, %0 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,77],f16>
%306 = torch.aten._softmax %305, %int-1, %true : !torch.vtensor<[10,9216,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,77],f32>
%307 = torch.aten._to_copy %306, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,77],f16>
%308 = torch.aten.unsqueeze %307, %int3 : !torch.vtensor<[10,9216,77],f16>, !torch.int -> !torch.vtensor<[10,9216,77,1],f16>
%309 = torch.aten.permute %308, %203 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%310 = torch.aten.unsqueeze %288, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%311 = torch.aten.permute %310, %211 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,77],f16>
%312 = torch.aten.permute %309, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%313 = torch.prim.ListConstruct %int709632, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%314 = torch.aten._reshape_alias %312, %303, %313 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%315 = torch.aten.permute %311, %211 : !torch.vtensor<[10,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64,1],f16>
%316 = torch.prim.ListConstruct %int4928, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%317 = torch.aten._reshape_alias %315, %283, %316 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%318 = torch.aten.bmm %314, %317 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[10,77,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%319 = torch.aten.view %318, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%320 = torch.aten.permute %319, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%321 = torch.aten.view %320, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%322 = torch.aten._reshape_alias %321, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%323 = torch.aten.permute %322, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%324 = torch.aten.clone %323, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%325 = torch.aten._unsafe_view %324, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%326 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%327 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%328 = torch.aten.t %327 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%329 = torch.aten.view %325, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%330 = torch.aten.addmm %326, %329, %328, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%331 = torch.aten.view %330, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%332 = torch.aten.add.Tensor %331, %251, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%333 = torch.aten._to_copy %332, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_9, %result1_10, %result2_11 = torch.aten.native_layer_norm %333, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%334 = torch.aten._to_copy %44, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560],f16>
%335 = torch.aten._to_copy %45, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560,320],f16>
%336 = torch.aten._to_copy %result0_9, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%337 = torch.aten.t %335 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%338 = torch.aten.view %336, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%339 = torch.aten.addmm %334, %338, %337, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,2560],f16>
%340 = torch.prim.ListConstruct %int2, %int9216, %int2560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%341 = torch.aten.view %339, %340 : !torch.vtensor<[18432,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,2560],f16>
%342 = torch.aten.slice.Tensor %341, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%343 = torch.aten.slice.Tensor %341, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%344 = torch.aten.gelu %343, %str : !torch.vtensor<[2,9216,1280],f16>, !torch.str -> !torch.vtensor<[2,9216,1280],f16>
%345 = torch.aten.mul.Tensor %342, %344 : !torch.vtensor<[2,9216,1280],f16>, !torch.vtensor<[2,9216,1280],f16> -> !torch.vtensor<[2,9216,1280],f16>
%346 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%347 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%348 = torch.aten.t %347 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%349 = torch.prim.ListConstruct %int18432, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%350 = torch.aten.view %345, %349 : !torch.vtensor<[2,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[18432,1280],f16>
%351 = torch.aten.addmm %346, %350, %348, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%352 = torch.aten.view %351, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%353 = torch.aten.add.Tensor %352, %332, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%354 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%355 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%356 = torch.aten.t %355 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%357 = torch.aten.view %353, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%358 = torch.aten.addmm %354, %357, %356, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%359 = torch.aten.view %358, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%360 = torch.prim.ListConstruct %int2, %int96, %int96, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%361 = torch.prim.ListConstruct %int2949120, %int30720, %int320, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%362 = torch.aten._reshape_alias %359, %360, %361 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f16>
%363 = torch.aten.permute %362, %206 : !torch.vtensor<[2,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%364 = torch.prim.ListConstruct %int2949120, %int1, %int30720, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%365 = torch.aten._reshape_alias %363, %90, %364 : !torch.vtensor<[2,320,96,96],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%366 = torch.aten.clone %365, %int0 : !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%367 = torch.aten.add.Tensor %366, %133, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%368 = torch.aten._to_copy %367, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%369 = torch.aten._reshape_alias %368, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_12, %result1_13 = torch.aten.var_mean.correction %369, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%370 = torch.aten.add.Tensor %result0_12, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%371 = torch.aten.rsqrt %370 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%372 = torch.aten.sub.Tensor %369, %result1_13, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%373 = torch.aten.mul.Tensor %372, %371 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%374 = torch.aten.view %373, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%375 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%376 = torch.aten.unsqueeze %375, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%377 = torch.aten.unsqueeze %376, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%378 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%379 = torch.aten.unsqueeze %378, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%380 = torch.aten.unsqueeze %379, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%381 = torch.aten.mul.Tensor %374, %380 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%382 = torch.aten.add.Tensor %381, %377, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%383 = torch.aten._to_copy %382, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%384 = torch.aten.silu %383 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%385 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%386 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%387 = torch.aten._convolution %384, %386, %385, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%388 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%389 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%390 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%391 = torch.aten.t %390 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%392 = torch.aten.addmm %389, %388, %391, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%393 = torch.aten.unsqueeze %392, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%394 = torch.aten.unsqueeze %393, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%395 = torch.aten.add.Tensor %387, %394, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%396 = torch.aten._to_copy %395, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%397 = torch.aten._reshape_alias %396, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_14, %result1_15 = torch.aten.var_mean.correction %397, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%398 = torch.aten.add.Tensor %result0_14, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%399 = torch.aten.rsqrt %398 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%400 = torch.aten.sub.Tensor %397, %result1_15, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%401 = torch.aten.mul.Tensor %400, %399 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%402 = torch.aten.view %401, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%403 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%404 = torch.aten.unsqueeze %403, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%405 = torch.aten.unsqueeze %404, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%406 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%407 = torch.aten.unsqueeze %406, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%408 = torch.aten.unsqueeze %407, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%409 = torch.aten.mul.Tensor %402, %408 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%410 = torch.aten.add.Tensor %409, %405, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%411 = torch.aten._to_copy %410, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%412 = torch.aten.silu %411 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%413 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%414 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%415 = torch.aten._convolution %412, %414, %413, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%416 = torch.aten.add.Tensor %367, %415, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%417 = torch.aten._to_copy %416, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%418 = torch.aten._reshape_alias %417, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_16, %result1_17 = torch.aten.var_mean.correction %418, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%419 = torch.aten.add.Tensor %result0_16, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%420 = torch.aten.rsqrt %419 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%421 = torch.aten.sub.Tensor %418, %result1_17, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%422 = torch.aten.mul.Tensor %421, %420 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%423 = torch.aten.view %422, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%424 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%425 = torch.aten.unsqueeze %424, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%426 = torch.aten.unsqueeze %425, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%427 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%428 = torch.aten.unsqueeze %427, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%429 = torch.aten.unsqueeze %428, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%430 = torch.aten.mul.Tensor %423, %429 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%431 = torch.aten.add.Tensor %430, %426, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%432 = torch.aten._reshape_alias %431, %90, %149 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%433 = torch.aten.permute %432, %151 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f32>
%434 = torch.aten._reshape_alias %433, %153, %154 : !torch.vtensor<[2,96,96,320],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f32>
%435 = torch.aten.clone %434, %int0 : !torch.vtensor<[2,9216,320],f32>, !torch.int -> !torch.vtensor<[2,9216,320],f32>
%436 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%437 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%438 = torch.aten._to_copy %435, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%439 = torch.aten.t %437 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%440 = torch.aten.view %438, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%441 = torch.aten.addmm %436, %440, %439, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%442 = torch.aten.view %441, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%443 = torch.aten._to_copy %442, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_18, %result1_19, %result2_20 = torch.aten.native_layer_norm %443, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%444 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%445 = torch.aten._to_copy %result0_18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%446 = torch.aten.t %444 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%447 = torch.aten._reshape_alias %445, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%448 = torch.aten.mm %447, %446 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%449 = torch.aten._unsafe_view %448, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%450 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%451 = torch.aten._to_copy %result0_18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%452 = torch.aten.t %450 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%453 = torch.aten._reshape_alias %451, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%454 = torch.aten.mm %453, %452 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%455 = torch.aten._unsafe_view %454, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%456 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%457 = torch.aten._to_copy %result0_18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%458 = torch.aten.t %456 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%459 = torch.aten._reshape_alias %457, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%460 = torch.aten.mm %459, %458 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%461 = torch.aten._unsafe_view %460, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%462 = torch.aten._reshape_alias %449, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%463 = torch.aten.permute %462, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%464 = torch.aten.clone %463, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%465 = torch.aten._unsafe_view %464, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%466 = torch.aten._reshape_alias %455, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%467 = torch.aten.permute %466, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%468 = torch.aten.clone %467, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%469 = torch.aten._unsafe_view %468, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%470 = torch.aten._reshape_alias %461, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%471 = torch.aten.permute %470, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%472 = torch.aten.clone %471, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%473 = torch.aten._unsafe_view %472, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%474 = torch.aten.unsqueeze %465, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%475 = torch.aten.permute %474, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%476 = torch.aten.unsqueeze %469, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%477 = torch.aten.permute %476, %206 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,9216,64],f16>
%478 = torch.aten.permute %475, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%479 = torch.aten._reshape_alias %478, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%480 = torch.aten.permute %477, %211 : !torch.vtensor<[10,1,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,9216,1],f16>
%481 = torch.aten._reshape_alias %480, %213, %214 : !torch.vtensor<[10,64,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,9216],f16>
%482 = torch.aten.bmm %479, %481 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,9216],f16> -> !torch.vtensor<[10,9216,9216],f16>
%483 = torch.aten.view %482, %217 : !torch.vtensor<[10,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%484 = torch.aten.permute %483, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%485 = torch.aten.view %484, %220 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%486 = torch.aten.mul.Tensor %485, %0 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,9216],f16>
%487 = torch.aten._softmax %486, %int-1, %true : !torch.vtensor<[10,9216,9216],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,9216],f32>
%488 = torch.aten._to_copy %487, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,9216],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,9216],f16>
%489 = torch.aten.unsqueeze %488, %int3 : !torch.vtensor<[10,9216,9216],f16>, !torch.int -> !torch.vtensor<[10,9216,9216,1],f16>
%490 = torch.aten.permute %489, %203 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%491 = torch.aten.unsqueeze %473, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%492 = torch.aten.permute %491, %211 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,9216],f16>
%493 = torch.aten.permute %490, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%494 = torch.aten._reshape_alias %493, %220, %230 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%495 = torch.aten.permute %492, %211 : !torch.vtensor<[10,1,64,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%496 = torch.aten._reshape_alias %495, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%497 = torch.aten.bmm %494, %496 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[10,9216,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%498 = torch.aten.view %497, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%499 = torch.aten.permute %498, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%500 = torch.aten.view %499, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%501 = torch.aten._reshape_alias %500, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%502 = torch.aten.permute %501, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%503 = torch.aten.clone %502, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%504 = torch.aten._unsafe_view %503, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%505 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%506 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%507 = torch.aten.t %506 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%508 = torch.aten.view %504, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%509 = torch.aten.addmm %505, %508, %507, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%510 = torch.aten.view %509, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%511 = torch.aten.add.Tensor %510, %442, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%512 = torch.aten._to_copy %511, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_21, %result1_22, %result2_23 = torch.aten.native_layer_norm %512, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%513 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%514 = torch.aten._to_copy %result0_21, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%515 = torch.aten.t %513 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%516 = torch.aten._reshape_alias %514, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%517 = torch.aten.mm %516, %515 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%518 = torch.aten._unsafe_view %517, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%519 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%520 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%521 = torch.aten.t %519 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%522 = torch.aten._reshape_alias %520, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%523 = torch.aten.mm %522, %521 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%524 = torch.aten._unsafe_view %523, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%525 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%526 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%527 = torch.aten.t %525 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%528 = torch.aten._reshape_alias %526, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%529 = torch.aten.mm %528, %527 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%530 = torch.aten._unsafe_view %529, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%531 = torch.aten._reshape_alias %518, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%532 = torch.aten.permute %531, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%533 = torch.aten.clone %532, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%534 = torch.aten._unsafe_view %533, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%535 = torch.aten._reshape_alias %524, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%536 = torch.aten.permute %535, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%537 = torch.aten.clone %536, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%538 = torch.aten._unsafe_view %537, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%539 = torch.aten._reshape_alias %530, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%540 = torch.aten.permute %539, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%541 = torch.aten.clone %540, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%542 = torch.aten._unsafe_view %541, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%543 = torch.aten.unsqueeze %534, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%544 = torch.aten.permute %543, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%545 = torch.aten.unsqueeze %538, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%546 = torch.aten.permute %545, %206 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,77,64],f16>
%547 = torch.aten.permute %544, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%548 = torch.aten._reshape_alias %547, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%549 = torch.aten.permute %546, %211 : !torch.vtensor<[10,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,77,1],f16>
%550 = torch.aten._reshape_alias %549, %296, %297 : !torch.vtensor<[10,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,77],f16>
%551 = torch.aten.bmm %548, %550 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,77],f16> -> !torch.vtensor<[10,9216,77],f16>
%552 = torch.aten.view %551, %300 : !torch.vtensor<[10,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%553 = torch.aten.permute %552, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%554 = torch.aten.view %553, %303 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%555 = torch.aten.mul.Tensor %554, %0 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,77],f16>
%556 = torch.aten._softmax %555, %int-1, %true : !torch.vtensor<[10,9216,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,77],f32>
%557 = torch.aten._to_copy %556, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,77],f16>
%558 = torch.aten.unsqueeze %557, %int3 : !torch.vtensor<[10,9216,77],f16>, !torch.int -> !torch.vtensor<[10,9216,77,1],f16>
%559 = torch.aten.permute %558, %203 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%560 = torch.aten.unsqueeze %542, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%561 = torch.aten.permute %560, %211 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,77],f16>
%562 = torch.aten.permute %559, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%563 = torch.aten._reshape_alias %562, %303, %313 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%564 = torch.aten.permute %561, %211 : !torch.vtensor<[10,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64,1],f16>
%565 = torch.aten._reshape_alias %564, %283, %316 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%566 = torch.aten.bmm %563, %565 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[10,77,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%567 = torch.aten.view %566, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%568 = torch.aten.permute %567, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%569 = torch.aten.view %568, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%570 = torch.aten._reshape_alias %569, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%571 = torch.aten.permute %570, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%572 = torch.aten.clone %571, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%573 = torch.aten._unsafe_view %572, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%574 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%575 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%576 = torch.aten.t %575 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%577 = torch.aten.view %573, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%578 = torch.aten.addmm %574, %577, %576, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%579 = torch.aten.view %578, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%580 = torch.aten.add.Tensor %579, %511, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%581 = torch.aten._to_copy %580, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %581, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%582 = torch.aten._to_copy %44, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560],f16>
%583 = torch.aten._to_copy %45, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560,320],f16>
%584 = torch.aten._to_copy %result0_24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%585 = torch.aten.t %583 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%586 = torch.aten.view %584, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%587 = torch.aten.addmm %582, %586, %585, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,2560],f16>
%588 = torch.aten.view %587, %340 : !torch.vtensor<[18432,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,2560],f16>
%589 = torch.aten.slice.Tensor %588, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%590 = torch.aten.slice.Tensor %588, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%591 = torch.aten.gelu %590, %str : !torch.vtensor<[2,9216,1280],f16>, !torch.str -> !torch.vtensor<[2,9216,1280],f16>
%592 = torch.aten.mul.Tensor %589, %591 : !torch.vtensor<[2,9216,1280],f16>, !torch.vtensor<[2,9216,1280],f16> -> !torch.vtensor<[2,9216,1280],f16>
%593 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%594 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%595 = torch.aten.t %594 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%596 = torch.aten.view %592, %349 : !torch.vtensor<[2,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[18432,1280],f16>
%597 = torch.aten.addmm %593, %596, %595, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%598 = torch.aten.view %597, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%599 = torch.aten.add.Tensor %598, %580, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%600 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%601 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%602 = torch.aten.t %601 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%603 = torch.aten.view %599, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%604 = torch.aten.addmm %600, %603, %602, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%605 = torch.aten.view %604, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%606 = torch.aten._reshape_alias %605, %360, %361 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f16>
%607 = torch.aten.permute %606, %206 : !torch.vtensor<[2,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%608 = torch.aten._reshape_alias %607, %90, %364 : !torch.vtensor<[2,320,96,96],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%609 = torch.aten.clone %608, %int0 : !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%610 = torch.aten.add.Tensor %609, %416, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%611 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%612 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%613 = torch.prim.ListConstruct %int2, %int2 : (!torch.int, !torch.int) -> !torch.list<int>
%614 = torch.aten._convolution %610, %612, %611, %613, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,48,48],f16>
%615 = torch.aten._to_copy %614, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,48,48],f32>
%616 = torch.prim.ListConstruct %int2, %int32, %int10, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%617 = torch.prim.ListConstruct %int737280, %int23040, %int2304, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%618 = torch.aten._reshape_alias %615, %616, %617 : !torch.vtensor<[2,320,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,2304],f32>
%result0_27, %result1_28 = torch.aten.var_mean.correction %618, %85, %int0, %true : !torch.vtensor<[2,32,10,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%619 = torch.aten.add.Tensor %result0_27, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%620 = torch.aten.rsqrt %619 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%621 = torch.aten.sub.Tensor %618, %result1_28, %int1 : !torch.vtensor<[2,32,10,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,2304],f32>
%622 = torch.aten.mul.Tensor %621, %620 : !torch.vtensor<[2,32,10,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,2304],f32>
%623 = torch.prim.ListConstruct %int2, %int320, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%624 = torch.aten.view %622, %623 : !torch.vtensor<[2,32,10,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,320,48,48],f32>
%625 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%626 = torch.aten.unsqueeze %625, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%627 = torch.aten.unsqueeze %626, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%628 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%629 = torch.aten.unsqueeze %628, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%630 = torch.aten.unsqueeze %629, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%631 = torch.aten.mul.Tensor %624, %630 : !torch.vtensor<[2,320,48,48],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,48,48],f32>
%632 = torch.aten.add.Tensor %631, %627, %int1 : !torch.vtensor<[2,320,48,48],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,48,48],f32>
%633 = torch.aten._to_copy %632, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,48,48],f16>
%634 = torch.aten.silu %633 : !torch.vtensor<[2,320,48,48],f16> -> !torch.vtensor<[2,320,48,48],f16>
%635 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%636 = torch.aten._to_copy %7, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,320,3,3],f16>
%637 = torch.aten._convolution %634, %636, %635, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,48,48],f16>, !torch.vtensor<[640,320,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%638 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%639 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%640 = torch.aten._to_copy %28, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280],f16>
%641 = torch.aten.t %640 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%642 = torch.aten.addmm %639, %638, %641, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%643 = torch.aten.unsqueeze %642, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%644 = torch.aten.unsqueeze %643, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%645 = torch.aten.add.Tensor %637, %644, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%646 = torch.aten._to_copy %645, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%647 = torch.prim.ListConstruct %int2, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%648 = torch.prim.ListConstruct %int1474560, %int46080, %int2304, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%649 = torch.aten._reshape_alias %646, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_29, %result1_30 = torch.aten.var_mean.correction %649, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%650 = torch.aten.add.Tensor %result0_29, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%651 = torch.aten.rsqrt %650 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%652 = torch.aten.sub.Tensor %649, %result1_30, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%653 = torch.aten.mul.Tensor %652, %651 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%654 = torch.prim.ListConstruct %int2, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%655 = torch.aten.view %653, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%656 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%657 = torch.aten.unsqueeze %656, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%658 = torch.aten.unsqueeze %657, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%659 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%660 = torch.aten.unsqueeze %659, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%661 = torch.aten.unsqueeze %660, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%662 = torch.aten.mul.Tensor %655, %661 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%663 = torch.aten.add.Tensor %662, %658, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%664 = torch.aten._to_copy %663, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%665 = torch.aten.silu %664 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
%666 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%667 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%668 = torch.aten._convolution %665, %667, %666, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%669 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%670 = torch.aten._to_copy %8, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,320,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,320,1,1],f16>
%671 = torch.aten._convolution %614, %670, %669, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,48,48],f16>, !torch.vtensor<[640,320,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%672 = torch.aten.add.Tensor %671, %668, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%673 = torch.aten._to_copy %672, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%674 = torch.aten._reshape_alias %673, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_31, %result1_32 = torch.aten.var_mean.correction %674, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%675 = torch.aten.add.Tensor %result0_31, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%676 = torch.aten.rsqrt %675 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%677 = torch.aten.sub.Tensor %674, %result1_32, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%678 = torch.aten.mul.Tensor %677, %676 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%679 = torch.aten.view %678, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%680 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%681 = torch.aten.unsqueeze %680, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%682 = torch.aten.unsqueeze %681, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%683 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%684 = torch.aten.unsqueeze %683, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%685 = torch.aten.unsqueeze %684, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%686 = torch.aten.mul.Tensor %679, %685 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%687 = torch.aten.add.Tensor %686, %682, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%688 = torch.prim.ListConstruct %int1474560, %int2304, %int48, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%689 = torch.aten._reshape_alias %687, %654, %688 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%690 = torch.aten.permute %689, %151 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f32>
%691 = torch.prim.ListConstruct %int2, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%692 = torch.prim.ListConstruct %int1474560, %int1, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%693 = torch.aten._reshape_alias %690, %691, %692 : !torch.vtensor<[2,48,48,640],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f32>
%694 = torch.aten.clone %693, %int0 : !torch.vtensor<[2,2304,640],f32>, !torch.int -> !torch.vtensor<[2,2304,640],f32>
%695 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%696 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%697 = torch.aten._to_copy %694, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%698 = torch.aten.t %696 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%699 = torch.prim.ListConstruct %int4608, %int640 : (!torch.int, !torch.int) -> !torch.list<int>
%700 = torch.aten.view %697, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%701 = torch.aten.addmm %695, %700, %698, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%702 = torch.aten.view %701, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%703 = torch.aten._to_copy %702, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%704 = torch.prim.ListConstruct %int640 : (!torch.int) -> !torch.list<int>
%result0_33, %result1_34, %result2_35 = torch.aten.native_layer_norm %703, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%705 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%706 = torch.aten._to_copy %result0_33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%707 = torch.aten.t %705 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%708 = torch.prim.ListConstruct %int640, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%709 = torch.aten._reshape_alias %706, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%710 = torch.aten.mm %709, %707 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%711 = torch.aten._unsafe_view %710, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%712 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%713 = torch.aten._to_copy %result0_33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%714 = torch.aten.t %712 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%715 = torch.aten._reshape_alias %713, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%716 = torch.aten.mm %715, %714 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%717 = torch.aten._unsafe_view %716, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%718 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%719 = torch.aten._to_copy %result0_33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%720 = torch.aten.t %718 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%721 = torch.aten._reshape_alias %719, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%722 = torch.aten.mm %721, %720 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%723 = torch.aten._unsafe_view %722, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%724 = torch.prim.ListConstruct %int2, %int2304, %int10, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%725 = torch.prim.ListConstruct %int1474560, %int640, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%726 = torch.aten._reshape_alias %711, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%727 = torch.aten.permute %726, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%728 = torch.aten.clone %727, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%729 = torch.prim.ListConstruct %int20, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%730 = torch.aten._unsafe_view %728, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%731 = torch.aten._reshape_alias %717, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%732 = torch.aten.permute %731, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%733 = torch.aten.clone %732, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%734 = torch.aten._unsafe_view %733, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%735 = torch.aten._reshape_alias %723, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%736 = torch.aten.permute %735, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%737 = torch.aten.clone %736, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%738 = torch.aten._unsafe_view %737, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%739 = torch.aten.unsqueeze %730, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%740 = torch.aten.permute %739, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%741 = torch.aten.unsqueeze %734, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%742 = torch.aten.permute %741, %206 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,2304,64],f16>
%743 = torch.aten.permute %740, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%744 = torch.prim.ListConstruct %int147456, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%745 = torch.aten._reshape_alias %743, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%746 = torch.aten.permute %742, %211 : !torch.vtensor<[20,1,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,2304,1],f16>
%747 = torch.prim.ListConstruct %int20, %int64, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%748 = torch.prim.ListConstruct %int147456, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%749 = torch.aten._reshape_alias %746, %747, %748 : !torch.vtensor<[20,64,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,2304],f16>
%750 = torch.aten.bmm %745, %749 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,2304],f16> -> !torch.vtensor<[20,2304,2304],f16>
%751 = torch.prim.ListConstruct %int20, %int2304, %int1, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%752 = torch.aten.view %750, %751 : !torch.vtensor<[20,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%753 = torch.aten.permute %752, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%754 = torch.prim.ListConstruct %int20, %int2304, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%755 = torch.aten.view %753, %754 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%756 = torch.aten.mul.Tensor %755, %0 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,2304],f16>
%757 = torch.aten._softmax %756, %int-1, %true : !torch.vtensor<[20,2304,2304],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,2304],f32>
%758 = torch.aten._to_copy %757, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,2304],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,2304],f16>
%759 = torch.aten.unsqueeze %758, %int3 : !torch.vtensor<[20,2304,2304],f16>, !torch.int -> !torch.vtensor<[20,2304,2304,1],f16>
%760 = torch.aten.permute %759, %203 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%761 = torch.aten.unsqueeze %738, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%762 = torch.aten.permute %761, %211 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,2304],f16>
%763 = torch.aten.permute %760, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%764 = torch.prim.ListConstruct %int5308416, %int2304, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%765 = torch.aten._reshape_alias %763, %754, %764 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%766 = torch.aten.permute %762, %211 : !torch.vtensor<[20,1,64,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%767 = torch.aten._reshape_alias %766, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%768 = torch.aten.bmm %765, %767 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[20,2304,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%769 = torch.prim.ListConstruct %int20, %int2304, %int1, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%770 = torch.aten.view %768, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%771 = torch.aten.permute %770, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%772 = torch.aten.view %771, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%773 = torch.prim.ListConstruct %int2, %int10, %int2304, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%774 = torch.prim.ListConstruct %int1474560, %int147456, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%775 = torch.aten._reshape_alias %772, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%776 = torch.aten.permute %775, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%777 = torch.aten.clone %776, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%778 = torch.aten._unsafe_view %777, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%779 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%780 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%781 = torch.aten.t %780 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%782 = torch.aten.view %778, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%783 = torch.aten.addmm %779, %782, %781, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%784 = torch.aten.view %783, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%785 = torch.aten.add.Tensor %784, %702, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%786 = torch.aten._to_copy %785, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_36, %result1_37, %result2_38 = torch.aten.native_layer_norm %786, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%787 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%788 = torch.aten._to_copy %result0_36, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%789 = torch.aten.t %787 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%790 = torch.aten._reshape_alias %788, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%791 = torch.aten.mm %790, %789 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%792 = torch.aten._unsafe_view %791, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%793 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%794 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%795 = torch.aten.t %793 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%796 = torch.aten._reshape_alias %794, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%797 = torch.aten.mm %796, %795 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%798 = torch.prim.ListConstruct %int2, %int77, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%799 = torch.aten._unsafe_view %797, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%800 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%801 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%802 = torch.aten.t %800 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%803 = torch.aten._reshape_alias %801, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%804 = torch.aten.mm %803, %802 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%805 = torch.aten._unsafe_view %804, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%806 = torch.aten._reshape_alias %792, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%807 = torch.aten.permute %806, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%808 = torch.aten.clone %807, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%809 = torch.aten._unsafe_view %808, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%810 = torch.prim.ListConstruct %int2, %int77, %int10, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%811 = torch.prim.ListConstruct %int49280, %int640, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%812 = torch.aten._reshape_alias %799, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%813 = torch.aten.permute %812, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%814 = torch.aten.clone %813, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%815 = torch.prim.ListConstruct %int20, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%816 = torch.aten._unsafe_view %814, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%817 = torch.aten._reshape_alias %805, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%818 = torch.aten.permute %817, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%819 = torch.aten.clone %818, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%820 = torch.aten._unsafe_view %819, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%821 = torch.aten.unsqueeze %809, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%822 = torch.aten.permute %821, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%823 = torch.aten.unsqueeze %816, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%824 = torch.aten.permute %823, %206 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,77,64],f16>
%825 = torch.aten.permute %822, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%826 = torch.aten._reshape_alias %825, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%827 = torch.aten.permute %824, %211 : !torch.vtensor<[20,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,77,1],f16>
%828 = torch.prim.ListConstruct %int20, %int64, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%829 = torch.aten._reshape_alias %827, %828, %297 : !torch.vtensor<[20,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,77],f16>
%830 = torch.aten.bmm %826, %829 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,77],f16> -> !torch.vtensor<[20,2304,77],f16>
%831 = torch.prim.ListConstruct %int20, %int2304, %int1, %int77 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%832 = torch.aten.view %830, %831 : !torch.vtensor<[20,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%833 = torch.aten.permute %832, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%834 = torch.prim.ListConstruct %int20, %int2304, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%835 = torch.aten.view %833, %834 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%836 = torch.aten.mul.Tensor %835, %0 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,77],f16>
%837 = torch.aten._softmax %836, %int-1, %true : !torch.vtensor<[20,2304,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,77],f32>
%838 = torch.aten._to_copy %837, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,77],f16>
%839 = torch.aten.unsqueeze %838, %int3 : !torch.vtensor<[20,2304,77],f16>, !torch.int -> !torch.vtensor<[20,2304,77,1],f16>
%840 = torch.aten.permute %839, %203 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%841 = torch.aten.unsqueeze %820, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%842 = torch.aten.permute %841, %211 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,77],f16>
%843 = torch.aten.permute %840, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%844 = torch.prim.ListConstruct %int177408, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%845 = torch.aten._reshape_alias %843, %834, %844 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%846 = torch.aten.permute %842, %211 : !torch.vtensor<[20,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64,1],f16>
%847 = torch.aten._reshape_alias %846, %815, %316 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%848 = torch.aten.bmm %845, %847 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%849 = torch.aten.view %848, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%850 = torch.aten.permute %849, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%851 = torch.aten.view %850, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%852 = torch.aten._reshape_alias %851, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%853 = torch.aten.permute %852, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%854 = torch.aten.clone %853, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%855 = torch.aten._unsafe_view %854, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%856 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%857 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%858 = torch.aten.t %857 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%859 = torch.aten.view %855, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%860 = torch.aten.addmm %856, %859, %858, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%861 = torch.aten.view %860, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%862 = torch.aten.add.Tensor %861, %785, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%863 = torch.aten._to_copy %862, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_39, %result1_40, %result2_41 = torch.aten.native_layer_norm %863, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%864 = torch.aten._to_copy %31, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120],f16>
%865 = torch.aten._to_copy %32, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120,640],f16>
%866 = torch.aten._to_copy %result0_39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%867 = torch.aten.t %865 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%868 = torch.aten.view %866, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%869 = torch.aten.addmm %864, %868, %867, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,5120],f16>
%870 = torch.prim.ListConstruct %int2, %int2304, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%871 = torch.aten.view %869, %870 : !torch.vtensor<[4608,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,5120],f16>
%872 = torch.aten.slice.Tensor %871, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%873 = torch.aten.slice.Tensor %871, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%874 = torch.aten.gelu %873, %str : !torch.vtensor<[2,2304,2560],f16>, !torch.str -> !torch.vtensor<[2,2304,2560],f16>
%875 = torch.aten.mul.Tensor %872, %874 : !torch.vtensor<[2,2304,2560],f16>, !torch.vtensor<[2,2304,2560],f16> -> !torch.vtensor<[2,2304,2560],f16>
%876 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%877 = torch.aten._to_copy %33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,2560],f16>
%878 = torch.aten.t %877 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%879 = torch.prim.ListConstruct %int4608, %int2560 : (!torch.int, !torch.int) -> !torch.list<int>
%880 = torch.aten.view %875, %879 : !torch.vtensor<[2,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4608,2560],f16>
%881 = torch.aten.addmm %876, %880, %878, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%882 = torch.aten.view %881, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%883 = torch.aten.add.Tensor %882, %862, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%884 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%885 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%886 = torch.aten.t %885 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%887 = torch.aten.view %883, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%888 = torch.aten.addmm %884, %887, %886, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%889 = torch.aten.view %888, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%890 = torch.prim.ListConstruct %int2, %int48, %int48, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%891 = torch.prim.ListConstruct %int1474560, %int30720, %int640, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%892 = torch.aten._reshape_alias %889, %890, %891 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f16>
%893 = torch.aten.permute %892, %206 : !torch.vtensor<[2,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%894 = torch.prim.ListConstruct %int1474560, %int1, %int30720, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%895 = torch.aten._reshape_alias %893, %654, %894 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%896 = torch.aten.clone %895, %int0 : !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%897 = torch.aten.add.Tensor %896, %672, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%898 = torch.aten._to_copy %897, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%899 = torch.aten._reshape_alias %898, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_42, %result1_43 = torch.aten.var_mean.correction %899, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%900 = torch.aten.add.Tensor %result0_42, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%901 = torch.aten.rsqrt %900 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%902 = torch.aten.sub.Tensor %899, %result1_43, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%903 = torch.aten.mul.Tensor %902, %901 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%904 = torch.aten.view %903, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%905 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%906 = torch.aten.unsqueeze %905, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%907 = torch.aten.unsqueeze %906, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%908 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%909 = torch.aten.unsqueeze %908, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%910 = torch.aten.unsqueeze %909, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%911 = torch.aten.mul.Tensor %904, %910 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%912 = torch.aten.add.Tensor %911, %907, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%913 = torch.aten._to_copy %912, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%914 = torch.aten.silu %913 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
%915 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%916 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%917 = torch.aten._convolution %914, %916, %915, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%918 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%919 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%920 = torch.aten._to_copy %28, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280],f16>
%921 = torch.aten.t %920 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%922 = torch.aten.addmm %919, %918, %921, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%923 = torch.aten.unsqueeze %922, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%924 = torch.aten.unsqueeze %923, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%925 = torch.aten.add.Tensor %917, %924, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%926 = torch.aten._to_copy %925, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%927 = torch.aten._reshape_alias %926, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_44, %result1_45 = torch.aten.var_mean.correction %927, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%928 = torch.aten.add.Tensor %result0_44, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%929 = torch.aten.rsqrt %928 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%930 = torch.aten.sub.Tensor %927, %result1_45, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%931 = torch.aten.mul.Tensor %930, %929 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%932 = torch.aten.view %931, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%933 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%934 = torch.aten.unsqueeze %933, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%935 = torch.aten.unsqueeze %934, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%936 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%937 = torch.aten.unsqueeze %936, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%938 = torch.aten.unsqueeze %937, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%939 = torch.aten.mul.Tensor %932, %938 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%940 = torch.aten.add.Tensor %939, %935, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%941 = torch.aten._to_copy %940, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%942 = torch.aten.silu %941 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
%943 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%944 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%945 = torch.aten._convolution %942, %944, %943, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%946 = torch.aten.add.Tensor %897, %945, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%947 = torch.aten._to_copy %946, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%948 = torch.aten._reshape_alias %947, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_46, %result1_47 = torch.aten.var_mean.correction %948, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%949 = torch.aten.add.Tensor %result0_46, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%950 = torch.aten.rsqrt %949 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%951 = torch.aten.sub.Tensor %948, %result1_47, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%952 = torch.aten.mul.Tensor %951, %950 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%953 = torch.aten.view %952, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%954 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%955 = torch.aten.unsqueeze %954, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%956 = torch.aten.unsqueeze %955, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%957 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%958 = torch.aten.unsqueeze %957, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%959 = torch.aten.unsqueeze %958, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%960 = torch.aten.mul.Tensor %953, %959 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%961 = torch.aten.add.Tensor %960, %956, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%962 = torch.aten._reshape_alias %961, %654, %688 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%963 = torch.aten.permute %962, %151 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f32>
%964 = torch.aten._reshape_alias %963, %691, %692 : !torch.vtensor<[2,48,48,640],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f32>
%965 = torch.aten.clone %964, %int0 : !torch.vtensor<[2,2304,640],f32>, !torch.int -> !torch.vtensor<[2,2304,640],f32>
%966 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%967 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%968 = torch.aten._to_copy %965, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%969 = torch.aten.t %967 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%970 = torch.aten.view %968, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%971 = torch.aten.addmm %966, %970, %969, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%972 = torch.aten.view %971, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%973 = torch.aten._to_copy %972, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %973, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%974 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%975 = torch.aten._to_copy %result0_48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%976 = torch.aten.t %974 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%977 = torch.aten._reshape_alias %975, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%978 = torch.aten.mm %977, %976 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%979 = torch.aten._unsafe_view %978, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%980 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%981 = torch.aten._to_copy %result0_48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%982 = torch.aten.t %980 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%983 = torch.aten._reshape_alias %981, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%984 = torch.aten.mm %983, %982 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%985 = torch.aten._unsafe_view %984, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%986 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%987 = torch.aten._to_copy %result0_48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%988 = torch.aten.t %986 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%989 = torch.aten._reshape_alias %987, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%990 = torch.aten.mm %989, %988 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%991 = torch.aten._unsafe_view %990, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%992 = torch.aten._reshape_alias %979, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%993 = torch.aten.permute %992, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%994 = torch.aten.clone %993, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%995 = torch.aten._unsafe_view %994, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%996 = torch.aten._reshape_alias %985, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%997 = torch.aten.permute %996, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%998 = torch.aten.clone %997, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%999 = torch.aten._unsafe_view %998, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1000 = torch.aten._reshape_alias %991, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%1001 = torch.aten.permute %1000, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%1002 = torch.aten.clone %1001, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%1003 = torch.aten._unsafe_view %1002, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1004 = torch.aten.unsqueeze %995, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%1005 = torch.aten.permute %1004, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%1006 = torch.aten.unsqueeze %999, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%1007 = torch.aten.permute %1006, %206 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,2304,64],f16>
%1008 = torch.aten.permute %1005, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%1009 = torch.aten._reshape_alias %1008, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1010 = torch.aten.permute %1007, %211 : !torch.vtensor<[20,1,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,2304,1],f16>
%1011 = torch.aten._reshape_alias %1010, %747, %748 : !torch.vtensor<[20,64,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,2304],f16>
%1012 = torch.aten.bmm %1009, %1011 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,2304],f16> -> !torch.vtensor<[20,2304,2304],f16>
%1013 = torch.aten.view %1012, %751 : !torch.vtensor<[20,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%1014 = torch.aten.permute %1013, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%1015 = torch.aten.view %1014, %754 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%1016 = torch.aten.mul.Tensor %1015, %0 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,2304],f16>
%1017 = torch.aten._softmax %1016, %int-1, %true : !torch.vtensor<[20,2304,2304],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,2304],f32>
%1018 = torch.aten._to_copy %1017, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,2304],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,2304],f16>
%1019 = torch.aten.unsqueeze %1018, %int3 : !torch.vtensor<[20,2304,2304],f16>, !torch.int -> !torch.vtensor<[20,2304,2304,1],f16>
%1020 = torch.aten.permute %1019, %203 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%1021 = torch.aten.unsqueeze %1003, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%1022 = torch.aten.permute %1021, %211 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,2304],f16>
%1023 = torch.aten.permute %1020, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%1024 = torch.aten._reshape_alias %1023, %754, %764 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%1025 = torch.aten.permute %1022, %211 : !torch.vtensor<[20,1,64,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%1026 = torch.aten._reshape_alias %1025, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1027 = torch.aten.bmm %1024, %1026 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[20,2304,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%1028 = torch.aten.view %1027, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%1029 = torch.aten.permute %1028, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%1030 = torch.aten.view %1029, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1031 = torch.aten._reshape_alias %1030, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%1032 = torch.aten.permute %1031, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%1033 = torch.aten.clone %1032, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%1034 = torch.aten._unsafe_view %1033, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1035 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%1036 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%1037 = torch.aten.t %1036 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%1038 = torch.aten.view %1034, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%1039 = torch.aten.addmm %1035, %1038, %1037, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%1040 = torch.aten.view %1039, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1041 = torch.aten.add.Tensor %1040, %972, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%1042 = torch.aten._to_copy %1041, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %1042, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%1043 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%1044 = torch.aten._to_copy %result0_51, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%1045 = torch.aten.t %1043 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%1046 = torch.aten._reshape_alias %1044, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%1047 = torch.aten.mm %1046, %1045 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%1048 = torch.aten._unsafe_view %1047, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1049 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%1050 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1051 = torch.aten.t %1049 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%1052 = torch.aten._reshape_alias %1050, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1053 = torch.aten.mm %1052, %1051 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%1054 = torch.aten._unsafe_view %1053, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%1055 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%1056 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1057 = torch.aten.t %1055 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%1058 = torch.aten._reshape_alias %1056, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1059 = torch.aten.mm %1058, %1057 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%1060 = torch.aten._unsafe_view %1059, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%1061 = torch.aten._reshape_alias %1048, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%1062 = torch.aten.permute %1061, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%1063 = torch.aten.clone %1062, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%1064 = torch.aten._unsafe_view %1063, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1065 = torch.aten._reshape_alias %1054, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%1066 = torch.aten.permute %1065, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%1067 = torch.aten.clone %1066, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%1068 = torch.aten._unsafe_view %1067, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%1069 = torch.aten._reshape_alias %1060, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%1070 = torch.aten.permute %1069, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%1071 = torch.aten.clone %1070, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%1072 = torch.aten._unsafe_view %1071, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%1073 = torch.aten.unsqueeze %1064, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%1074 = torch.aten.permute %1073, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%1075 = torch.aten.unsqueeze %1068, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%1076 = torch.aten.permute %1075, %206 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,77,64],f16>
%1077 = torch.aten.permute %1074, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%1078 = torch.aten._reshape_alias %1077, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1079 = torch.aten.permute %1076, %211 : !torch.vtensor<[20,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,77,1],f16>
%1080 = torch.aten._reshape_alias %1079, %828, %297 : !torch.vtensor<[20,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,77],f16>
%1081 = torch.aten.bmm %1078, %1080 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,77],f16> -> !torch.vtensor<[20,2304,77],f16>
%1082 = torch.aten.view %1081, %831 : !torch.vtensor<[20,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%1083 = torch.aten.permute %1082, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%1084 = torch.aten.view %1083, %834 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%1085 = torch.aten.mul.Tensor %1084, %0 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,77],f16>
%1086 = torch.aten._softmax %1085, %int-1, %true : !torch.vtensor<[20,2304,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,77],f32>
%1087 = torch.aten._to_copy %1086, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,77],f16>
%1088 = torch.aten.unsqueeze %1087, %int3 : !torch.vtensor<[20,2304,77],f16>, !torch.int -> !torch.vtensor<[20,2304,77,1],f16>
%1089 = torch.aten.permute %1088, %203 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%1090 = torch.aten.unsqueeze %1072, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%1091 = torch.aten.permute %1090, %211 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,77],f16>
%1092 = torch.aten.permute %1089, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%1093 = torch.aten._reshape_alias %1092, %834, %844 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%1094 = torch.aten.permute %1091, %211 : !torch.vtensor<[20,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64,1],f16>
%1095 = torch.aten._reshape_alias %1094, %815, %316 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%1096 = torch.aten.bmm %1093, %1095 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%1097 = torch.aten.view %1096, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%1098 = torch.aten.permute %1097, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%1099 = torch.aten.view %1098, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1100 = torch.aten._reshape_alias %1099, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%1101 = torch.aten.permute %1100, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%1102 = torch.aten.clone %1101, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%1103 = torch.aten._unsafe_view %1102, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1104 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%1105 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%1106 = torch.aten.t %1105 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%1107 = torch.aten.view %1103, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%1108 = torch.aten.addmm %1104, %1107, %1106, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%1109 = torch.aten.view %1108, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1110 = torch.aten.add.Tensor %1109, %1041, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%1111 = torch.aten._to_copy %1110, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_54, %result1_55, %result2_56 = torch.aten.native_layer_norm %1111, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%1112 = torch.aten._to_copy %31, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120],f16>
%1113 = torch.aten._to_copy %32, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120,640],f16>
%1114 = torch.aten._to_copy %result0_54, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%1115 = torch.aten.t %1113 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%1116 = torch.aten.view %1114, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%1117 = torch.aten.addmm %1112, %1116, %1115, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,5120],f16>
%1118 = torch.aten.view %1117, %870 : !torch.vtensor<[4608,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,5120],f16>
%1119 = torch.aten.slice.Tensor %1118, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%1120 = torch.aten.slice.Tensor %1118, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%1121 = torch.aten.gelu %1120, %str : !torch.vtensor<[2,2304,2560],f16>, !torch.str -> !torch.vtensor<[2,2304,2560],f16>
%1122 = torch.aten.mul.Tensor %1119, %1121 : !torch.vtensor<[2,2304,2560],f16>, !torch.vtensor<[2,2304,2560],f16> -> !torch.vtensor<[2,2304,2560],f16>
%1123 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%1124 = torch.aten._to_copy %33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,2560],f16>
%1125 = torch.aten.t %1124 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%1126 = torch.aten.view %1122, %879 : !torch.vtensor<[2,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4608,2560],f16>
%1127 = torch.aten.addmm %1123, %1126, %1125, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%1128 = torch.aten.view %1127, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1129 = torch.aten.add.Tensor %1128, %1110, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%1130 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%1131 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%1132 = torch.aten.t %1131 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%1133 = torch.aten.view %1129, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%1134 = torch.aten.addmm %1130, %1133, %1132, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%1135 = torch.aten.view %1134, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1136 = torch.aten._reshape_alias %1135, %890, %891 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f16>
%1137 = torch.aten.permute %1136, %206 : !torch.vtensor<[2,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%1138 = torch.aten._reshape_alias %1137, %654, %894 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%1139 = torch.aten.clone %1138, %int0 : !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%1140 = torch.aten.add.Tensor %1139, %946, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%1141 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%1142 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%1143 = torch.aten._convolution %1140, %1142, %1141, %613, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,24,24],f16>
%1144 = torch.aten._to_copy %1143, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,24,24],f32>
%1145 = torch.prim.ListConstruct %int2, %int32, %int20, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1146 = torch.prim.ListConstruct %int368640, %int11520, %int576, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1147 = torch.aten._reshape_alias %1144, %1145, %1146 : !torch.vtensor<[2,640,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,576],f32>
%result0_57, %result1_58 = torch.aten.var_mean.correction %1147, %85, %int0, %true : !torch.vtensor<[2,32,20,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1148 = torch.aten.add.Tensor %result0_57, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1149 = torch.aten.rsqrt %1148 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1150 = torch.aten.sub.Tensor %1147, %result1_58, %int1 : !torch.vtensor<[2,32,20,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,576],f32>
%1151 = torch.aten.mul.Tensor %1150, %1149 : !torch.vtensor<[2,32,20,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,576],f32>
%1152 = torch.prim.ListConstruct %int2, %int640, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1153 = torch.aten.view %1151, %1152 : !torch.vtensor<[2,32,20,576],f32>, !torch.list<int> -> !torch.vtensor<[2,640,24,24],f32>
%1154 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%1155 = torch.aten.unsqueeze %1154, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%1156 = torch.aten.unsqueeze %1155, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%1157 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%1158 = torch.aten.unsqueeze %1157, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%1159 = torch.aten.unsqueeze %1158, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%1160 = torch.aten.mul.Tensor %1153, %1159 : !torch.vtensor<[2,640,24,24],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,24,24],f32>
%1161 = torch.aten.add.Tensor %1160, %1156, %int1 : !torch.vtensor<[2,640,24,24],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,24,24],f32>
%1162 = torch.aten._to_copy %1161, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,24,24],f16>
%1163 = torch.aten.silu %1162 : !torch.vtensor<[2,640,24,24],f16> -> !torch.vtensor<[2,640,24,24],f16>
%1164 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1165 = torch.aten._to_copy %9, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,640,3,3],f16>
%1166 = torch.aten._convolution %1163, %1165, %1164, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,24,24],f16>, !torch.vtensor<[1280,640,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%1167 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1168 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1169 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1170 = torch.aten.t %1169 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1171 = torch.aten.addmm %1168, %1167, %1170, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1172 = torch.aten.unsqueeze %1171, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1173 = torch.aten.unsqueeze %1172, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1174 = torch.aten.add.Tensor %1166, %1173, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1175 = torch.aten._to_copy %1174, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%1176 = torch.prim.ListConstruct %int2, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1177 = torch.prim.ListConstruct %int737280, %int23040, %int576, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1178 = torch.aten._reshape_alias %1175, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_59, %result1_60 = torch.aten.var_mean.correction %1178, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1179 = torch.aten.add.Tensor %result0_59, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1180 = torch.aten.rsqrt %1179 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1181 = torch.aten.sub.Tensor %1178, %result1_60, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%1182 = torch.aten.mul.Tensor %1181, %1180 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%1183 = torch.prim.ListConstruct %int2, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1184 = torch.aten.view %1182, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1185 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1186 = torch.aten.unsqueeze %1185, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1187 = torch.aten.unsqueeze %1186, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1188 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1189 = torch.aten.unsqueeze %1188, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1190 = torch.aten.unsqueeze %1189, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1191 = torch.aten.mul.Tensor %1184, %1190 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%1192 = torch.aten.add.Tensor %1191, %1187, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%1193 = torch.aten._to_copy %1192, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%1194 = torch.aten.silu %1193 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%1195 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1196 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1197 = torch.aten._convolution %1194, %1196, %1195, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%1198 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1199 = torch.aten._to_copy %10, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,640,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,640,1,1],f16>
%1200 = torch.aten._convolution %1143, %1199, %1198, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,24,24],f16>, !torch.vtensor<[1280,640,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%1201 = torch.aten.add.Tensor %1200, %1197, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1202 = torch.aten._to_copy %1201, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%1203 = torch.aten._reshape_alias %1202, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_61, %result1_62 = torch.aten.var_mean.correction %1203, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1204 = torch.aten.add.Tensor %result0_61, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1205 = torch.aten.rsqrt %1204 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1206 = torch.aten.sub.Tensor %1203, %result1_62, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%1207 = torch.aten.mul.Tensor %1206, %1205 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%1208 = torch.aten.view %1207, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1209 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1210 = torch.aten.unsqueeze %1209, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1211 = torch.aten.unsqueeze %1210, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1212 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1213 = torch.aten.unsqueeze %1212, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1214 = torch.aten.unsqueeze %1213, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1215 = torch.aten.mul.Tensor %1208, %1214 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%1216 = torch.aten.add.Tensor %1215, %1211, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%1217 = torch.prim.ListConstruct %int737280, %int576, %int24, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1218 = torch.aten._reshape_alias %1216, %1183, %1217 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1219 = torch.aten.permute %1218, %151 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f32>
%1220 = torch.prim.ListConstruct %int2, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1221 = torch.prim.ListConstruct %int737280, %int1, %int576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1222 = torch.aten._reshape_alias %1219, %1220, %1221 : !torch.vtensor<[2,24,24,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f32>
%1223 = torch.aten.clone %1222, %int0 : !torch.vtensor<[2,576,1280],f32>, !torch.int -> !torch.vtensor<[2,576,1280],f32>
%1224 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1225 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1226 = torch.aten._to_copy %1223, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1227 = torch.aten.t %1225 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1228 = torch.prim.ListConstruct %int1152, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%1229 = torch.aten.view %1226, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1230 = torch.aten.addmm %1224, %1229, %1227, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1231 = torch.aten.view %1230, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1232 = torch.aten._to_copy %1231, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%1233 = torch.prim.ListConstruct %int1280 : (!torch.int) -> !torch.list<int>
%result0_63, %result1_64, %result2_65 = torch.aten.native_layer_norm %1232, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1234 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1235 = torch.aten._to_copy %result0_63, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1236 = torch.aten.t %1234 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1237 = torch.prim.ListConstruct %int1280, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%1238 = torch.aten._reshape_alias %1235, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1239 = torch.aten.mm %1238, %1236 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1240 = torch.aten._unsafe_view %1239, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1241 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1242 = torch.aten._to_copy %result0_63, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1243 = torch.aten.t %1241 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1244 = torch.aten._reshape_alias %1242, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1245 = torch.aten.mm %1244, %1243 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1246 = torch.aten._unsafe_view %1245, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1247 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1248 = torch.aten._to_copy %result0_63, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1249 = torch.aten.t %1247 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1250 = torch.aten._reshape_alias %1248, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1251 = torch.aten.mm %1250, %1249 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1252 = torch.aten._unsafe_view %1251, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1253 = torch.prim.ListConstruct %int2, %int576, %int20, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1254 = torch.prim.ListConstruct %int737280, %int1280, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1255 = torch.aten._reshape_alias %1240, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1256 = torch.aten.permute %1255, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1257 = torch.aten.clone %1256, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1258 = torch.prim.ListConstruct %int40, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1259 = torch.aten._unsafe_view %1257, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1260 = torch.aten._reshape_alias %1246, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1261 = torch.aten.permute %1260, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1262 = torch.aten.clone %1261, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1263 = torch.aten._unsafe_view %1262, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1264 = torch.aten._reshape_alias %1252, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1265 = torch.aten.permute %1264, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1266 = torch.aten.clone %1265, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1267 = torch.aten._unsafe_view %1266, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1268 = torch.aten.unsqueeze %1259, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1269 = torch.aten.permute %1268, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1270 = torch.aten.unsqueeze %1263, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1271 = torch.aten.permute %1270, %206 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,576,64],f16>
%1272 = torch.aten.permute %1269, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1273 = torch.prim.ListConstruct %int36864, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1274 = torch.aten._reshape_alias %1272, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1275 = torch.aten.permute %1271, %211 : !torch.vtensor<[40,1,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,576,1],f16>
%1276 = torch.prim.ListConstruct %int40, %int64, %int576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1277 = torch.prim.ListConstruct %int36864, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1278 = torch.aten._reshape_alias %1275, %1276, %1277 : !torch.vtensor<[40,64,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,576],f16>
%1279 = torch.aten.bmm %1274, %1278 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,576],f16> -> !torch.vtensor<[40,576,576],f16>
%1280 = torch.prim.ListConstruct %int40, %int576, %int1, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1281 = torch.aten.view %1279, %1280 : !torch.vtensor<[40,576,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%1282 = torch.aten.permute %1281, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%1283 = torch.prim.ListConstruct %int40, %int576, %int576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1284 = torch.aten.view %1282, %1283 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%1285 = torch.aten.mul.Tensor %1284, %0 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,576],f16>
%1286 = torch.aten._softmax %1285, %int-1, %true : !torch.vtensor<[40,576,576],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,576],f32>
%1287 = torch.aten._to_copy %1286, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,576],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,576],f16>
%1288 = torch.aten.unsqueeze %1287, %int3 : !torch.vtensor<[40,576,576],f16>, !torch.int -> !torch.vtensor<[40,576,576,1],f16>
%1289 = torch.aten.permute %1288, %203 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%1290 = torch.aten.unsqueeze %1267, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1291 = torch.aten.permute %1290, %211 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,576],f16>
%1292 = torch.aten.permute %1289, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%1293 = torch.prim.ListConstruct %int331776, %int576, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1294 = torch.aten._reshape_alias %1292, %1283, %1293 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%1295 = torch.aten.permute %1291, %211 : !torch.vtensor<[40,1,64,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1296 = torch.aten._reshape_alias %1295, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1297 = torch.aten.bmm %1294, %1296 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[40,576,64],f16> -> !torch.vtensor<[40,576,64],f16>
%1298 = torch.prim.ListConstruct %int40, %int576, %int1, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1299 = torch.aten.view %1297, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1300 = torch.aten.permute %1299, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1301 = torch.aten.view %1300, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1302 = torch.prim.ListConstruct %int2, %int20, %int576, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1303 = torch.prim.ListConstruct %int737280, %int36864, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1304 = torch.aten._reshape_alias %1301, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1305 = torch.aten.permute %1304, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1306 = torch.aten.clone %1305, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%1307 = torch.aten._unsafe_view %1306, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1308 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1309 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1310 = torch.aten.t %1309 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1311 = torch.aten.view %1307, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1312 = torch.aten.addmm %1308, %1311, %1310, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1313 = torch.aten.view %1312, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1314 = torch.aten.add.Tensor %1313, %1231, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1315 = torch.aten._to_copy %1314, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_66, %result1_67, %result2_68 = torch.aten.native_layer_norm %1315, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1316 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1317 = torch.aten._to_copy %result0_66, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1318 = torch.aten.t %1316 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1319 = torch.aten._reshape_alias %1317, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1320 = torch.aten.mm %1319, %1318 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1321 = torch.aten._unsafe_view %1320, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1322 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1323 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1324 = torch.aten.t %1322 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1325 = torch.aten._reshape_alias %1323, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1326 = torch.aten.mm %1325, %1324 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1327 = torch.prim.ListConstruct %int2, %int77, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1328 = torch.aten._unsafe_view %1326, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1329 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1330 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1331 = torch.aten.t %1329 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1332 = torch.aten._reshape_alias %1330, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1333 = torch.aten.mm %1332, %1331 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1334 = torch.aten._unsafe_view %1333, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1335 = torch.aten._reshape_alias %1321, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1336 = torch.aten.permute %1335, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1337 = torch.aten.clone %1336, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1338 = torch.aten._unsafe_view %1337, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1339 = torch.prim.ListConstruct %int2, %int77, %int20, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1340 = torch.prim.ListConstruct %int98560, %int1280, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1341 = torch.aten._reshape_alias %1328, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1342 = torch.aten.permute %1341, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1343 = torch.aten.clone %1342, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1344 = torch.prim.ListConstruct %int40, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1345 = torch.aten._unsafe_view %1343, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1346 = torch.aten._reshape_alias %1334, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1347 = torch.aten.permute %1346, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1348 = torch.aten.clone %1347, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1349 = torch.aten._unsafe_view %1348, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1350 = torch.aten.unsqueeze %1338, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1351 = torch.aten.permute %1350, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1352 = torch.aten.unsqueeze %1345, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1353 = torch.aten.permute %1352, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%1354 = torch.aten.permute %1351, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1355 = torch.aten._reshape_alias %1354, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1356 = torch.aten.permute %1353, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%1357 = torch.prim.ListConstruct %int40, %int64, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1358 = torch.aten._reshape_alias %1356, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%1359 = torch.aten.bmm %1355, %1358 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,576,77],f16>
%1360 = torch.prim.ListConstruct %int40, %int576, %int1, %int77 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1361 = torch.aten.view %1359, %1360 : !torch.vtensor<[40,576,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%1362 = torch.aten.permute %1361, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%1363 = torch.prim.ListConstruct %int40, %int576, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1364 = torch.aten.view %1362, %1363 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%1365 = torch.aten.mul.Tensor %1364, %0 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,77],f16>
%1366 = torch.aten._softmax %1365, %int-1, %true : !torch.vtensor<[40,576,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,77],f32>
%1367 = torch.aten._to_copy %1366, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,77],f16>
%1368 = torch.aten.unsqueeze %1367, %int3 : !torch.vtensor<[40,576,77],f16>, !torch.int -> !torch.vtensor<[40,576,77,1],f16>
%1369 = torch.aten.permute %1368, %203 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%1370 = torch.aten.unsqueeze %1349, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1371 = torch.aten.permute %1370, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%1372 = torch.aten.permute %1369, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%1373 = torch.prim.ListConstruct %int44352, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1374 = torch.aten._reshape_alias %1372, %1363, %1373 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%1375 = torch.aten.permute %1371, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%1376 = torch.aten._reshape_alias %1375, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1377 = torch.aten.bmm %1374, %1376 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,576,64],f16>
%1378 = torch.aten.view %1377, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1379 = torch.aten.permute %1378, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1380 = torch.aten.view %1379, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1381 = torch.aten._reshape_alias %1380, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1382 = torch.aten.permute %1381, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1383 = torch.aten.clone %1382, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%1384 = torch.aten._unsafe_view %1383, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1385 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1386 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1387 = torch.aten.t %1386 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1388 = torch.aten.view %1384, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1389 = torch.aten.addmm %1385, %1388, %1387, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1390 = torch.aten.view %1389, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1391 = torch.aten.add.Tensor %1390, %1314, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1392 = torch.aten._to_copy %1391, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_69, %result1_70, %result2_71 = torch.aten.native_layer_norm %1392, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1393 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%1394 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%1395 = torch.aten._to_copy %result0_69, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1396 = torch.aten.t %1394 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%1397 = torch.aten.view %1395, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1398 = torch.aten.addmm %1393, %1397, %1396, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,10240],f16>
%1399 = torch.prim.ListConstruct %int2, %int576, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1400 = torch.aten.view %1398, %1399 : !torch.vtensor<[1152,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,576,10240],f16>
%1401 = torch.aten.slice.Tensor %1400, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%1402 = torch.aten.slice.Tensor %1400, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%1403 = torch.aten.gelu %1402, %str : !torch.vtensor<[2,576,5120],f16>, !torch.str -> !torch.vtensor<[2,576,5120],f16>
%1404 = torch.aten.mul.Tensor %1401, %1403 : !torch.vtensor<[2,576,5120],f16>, !torch.vtensor<[2,576,5120],f16> -> !torch.vtensor<[2,576,5120],f16>
%1405 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1406 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%1407 = torch.aten.t %1406 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%1408 = torch.prim.ListConstruct %int1152, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%1409 = torch.aten.view %1404, %1408 : !torch.vtensor<[2,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[1152,5120],f16>
%1410 = torch.aten.addmm %1405, %1409, %1407, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1411 = torch.aten.view %1410, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1412 = torch.aten.add.Tensor %1411, %1391, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1413 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1414 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1415 = torch.aten.t %1414 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1416 = torch.aten.view %1412, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1417 = torch.aten.addmm %1413, %1416, %1415, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1418 = torch.aten.view %1417, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1419 = torch.prim.ListConstruct %int2, %int24, %int24, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1420 = torch.prim.ListConstruct %int737280, %int30720, %int1280, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1421 = torch.aten._reshape_alias %1418, %1419, %1420 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f16>
%1422 = torch.aten.permute %1421, %206 : !torch.vtensor<[2,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%1423 = torch.prim.ListConstruct %int737280, %int1, %int30720, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1424 = torch.aten._reshape_alias %1422, %1183, %1423 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%1425 = torch.aten.clone %1424, %int0 : !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1426 = torch.aten.add.Tensor %1425, %1201, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1427 = torch.aten._to_copy %1426, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%1428 = torch.aten._reshape_alias %1427, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_72, %result1_73 = torch.aten.var_mean.correction %1428, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1429 = torch.aten.add.Tensor %result0_72, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1430 = torch.aten.rsqrt %1429 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1431 = torch.aten.sub.Tensor %1428, %result1_73, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%1432 = torch.aten.mul.Tensor %1431, %1430 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%1433 = torch.aten.view %1432, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1434 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1435 = torch.aten.unsqueeze %1434, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1436 = torch.aten.unsqueeze %1435, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1437 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1438 = torch.aten.unsqueeze %1437, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1439 = torch.aten.unsqueeze %1438, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1440 = torch.aten.mul.Tensor %1433, %1439 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%1441 = torch.aten.add.Tensor %1440, %1436, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%1442 = torch.aten._to_copy %1441, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%1443 = torch.aten.silu %1442 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%1444 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1445 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1446 = torch.aten._convolution %1443, %1445, %1444, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%1447 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1448 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1449 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1450 = torch.aten.t %1449 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1451 = torch.aten.addmm %1448, %1447, %1450, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1452 = torch.aten.unsqueeze %1451, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1453 = torch.aten.unsqueeze %1452, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1454 = torch.aten.add.Tensor %1446, %1453, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1455 = torch.aten._to_copy %1454, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%1456 = torch.aten._reshape_alias %1455, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_74, %result1_75 = torch.aten.var_mean.correction %1456, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1457 = torch.aten.add.Tensor %result0_74, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1458 = torch.aten.rsqrt %1457 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1459 = torch.aten.sub.Tensor %1456, %result1_75, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%1460 = torch.aten.mul.Tensor %1459, %1458 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%1461 = torch.aten.view %1460, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1462 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1463 = torch.aten.unsqueeze %1462, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1464 = torch.aten.unsqueeze %1463, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1465 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1466 = torch.aten.unsqueeze %1465, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1467 = torch.aten.unsqueeze %1466, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1468 = torch.aten.mul.Tensor %1461, %1467 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%1469 = torch.aten.add.Tensor %1468, %1464, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%1470 = torch.aten._to_copy %1469, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%1471 = torch.aten.silu %1470 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%1472 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1473 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1474 = torch.aten._convolution %1471, %1473, %1472, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%1475 = torch.aten.add.Tensor %1426, %1474, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1476 = torch.aten._to_copy %1475, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%1477 = torch.aten._reshape_alias %1476, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_76, %result1_77 = torch.aten.var_mean.correction %1477, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1478 = torch.aten.add.Tensor %result0_76, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1479 = torch.aten.rsqrt %1478 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1480 = torch.aten.sub.Tensor %1477, %result1_77, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%1481 = torch.aten.mul.Tensor %1480, %1479 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%1482 = torch.aten.view %1481, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1483 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1484 = torch.aten.unsqueeze %1483, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1485 = torch.aten.unsqueeze %1484, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1486 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1487 = torch.aten.unsqueeze %1486, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1488 = torch.aten.unsqueeze %1487, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1489 = torch.aten.mul.Tensor %1482, %1488 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%1490 = torch.aten.add.Tensor %1489, %1485, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%1491 = torch.aten._reshape_alias %1490, %1183, %1217 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1492 = torch.aten.permute %1491, %151 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f32>
%1493 = torch.aten._reshape_alias %1492, %1220, %1221 : !torch.vtensor<[2,24,24,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f32>
%1494 = torch.aten.clone %1493, %int0 : !torch.vtensor<[2,576,1280],f32>, !torch.int -> !torch.vtensor<[2,576,1280],f32>
%1495 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1496 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1497 = torch.aten._to_copy %1494, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1498 = torch.aten.t %1496 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1499 = torch.aten.view %1497, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1500 = torch.aten.addmm %1495, %1499, %1498, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1501 = torch.aten.view %1500, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1502 = torch.aten._to_copy %1501, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_78, %result1_79, %result2_80 = torch.aten.native_layer_norm %1502, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1503 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1504 = torch.aten._to_copy %result0_78, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1505 = torch.aten.t %1503 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1506 = torch.aten._reshape_alias %1504, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1507 = torch.aten.mm %1506, %1505 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1508 = torch.aten._unsafe_view %1507, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1509 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1510 = torch.aten._to_copy %result0_78, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1511 = torch.aten.t %1509 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1512 = torch.aten._reshape_alias %1510, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1513 = torch.aten.mm %1512, %1511 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1514 = torch.aten._unsafe_view %1513, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1515 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1516 = torch.aten._to_copy %result0_78, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1517 = torch.aten.t %1515 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1518 = torch.aten._reshape_alias %1516, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1519 = torch.aten.mm %1518, %1517 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1520 = torch.aten._unsafe_view %1519, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1521 = torch.aten._reshape_alias %1508, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1522 = torch.aten.permute %1521, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1523 = torch.aten.clone %1522, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1524 = torch.aten._unsafe_view %1523, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1525 = torch.aten._reshape_alias %1514, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1526 = torch.aten.permute %1525, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1527 = torch.aten.clone %1526, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1528 = torch.aten._unsafe_view %1527, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1529 = torch.aten._reshape_alias %1520, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1530 = torch.aten.permute %1529, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1531 = torch.aten.clone %1530, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1532 = torch.aten._unsafe_view %1531, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1533 = torch.aten.unsqueeze %1524, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1534 = torch.aten.permute %1533, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1535 = torch.aten.unsqueeze %1528, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1536 = torch.aten.permute %1535, %206 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,576,64],f16>
%1537 = torch.aten.permute %1534, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1538 = torch.aten._reshape_alias %1537, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1539 = torch.aten.permute %1536, %211 : !torch.vtensor<[40,1,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,576,1],f16>
%1540 = torch.aten._reshape_alias %1539, %1276, %1277 : !torch.vtensor<[40,64,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,576],f16>
%1541 = torch.aten.bmm %1538, %1540 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,576],f16> -> !torch.vtensor<[40,576,576],f16>
%1542 = torch.aten.view %1541, %1280 : !torch.vtensor<[40,576,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%1543 = torch.aten.permute %1542, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%1544 = torch.aten.view %1543, %1283 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%1545 = torch.aten.mul.Tensor %1544, %0 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,576],f16>
%1546 = torch.aten._softmax %1545, %int-1, %true : !torch.vtensor<[40,576,576],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,576],f32>
%1547 = torch.aten._to_copy %1546, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,576],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,576],f16>
%1548 = torch.aten.unsqueeze %1547, %int3 : !torch.vtensor<[40,576,576],f16>, !torch.int -> !torch.vtensor<[40,576,576,1],f16>
%1549 = torch.aten.permute %1548, %203 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%1550 = torch.aten.unsqueeze %1532, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1551 = torch.aten.permute %1550, %211 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,576],f16>
%1552 = torch.aten.permute %1549, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%1553 = torch.aten._reshape_alias %1552, %1283, %1293 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%1554 = torch.aten.permute %1551, %211 : !torch.vtensor<[40,1,64,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1555 = torch.aten._reshape_alias %1554, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1556 = torch.aten.bmm %1553, %1555 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[40,576,64],f16> -> !torch.vtensor<[40,576,64],f16>
%1557 = torch.aten.view %1556, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1558 = torch.aten.permute %1557, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1559 = torch.aten.view %1558, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1560 = torch.aten._reshape_alias %1559, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1561 = torch.aten.permute %1560, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1562 = torch.aten.clone %1561, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%1563 = torch.aten._unsafe_view %1562, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1564 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1565 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1566 = torch.aten.t %1565 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1567 = torch.aten.view %1563, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1568 = torch.aten.addmm %1564, %1567, %1566, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1569 = torch.aten.view %1568, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1570 = torch.aten.add.Tensor %1569, %1501, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1571 = torch.aten._to_copy %1570, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_81, %result1_82, %result2_83 = torch.aten.native_layer_norm %1571, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1572 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1573 = torch.aten._to_copy %result0_81, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1574 = torch.aten.t %1572 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1575 = torch.aten._reshape_alias %1573, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1576 = torch.aten.mm %1575, %1574 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1577 = torch.aten._unsafe_view %1576, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1578 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1579 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1580 = torch.aten.t %1578 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1581 = torch.aten._reshape_alias %1579, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1582 = torch.aten.mm %1581, %1580 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1583 = torch.aten._unsafe_view %1582, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1584 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1585 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1586 = torch.aten.t %1584 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1587 = torch.aten._reshape_alias %1585, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1588 = torch.aten.mm %1587, %1586 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1589 = torch.aten._unsafe_view %1588, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1590 = torch.aten._reshape_alias %1577, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1591 = torch.aten.permute %1590, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1592 = torch.aten.clone %1591, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1593 = torch.aten._unsafe_view %1592, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1594 = torch.aten._reshape_alias %1583, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1595 = torch.aten.permute %1594, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1596 = torch.aten.clone %1595, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1597 = torch.aten._unsafe_view %1596, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1598 = torch.aten._reshape_alias %1589, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1599 = torch.aten.permute %1598, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1600 = torch.aten.clone %1599, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1601 = torch.aten._unsafe_view %1600, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1602 = torch.aten.unsqueeze %1593, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1603 = torch.aten.permute %1602, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1604 = torch.aten.unsqueeze %1597, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1605 = torch.aten.permute %1604, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%1606 = torch.aten.permute %1603, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1607 = torch.aten._reshape_alias %1606, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1608 = torch.aten.permute %1605, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%1609 = torch.aten._reshape_alias %1608, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%1610 = torch.aten.bmm %1607, %1609 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,576,77],f16>
%1611 = torch.aten.view %1610, %1360 : !torch.vtensor<[40,576,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%1612 = torch.aten.permute %1611, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%1613 = torch.aten.view %1612, %1363 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%1614 = torch.aten.mul.Tensor %1613, %0 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,77],f16>
%1615 = torch.aten._softmax %1614, %int-1, %true : !torch.vtensor<[40,576,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,77],f32>
%1616 = torch.aten._to_copy %1615, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,77],f16>
%1617 = torch.aten.unsqueeze %1616, %int3 : !torch.vtensor<[40,576,77],f16>, !torch.int -> !torch.vtensor<[40,576,77,1],f16>
%1618 = torch.aten.permute %1617, %203 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%1619 = torch.aten.unsqueeze %1601, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1620 = torch.aten.permute %1619, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%1621 = torch.aten.permute %1618, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%1622 = torch.aten._reshape_alias %1621, %1363, %1373 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%1623 = torch.aten.permute %1620, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%1624 = torch.aten._reshape_alias %1623, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1625 = torch.aten.bmm %1622, %1624 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,576,64],f16>
%1626 = torch.aten.view %1625, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1627 = torch.aten.permute %1626, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1628 = torch.aten.view %1627, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1629 = torch.aten._reshape_alias %1628, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1630 = torch.aten.permute %1629, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1631 = torch.aten.clone %1630, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%1632 = torch.aten._unsafe_view %1631, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1633 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1634 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1635 = torch.aten.t %1634 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1636 = torch.aten.view %1632, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1637 = torch.aten.addmm %1633, %1636, %1635, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1638 = torch.aten.view %1637, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1639 = torch.aten.add.Tensor %1638, %1570, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1640 = torch.aten._to_copy %1639, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_84, %result1_85, %result2_86 = torch.aten.native_layer_norm %1640, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1641 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%1642 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%1643 = torch.aten._to_copy %result0_84, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1644 = torch.aten.t %1642 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%1645 = torch.aten.view %1643, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1646 = torch.aten.addmm %1641, %1645, %1644, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,10240],f16>
%1647 = torch.aten.view %1646, %1399 : !torch.vtensor<[1152,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,576,10240],f16>
%1648 = torch.aten.slice.Tensor %1647, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%1649 = torch.aten.slice.Tensor %1647, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%1650 = torch.aten.gelu %1649, %str : !torch.vtensor<[2,576,5120],f16>, !torch.str -> !torch.vtensor<[2,576,5120],f16>
%1651 = torch.aten.mul.Tensor %1648, %1650 : !torch.vtensor<[2,576,5120],f16>, !torch.vtensor<[2,576,5120],f16> -> !torch.vtensor<[2,576,5120],f16>
%1652 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1653 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%1654 = torch.aten.t %1653 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%1655 = torch.aten.view %1651, %1408 : !torch.vtensor<[2,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[1152,5120],f16>
%1656 = torch.aten.addmm %1652, %1655, %1654, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1657 = torch.aten.view %1656, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1658 = torch.aten.add.Tensor %1657, %1639, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1659 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1660 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1661 = torch.aten.t %1660 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1662 = torch.aten.view %1658, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1663 = torch.aten.addmm %1659, %1662, %1661, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1664 = torch.aten.view %1663, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1665 = torch.aten._reshape_alias %1664, %1419, %1420 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f16>
%1666 = torch.aten.permute %1665, %206 : !torch.vtensor<[2,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%1667 = torch.aten._reshape_alias %1666, %1183, %1423 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%1668 = torch.aten.clone %1667, %int0 : !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1669 = torch.aten.add.Tensor %1668, %1475, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1670 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1671 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1672 = torch.aten._convolution %1669, %1671, %1670, %613, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1673 = torch.aten._to_copy %1672, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1674 = torch.prim.ListConstruct %int2, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1675 = torch.prim.ListConstruct %int184320, %int5760, %int144, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1676 = torch.aten._reshape_alias %1673, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_87, %result1_88 = torch.aten.var_mean.correction %1676, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1677 = torch.aten.add.Tensor %result0_87, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1678 = torch.aten.rsqrt %1677 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1679 = torch.aten.sub.Tensor %1676, %result1_88, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1680 = torch.aten.mul.Tensor %1679, %1678 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1681 = torch.prim.ListConstruct %int2, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1682 = torch.aten.view %1680, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1683 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1684 = torch.aten.unsqueeze %1683, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1685 = torch.aten.unsqueeze %1684, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1686 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1687 = torch.aten.unsqueeze %1686, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1688 = torch.aten.unsqueeze %1687, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1689 = torch.aten.mul.Tensor %1682, %1688 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1690 = torch.aten.add.Tensor %1689, %1685, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1691 = torch.aten._to_copy %1690, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1692 = torch.aten.silu %1691 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1693 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1694 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1695 = torch.aten._convolution %1692, %1694, %1693, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1696 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1697 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1698 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1699 = torch.aten.t %1698 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1700 = torch.aten.addmm %1697, %1696, %1699, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1701 = torch.aten.unsqueeze %1700, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1702 = torch.aten.unsqueeze %1701, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1703 = torch.aten.add.Tensor %1695, %1702, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1704 = torch.aten._to_copy %1703, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1705 = torch.aten._reshape_alias %1704, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_89, %result1_90 = torch.aten.var_mean.correction %1705, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1706 = torch.aten.add.Tensor %result0_89, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1707 = torch.aten.rsqrt %1706 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1708 = torch.aten.sub.Tensor %1705, %result1_90, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1709 = torch.aten.mul.Tensor %1708, %1707 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1710 = torch.aten.view %1709, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1711 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1712 = torch.aten.unsqueeze %1711, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1713 = torch.aten.unsqueeze %1712, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1714 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1715 = torch.aten.unsqueeze %1714, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1716 = torch.aten.unsqueeze %1715, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1717 = torch.aten.mul.Tensor %1710, %1716 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1718 = torch.aten.add.Tensor %1717, %1713, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1719 = torch.aten._to_copy %1718, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1720 = torch.aten.silu %1719 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1721 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1722 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1723 = torch.aten._convolution %1720, %1722, %1721, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1724 = torch.aten.add.Tensor %1672, %1723, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1725 = torch.aten._to_copy %1724, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1726 = torch.aten._reshape_alias %1725, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_91, %result1_92 = torch.aten.var_mean.correction %1726, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1727 = torch.aten.add.Tensor %result0_91, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1728 = torch.aten.rsqrt %1727 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1729 = torch.aten.sub.Tensor %1726, %result1_92, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1730 = torch.aten.mul.Tensor %1729, %1728 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1731 = torch.aten.view %1730, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1732 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1733 = torch.aten.unsqueeze %1732, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1734 = torch.aten.unsqueeze %1733, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1735 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1736 = torch.aten.unsqueeze %1735, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1737 = torch.aten.unsqueeze %1736, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1738 = torch.aten.mul.Tensor %1731, %1737 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1739 = torch.aten.add.Tensor %1738, %1734, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1740 = torch.aten._to_copy %1739, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1741 = torch.aten.silu %1740 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1742 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1743 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1744 = torch.aten._convolution %1741, %1743, %1742, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1745 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1746 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1747 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1748 = torch.aten.t %1747 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1749 = torch.aten.addmm %1746, %1745, %1748, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1750 = torch.aten.unsqueeze %1749, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1751 = torch.aten.unsqueeze %1750, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1752 = torch.aten.add.Tensor %1744, %1751, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1753 = torch.aten._to_copy %1752, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1754 = torch.aten._reshape_alias %1753, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_93, %result1_94 = torch.aten.var_mean.correction %1754, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1755 = torch.aten.add.Tensor %result0_93, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1756 = torch.aten.rsqrt %1755 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1757 = torch.aten.sub.Tensor %1754, %result1_94, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1758 = torch.aten.mul.Tensor %1757, %1756 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1759 = torch.aten.view %1758, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1760 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1761 = torch.aten.unsqueeze %1760, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1762 = torch.aten.unsqueeze %1761, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1763 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1764 = torch.aten.unsqueeze %1763, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1765 = torch.aten.unsqueeze %1764, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1766 = torch.aten.mul.Tensor %1759, %1765 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1767 = torch.aten.add.Tensor %1766, %1762, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1768 = torch.aten._to_copy %1767, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1769 = torch.aten.silu %1768 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1770 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1771 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1772 = torch.aten._convolution %1769, %1771, %1770, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1773 = torch.aten.add.Tensor %1724, %1772, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1774 = torch.aten._to_copy %1773, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1775 = torch.aten._reshape_alias %1774, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_95, %result1_96 = torch.aten.var_mean.correction %1775, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1776 = torch.aten.add.Tensor %result0_95, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1777 = torch.aten.rsqrt %1776 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1778 = torch.aten.sub.Tensor %1775, %result1_96, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1779 = torch.aten.mul.Tensor %1778, %1777 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1780 = torch.aten.view %1779, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1781 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1782 = torch.aten.unsqueeze %1781, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1783 = torch.aten.unsqueeze %1782, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1784 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1785 = torch.aten.unsqueeze %1784, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1786 = torch.aten.unsqueeze %1785, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1787 = torch.aten.mul.Tensor %1780, %1786 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1788 = torch.aten.add.Tensor %1787, %1783, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1789 = torch.aten._to_copy %1788, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1790 = torch.aten.silu %1789 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1791 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1792 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1793 = torch.aten._convolution %1790, %1792, %1791, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1794 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1795 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1796 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1797 = torch.aten.t %1796 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1798 = torch.aten.addmm %1795, %1794, %1797, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1799 = torch.aten.unsqueeze %1798, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1800 = torch.aten.unsqueeze %1799, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1801 = torch.aten.add.Tensor %1793, %1800, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1802 = torch.aten._to_copy %1801, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1803 = torch.aten._reshape_alias %1802, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_97, %result1_98 = torch.aten.var_mean.correction %1803, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1804 = torch.aten.add.Tensor %result0_97, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1805 = torch.aten.rsqrt %1804 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1806 = torch.aten.sub.Tensor %1803, %result1_98, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1807 = torch.aten.mul.Tensor %1806, %1805 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1808 = torch.aten.view %1807, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1809 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1810 = torch.aten.unsqueeze %1809, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1811 = torch.aten.unsqueeze %1810, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1812 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1813 = torch.aten.unsqueeze %1812, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1814 = torch.aten.unsqueeze %1813, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1815 = torch.aten.mul.Tensor %1808, %1814 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1816 = torch.aten.add.Tensor %1815, %1811, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1817 = torch.aten._to_copy %1816, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1818 = torch.aten.silu %1817 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1819 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1820 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1821 = torch.aten._convolution %1818, %1820, %1819, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1822 = torch.aten.add.Tensor %1773, %1821, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1823 = torch.aten._to_copy %1822, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1824 = torch.aten._reshape_alias %1823, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_99, %result1_100 = torch.aten.var_mean.correction %1824, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1825 = torch.aten.add.Tensor %result0_99, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1826 = torch.aten.rsqrt %1825 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1827 = torch.aten.sub.Tensor %1824, %result1_100, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1828 = torch.aten.mul.Tensor %1827, %1826 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1829 = torch.aten.view %1828, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1830 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1831 = torch.aten.unsqueeze %1830, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1832 = torch.aten.unsqueeze %1831, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1833 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1834 = torch.aten.unsqueeze %1833, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1835 = torch.aten.unsqueeze %1834, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1836 = torch.aten.mul.Tensor %1829, %1835 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1837 = torch.aten.add.Tensor %1836, %1832, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1838 = torch.prim.ListConstruct %int184320, %int144, %int12, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1839 = torch.aten._reshape_alias %1837, %1681, %1838 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1840 = torch.aten.permute %1839, %151 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int> -> !torch.vtensor<[2,12,12,1280],f32>
%1841 = torch.prim.ListConstruct %int2, %int144, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1842 = torch.prim.ListConstruct %int184320, %int1, %int144 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1843 = torch.aten._reshape_alias %1840, %1841, %1842 : !torch.vtensor<[2,12,12,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f32>
%1844 = torch.aten.clone %1843, %int0 : !torch.vtensor<[2,144,1280],f32>, !torch.int -> !torch.vtensor<[2,144,1280],f32>
%1845 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1846 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1847 = torch.aten._to_copy %1844, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%1848 = torch.aten.t %1846 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1849 = torch.prim.ListConstruct %int288, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%1850 = torch.aten.view %1847, %1849 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1851 = torch.aten.addmm %1845, %1850, %1848, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,1280],f16>
%1852 = torch.aten.view %1851, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1853 = torch.aten._to_copy %1852, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f32>
%result0_101, %result1_102, %result2_103 = torch.aten.native_layer_norm %1853, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,144,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,144,1280],f32>, !torch.vtensor<[2,144,1],f32>, !torch.vtensor<[2,144,1],f32>
%1854 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1855 = torch.aten._to_copy %result0_101, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%1856 = torch.aten.t %1854 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1857 = torch.aten._reshape_alias %1855, %1849, %1237 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1858 = torch.aten.mm %1857, %1856 : !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[288,1280],f16>
%1859 = torch.aten._unsafe_view %1858, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1860 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1861 = torch.aten._to_copy %result0_101, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%1862 = torch.aten.t %1860 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1863 = torch.aten._reshape_alias %1861, %1849, %1237 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1864 = torch.aten.mm %1863, %1862 : !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[288,1280],f16>
%1865 = torch.aten._unsafe_view %1864, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1866 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1867 = torch.aten._to_copy %result0_101, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%1868 = torch.aten.t %1866 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1869 = torch.aten._reshape_alias %1867, %1849, %1237 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1870 = torch.aten.mm %1869, %1868 : !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[288,1280],f16>
%1871 = torch.aten._unsafe_view %1870, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1872 = torch.prim.ListConstruct %int2, %int144, %int20, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1873 = torch.prim.ListConstruct %int184320, %int1280, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1874 = torch.aten._reshape_alias %1859, %1872, %1873 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1875 = torch.aten.permute %1874, %189 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1876 = torch.aten.clone %1875, %int0 : !torch.vtensor<[2,20,144,64],f16>, !torch.int -> !torch.vtensor<[2,20,144,64],f16>
%1877 = torch.prim.ListConstruct %int40, %int144, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1878 = torch.aten._unsafe_view %1876, %1877 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1879 = torch.aten._reshape_alias %1865, %1872, %1873 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1880 = torch.aten.permute %1879, %189 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1881 = torch.aten.clone %1880, %int0 : !torch.vtensor<[2,20,144,64],f16>, !torch.int -> !torch.vtensor<[2,20,144,64],f16>
%1882 = torch.aten._unsafe_view %1881, %1877 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1883 = torch.aten._reshape_alias %1871, %1872, %1873 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1884 = torch.aten.permute %1883, %189 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1885 = torch.aten.clone %1884, %int0 : !torch.vtensor<[2,20,144,64],f16>, !torch.int -> !torch.vtensor<[2,20,144,64],f16>
%1886 = torch.aten._unsafe_view %1885, %1877 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1887 = torch.aten.unsqueeze %1878, %int3 : !torch.vtensor<[40,144,64],f16>, !torch.int -> !torch.vtensor<[40,144,64,1],f16>
%1888 = torch.aten.permute %1887, %203 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,64],f16>
%1889 = torch.aten.unsqueeze %1882, %int3 : !torch.vtensor<[40,144,64],f16>, !torch.int -> !torch.vtensor<[40,144,64,1],f16>
%1890 = torch.aten.permute %1889, %206 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,144,64],f16>
%1891 = torch.aten.permute %1888, %203 : !torch.vtensor<[40,144,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64,1],f16>
%1892 = torch.prim.ListConstruct %int9216, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1893 = torch.aten._reshape_alias %1891, %1877, %1892 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1894 = torch.aten.permute %1890, %211 : !torch.vtensor<[40,1,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,144,1],f16>
%1895 = torch.prim.ListConstruct %int40, %int64, %int144 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1896 = torch.prim.ListConstruct %int9216, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1897 = torch.aten._reshape_alias %1894, %1895, %1896 : !torch.vtensor<[40,64,144,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,144],f16>
%1898 = torch.aten.bmm %1893, %1897 : !torch.vtensor<[40,144,64],f16>, !torch.vtensor<[40,64,144],f16> -> !torch.vtensor<[40,144,144],f16>
%1899 = torch.prim.ListConstruct %int40, %int144, %int1, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1900 = torch.aten.view %1898, %1899 : !torch.vtensor<[40,144,144],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,144],f16>
%1901 = torch.aten.permute %1900, %203 : !torch.vtensor<[40,144,1,144],f16>, !torch.list<int> -> !torch.vtensor<[40,144,144,1],f16>
%1902 = torch.prim.ListConstruct %int40, %int144, %int144 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1903 = torch.aten.view %1901, %1902 : !torch.vtensor<[40,144,144,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,144],f16>
%1904 = torch.aten.mul.Tensor %1903, %0 : !torch.vtensor<[40,144,144],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,144,144],f16>
%1905 = torch.aten._softmax %1904, %int-1, %true : !torch.vtensor<[40,144,144],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,144,144],f32>
%1906 = torch.aten._to_copy %1905, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,144,144],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,144,144],f16>
%1907 = torch.aten.unsqueeze %1906, %int3 : !torch.vtensor<[40,144,144],f16>, !torch.int -> !torch.vtensor<[40,144,144,1],f16>
%1908 = torch.aten.permute %1907, %203 : !torch.vtensor<[40,144,144,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,144],f16>
%1909 = torch.aten.unsqueeze %1886, %int3 : !torch.vtensor<[40,144,64],f16>, !torch.int -> !torch.vtensor<[40,144,64,1],f16>
%1910 = torch.aten.permute %1909, %211 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,144],f16>
%1911 = torch.aten.permute %1908, %203 : !torch.vtensor<[40,144,1,144],f16>, !torch.list<int> -> !torch.vtensor<[40,144,144,1],f16>
%1912 = torch.prim.ListConstruct %int20736, %int144, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1913 = torch.aten._reshape_alias %1911, %1902, %1912 : !torch.vtensor<[40,144,144,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,144,144],f16>
%1914 = torch.aten.permute %1910, %211 : !torch.vtensor<[40,1,64,144],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64,1],f16>
%1915 = torch.aten._reshape_alias %1914, %1877, %1892 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1916 = torch.aten.bmm %1913, %1915 : !torch.vtensor<[40,144,144],f16>, !torch.vtensor<[40,144,64],f16> -> !torch.vtensor<[40,144,64],f16>
%1917 = torch.prim.ListConstruct %int40, %int144, %int1, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1918 = torch.aten.view %1916, %1917 : !torch.vtensor<[40,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,64],f16>
%1919 = torch.aten.permute %1918, %203 : !torch.vtensor<[40,144,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64,1],f16>
%1920 = torch.aten.view %1919, %1877 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1921 = torch.prim.ListConstruct %int2, %int20, %int144, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1922 = torch.prim.ListConstruct %int184320, %int9216, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1923 = torch.aten._reshape_alias %1920, %1921, %1922 : !torch.vtensor<[40,144,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1924 = torch.aten.permute %1923, %189 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1925 = torch.aten.clone %1924, %int0 : !torch.vtensor<[2,144,20,64],f16>, !torch.int -> !torch.vtensor<[2,144,20,64],f16>
%1926 = torch.aten._unsafe_view %1925, %1841 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1927 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1928 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1929 = torch.aten.t %1928 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1930 = torch.aten.view %1926, %1849 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1931 = torch.aten.addmm %1927, %1930, %1929, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,1280],f16>
%1932 = torch.aten.view %1931, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1933 = torch.aten.add.Tensor %1932, %1852, %int1 : !torch.vtensor<[2,144,1280],f16>, !torch.vtensor<[2,144,1280],f16>, !torch.int -> !torch.vtensor<[2,144,1280],f16>
%1934 = torch.aten._to_copy %1933, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f32>
%result0_104, %result1_105, %result2_106 = torch.aten.native_layer_norm %1934, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,144,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,144,1280],f32>, !torch.vtensor<[2,144,1],f32>, !torch.vtensor<[2,144,1],f32>
%1935 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1936 = torch.aten._to_copy %result0_104, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%1937 = torch.aten.t %1935 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1938 = torch.aten._reshape_alias %1936, %1849, %1237 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1939 = torch.aten.mm %1938, %1937 : !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[288,1280],f16>
%1940 = torch.aten._unsafe_view %1939, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1941 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1942 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1943 = torch.aten.t %1941 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1944 = torch.aten._reshape_alias %1942, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1945 = torch.aten.mm %1944, %1943 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1946 = torch.aten._unsafe_view %1945, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1947 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1948 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1949 = torch.aten.t %1947 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1950 = torch.aten._reshape_alias %1948, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1951 = torch.aten.mm %1950, %1949 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1952 = torch.aten._unsafe_view %1951, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1953 = torch.aten._reshape_alias %1940, %1872, %1873 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1954 = torch.aten.permute %1953, %189 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1955 = torch.aten.clone %1954, %int0 : !torch.vtensor<[2,20,144,64],f16>, !torch.int -> !torch.vtensor<[2,20,144,64],f16>
%1956 = torch.aten._unsafe_view %1955, %1877 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1957 = torch.aten._reshape_alias %1946, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1958 = torch.aten.permute %1957, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1959 = torch.aten.clone %1958, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1960 = torch.aten._unsafe_view %1959, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1961 = torch.aten._reshape_alias %1952, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1962 = torch.aten.permute %1961, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1963 = torch.aten.clone %1962, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1964 = torch.aten._unsafe_view %1963, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1965 = torch.aten.unsqueeze %1956, %int3 : !torch.vtensor<[40,144,64],f16>, !torch.int -> !torch.vtensor<[40,144,64,1],f16>
%1966 = torch.aten.permute %1965, %203 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,64],f16>
%1967 = torch.aten.unsqueeze %1960, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1968 = torch.aten.permute %1967, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%1969 = torch.aten.permute %1966, %203 : !torch.vtensor<[40,144,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64,1],f16>
%1970 = torch.aten._reshape_alias %1969, %1877, %1892 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1971 = torch.aten.permute %1968, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%1972 = torch.aten._reshape_alias %1971, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%1973 = torch.aten.bmm %1970, %1972 : !torch.vtensor<[40,144,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,144,77],f16>
%1974 = torch.prim.ListConstruct %int40, %int144, %int1, %int77 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1975 = torch.aten.view %1973, %1974 : !torch.vtensor<[40,144,77],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,77],f16>
%1976 = torch.aten.permute %1975, %203 : !torch.vtensor<[40,144,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,144,77,1],f16>
%1977 = torch.prim.ListConstruct %int40, %int144, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1978 = torch.aten.view %1976, %1977 : !torch.vtensor<[40,144,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,77],f16>
%1979 = torch.aten.mul.Tensor %1978, %0 : !torch.vtensor<[40,144,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,144,77],f16>
%1980 = torch.aten._softmax %1979, %int-1, %true : !torch.vtensor<[40,144,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,144,77],f32>
%1981 = torch.aten._to_copy %1980, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,144,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,144,77],f16>
%1982 = torch.aten.unsqueeze %1981, %int3 : !torch.vtensor<[40,144,77],f16>, !torch.int -> !torch.vtensor<[40,144,77,1],f16>
%1983 = torch.aten.permute %1982, %203 : !torch.vtensor<[40,144,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,77],f16>
%1984 = torch.aten.unsqueeze %1964, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1985 = torch.aten.permute %1984, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%1986 = torch.aten.permute %1983, %203 : !torch.vtensor<[40,144,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,144,77,1],f16>
%1987 = torch.prim.ListConstruct %int11088, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1988 = torch.aten._reshape_alias %1986, %1977, %1987 : !torch.vtensor<[40,144,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,144,77],f16>
%1989 = torch.aten.permute %1985, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%1990 = torch.aten._reshape_alias %1989, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1991 = torch.aten.bmm %1988, %1990 : !torch.vtensor<[40,144,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,144,64],f16>
%1992 = torch.aten.view %1991, %1917 : !torch.vtensor<[40,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,64],f16>
%1993 = torch.aten.permute %1992, %203 : !torch.vtensor<[40,144,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64,1],f16>
%1994 = torch.aten.view %1993, %1877 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1995 = torch.aten._reshape_alias %1994, %1921, %1922 : !torch.vtensor<[40,144,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1996 = torch.aten.permute %1995, %189 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1997 = torch.aten.clone %1996, %int0 : !torch.vtensor<[2,144,20,64],f16>, !torch.int -> !torch.vtensor<[2,144,20,64],f16>
%1998 = torch.aten._unsafe_view %1997, %1841 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1999 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2000 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2001 = torch.aten.t %2000 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2002 = torch.aten.view %1998, %1849 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%2003 = torch.aten.addmm %1999, %2002, %2001, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,1280],f16>
%2004 = torch.aten.view %2003, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%2005 = torch.aten.add.Tensor %2004, %1933, %int1 : !torch.vtensor<[2,144,1280],f16>, !torch.vtensor<[2,144,1280],f16>, !torch.int -> !torch.vtensor<[2,144,1280],f16>
%2006 = torch.aten._to_copy %2005, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f32>
%result0_107, %result1_108, %result2_109 = torch.aten.native_layer_norm %2006, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,144,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,144,1280],f32>, !torch.vtensor<[2,144,1],f32>, !torch.vtensor<[2,144,1],f32>
%2007 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%2008 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%2009 = torch.aten._to_copy %result0_107, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%2010 = torch.aten.t %2008 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%2011 = torch.aten.view %2009, %1849 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%2012 = torch.aten.addmm %2007, %2011, %2010, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,10240],f16>
%2013 = torch.prim.ListConstruct %int2, %int144, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2014 = torch.aten.view %2012, %2013 : !torch.vtensor<[288,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,144,10240],f16>
%2015 = torch.aten.slice.Tensor %2014, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,144,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,144,5120],f16>
%2016 = torch.aten.slice.Tensor %2014, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,144,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,144,5120],f16>
%2017 = torch.aten.gelu %2016, %str : !torch.vtensor<[2,144,5120],f16>, !torch.str -> !torch.vtensor<[2,144,5120],f16>
%2018 = torch.aten.mul.Tensor %2015, %2017 : !torch.vtensor<[2,144,5120],f16>, !torch.vtensor<[2,144,5120],f16> -> !torch.vtensor<[2,144,5120],f16>
%2019 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2020 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%2021 = torch.aten.t %2020 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%2022 = torch.prim.ListConstruct %int288, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%2023 = torch.aten.view %2018, %2022 : !torch.vtensor<[2,144,5120],f16>, !torch.list<int> -> !torch.vtensor<[288,5120],f16>
%2024 = torch.aten.addmm %2019, %2023, %2021, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[288,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,1280],f16>
%2025 = torch.aten.view %2024, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%2026 = torch.aten.add.Tensor %2025, %2005, %int1 : !torch.vtensor<[2,144,1280],f16>, !torch.vtensor<[2,144,1280],f16>, !torch.int -> !torch.vtensor<[2,144,1280],f16>
%2027 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2028 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2029 = torch.aten.t %2028 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2030 = torch.aten.view %2026, %1849 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%2031 = torch.aten.addmm %2027, %2030, %2029, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,1280],f16>
%2032 = torch.aten.view %2031, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%2033 = torch.prim.ListConstruct %int2, %int12, %int12, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2034 = torch.prim.ListConstruct %int184320, %int15360, %int1280, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2035 = torch.aten._reshape_alias %2032, %2033, %2034 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,12,12,1280],f16>
%2036 = torch.aten.permute %2035, %206 : !torch.vtensor<[2,12,12,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f16>
%2037 = torch.prim.ListConstruct %int184320, %int1, %int15360, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2038 = torch.aten._reshape_alias %2036, %1681, %2037 : !torch.vtensor<[2,1280,12,12],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f16>
%2039 = torch.aten.clone %2038, %int0 : !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2040 = torch.aten.add.Tensor %2039, %1822, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2041 = torch.aten._to_copy %2040, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%2042 = torch.aten._reshape_alias %2041, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_110, %result1_111 = torch.aten.var_mean.correction %2042, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2043 = torch.aten.add.Tensor %result0_110, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2044 = torch.aten.rsqrt %2043 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2045 = torch.aten.sub.Tensor %2042, %result1_111, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%2046 = torch.aten.mul.Tensor %2045, %2044 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%2047 = torch.aten.view %2046, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%2048 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2049 = torch.aten.unsqueeze %2048, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2050 = torch.aten.unsqueeze %2049, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2051 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2052 = torch.aten.unsqueeze %2051, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2053 = torch.aten.unsqueeze %2052, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2054 = torch.aten.mul.Tensor %2047, %2053 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%2055 = torch.aten.add.Tensor %2054, %2050, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%2056 = torch.aten._to_copy %2055, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%2057 = torch.aten.silu %2056 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%2058 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2059 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2060 = torch.aten._convolution %2057, %2059, %2058, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2061 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2062 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2063 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2064 = torch.aten.t %2063 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2065 = torch.aten.addmm %2062, %2061, %2064, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2066 = torch.aten.unsqueeze %2065, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2067 = torch.aten.unsqueeze %2066, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2068 = torch.aten.add.Tensor %2060, %2067, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2069 = torch.aten._to_copy %2068, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%2070 = torch.aten._reshape_alias %2069, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_112, %result1_113 = torch.aten.var_mean.correction %2070, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2071 = torch.aten.add.Tensor %result0_112, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2072 = torch.aten.rsqrt %2071 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2073 = torch.aten.sub.Tensor %2070, %result1_113, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%2074 = torch.aten.mul.Tensor %2073, %2072 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%2075 = torch.aten.view %2074, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%2076 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2077 = torch.aten.unsqueeze %2076, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2078 = torch.aten.unsqueeze %2077, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2079 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2080 = torch.aten.unsqueeze %2079, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2081 = torch.aten.unsqueeze %2080, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2082 = torch.aten.mul.Tensor %2075, %2081 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%2083 = torch.aten.add.Tensor %2082, %2078, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%2084 = torch.aten._to_copy %2083, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%2085 = torch.aten.silu %2084 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%2086 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2087 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2088 = torch.aten._convolution %2085, %2087, %2086, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2089 = torch.aten.add.Tensor %2040, %2088, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2090 = torch.prim.ListConstruct %2089, %1773 : (!torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>) -> !torch.list<vtensor>
%2091 = torch.aten.cat %2090, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,12,12],f16>
%2092 = torch.aten._to_copy %2091, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f32>
%2093 = torch.prim.ListConstruct %int2, %int32, %int80, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2094 = torch.prim.ListConstruct %int368640, %int11520, %int144, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2095 = torch.aten._reshape_alias %2092, %2093, %2094 : !torch.vtensor<[2,2560,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,80,144],f32>
%result0_114, %result1_115 = torch.aten.var_mean.correction %2095, %85, %int0, %true : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2096 = torch.aten.add.Tensor %result0_114, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2097 = torch.aten.rsqrt %2096 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2098 = torch.aten.sub.Tensor %2095, %result1_115, %int1 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,144],f32>
%2099 = torch.aten.mul.Tensor %2098, %2097 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,144],f32>
%2100 = torch.prim.ListConstruct %int2, %int2560, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2101 = torch.aten.view %2099, %2100 : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,12,12],f32>
%2102 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2103 = torch.aten.unsqueeze %2102, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2104 = torch.aten.unsqueeze %2103, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2105 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2106 = torch.aten.unsqueeze %2105, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2107 = torch.aten.unsqueeze %2106, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2108 = torch.aten.mul.Tensor %2101, %2107 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32> -> !torch.vtensor<[2,2560,12,12],f32>
%2109 = torch.aten.add.Tensor %2108, %2104, %int1 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32>, !torch.int -> !torch.vtensor<[2,2560,12,12],f32>
%2110 = torch.aten._to_copy %2109, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f16>
%2111 = torch.aten.silu %2110 : !torch.vtensor<[2,2560,12,12],f16> -> !torch.vtensor<[2,2560,12,12],f16>
%2112 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2113 = torch.aten._to_copy %11, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,3,3],f16>
%2114 = torch.aten._convolution %2111, %2113, %2112, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2115 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2116 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2117 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2118 = torch.aten.t %2117 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2119 = torch.aten.addmm %2116, %2115, %2118, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2120 = torch.aten.unsqueeze %2119, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2121 = torch.aten.unsqueeze %2120, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2122 = torch.aten.add.Tensor %2114, %2121, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2123 = torch.aten._to_copy %2122, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%2124 = torch.aten._reshape_alias %2123, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_116, %result1_117 = torch.aten.var_mean.correction %2124, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2125 = torch.aten.add.Tensor %result0_116, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2126 = torch.aten.rsqrt %2125 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2127 = torch.aten.sub.Tensor %2124, %result1_117, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%2128 = torch.aten.mul.Tensor %2127, %2126 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%2129 = torch.aten.view %2128, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%2130 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2131 = torch.aten.unsqueeze %2130, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2132 = torch.aten.unsqueeze %2131, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2133 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2134 = torch.aten.unsqueeze %2133, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2135 = torch.aten.unsqueeze %2134, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2136 = torch.aten.mul.Tensor %2129, %2135 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%2137 = torch.aten.add.Tensor %2136, %2132, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%2138 = torch.aten._to_copy %2137, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%2139 = torch.aten.silu %2138 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%2140 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2141 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2142 = torch.aten._convolution %2139, %2141, %2140, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2143 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2144 = torch.aten._to_copy %12, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,1,1],f16>
%2145 = torch.aten._convolution %2091, %2144, %2143, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2146 = torch.aten.add.Tensor %2145, %2142, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2147 = torch.prim.ListConstruct %2146, %1724 : (!torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>) -> !torch.list<vtensor>
%2148 = torch.aten.cat %2147, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,12,12],f16>
%2149 = torch.aten._to_copy %2148, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f32>
%2150 = torch.aten._reshape_alias %2149, %2093, %2094 : !torch.vtensor<[2,2560,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,80,144],f32>
%result0_118, %result1_119 = torch.aten.var_mean.correction %2150, %85, %int0, %true : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2151 = torch.aten.add.Tensor %result0_118, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2152 = torch.aten.rsqrt %2151 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2153 = torch.aten.sub.Tensor %2150, %result1_119, %int1 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,144],f32>
%2154 = torch.aten.mul.Tensor %2153, %2152 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,144],f32>
%2155 = torch.aten.view %2154, %2100 : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,12,12],f32>
%2156 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2157 = torch.aten.unsqueeze %2156, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2158 = torch.aten.unsqueeze %2157, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2159 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2160 = torch.aten.unsqueeze %2159, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2161 = torch.aten.unsqueeze %2160, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2162 = torch.aten.mul.Tensor %2155, %2161 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32> -> !torch.vtensor<[2,2560,12,12],f32>
%2163 = torch.aten.add.Tensor %2162, %2158, %int1 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32>, !torch.int -> !torch.vtensor<[2,2560,12,12],f32>
%2164 = torch.aten._to_copy %2163, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f16>
%2165 = torch.aten.silu %2164 : !torch.vtensor<[2,2560,12,12],f16> -> !torch.vtensor<[2,2560,12,12],f16>
%2166 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2167 = torch.aten._to_copy %11, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,3,3],f16>
%2168 = torch.aten._convolution %2165, %2167, %2166, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2169 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2170 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2171 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2172 = torch.aten.t %2171 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2173 = torch.aten.addmm %2170, %2169, %2172, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2174 = torch.aten.unsqueeze %2173, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2175 = torch.aten.unsqueeze %2174, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2176 = torch.aten.add.Tensor %2168, %2175, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2177 = torch.aten._to_copy %2176, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%2178 = torch.aten._reshape_alias %2177, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_120, %result1_121 = torch.aten.var_mean.correction %2178, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2179 = torch.aten.add.Tensor %result0_120, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2180 = torch.aten.rsqrt %2179 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2181 = torch.aten.sub.Tensor %2178, %result1_121, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%2182 = torch.aten.mul.Tensor %2181, %2180 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%2183 = torch.aten.view %2182, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%2184 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2185 = torch.aten.unsqueeze %2184, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2186 = torch.aten.unsqueeze %2185, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2187 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2188 = torch.aten.unsqueeze %2187, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2189 = torch.aten.unsqueeze %2188, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2190 = torch.aten.mul.Tensor %2183, %2189 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%2191 = torch.aten.add.Tensor %2190, %2186, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%2192 = torch.aten._to_copy %2191, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%2193 = torch.aten.silu %2192 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%2194 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2195 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2196 = torch.aten._convolution %2193, %2195, %2194, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2197 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2198 = torch.aten._to_copy %12, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,1,1],f16>
%2199 = torch.aten._convolution %2148, %2198, %2197, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2200 = torch.aten.add.Tensor %2199, %2196, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2201 = torch.prim.ListConstruct %2200, %1672 : (!torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>) -> !torch.list<vtensor>
%2202 = torch.aten.cat %2201, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,12,12],f16>
%2203 = torch.aten._to_copy %2202, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f32>
%2204 = torch.aten._reshape_alias %2203, %2093, %2094 : !torch.vtensor<[2,2560,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,80,144],f32>
%result0_122, %result1_123 = torch.aten.var_mean.correction %2204, %85, %int0, %true : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2205 = torch.aten.add.Tensor %result0_122, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2206 = torch.aten.rsqrt %2205 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2207 = torch.aten.sub.Tensor %2204, %result1_123, %int1 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,144],f32>
%2208 = torch.aten.mul.Tensor %2207, %2206 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,144],f32>
%2209 = torch.aten.view %2208, %2100 : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,12,12],f32>
%2210 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2211 = torch.aten.unsqueeze %2210, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2212 = torch.aten.unsqueeze %2211, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2213 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2214 = torch.aten.unsqueeze %2213, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2215 = torch.aten.unsqueeze %2214, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2216 = torch.aten.mul.Tensor %2209, %2215 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32> -> !torch.vtensor<[2,2560,12,12],f32>
%2217 = torch.aten.add.Tensor %2216, %2212, %int1 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32>, !torch.int -> !torch.vtensor<[2,2560,12,12],f32>
%2218 = torch.aten._to_copy %2217, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f16>
%2219 = torch.aten.silu %2218 : !torch.vtensor<[2,2560,12,12],f16> -> !torch.vtensor<[2,2560,12,12],f16>
%2220 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2221 = torch.aten._to_copy %11, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,3,3],f16>
%2222 = torch.aten._convolution %2219, %2221, %2220, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2223 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2224 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2225 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2226 = torch.aten.t %2225 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2227 = torch.aten.addmm %2224, %2223, %2226, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2228 = torch.aten.unsqueeze %2227, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2229 = torch.aten.unsqueeze %2228, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2230 = torch.aten.add.Tensor %2222, %2229, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2231 = torch.aten._to_copy %2230, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%2232 = torch.aten._reshape_alias %2231, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_124, %result1_125 = torch.aten.var_mean.correction %2232, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2233 = torch.aten.add.Tensor %result0_124, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2234 = torch.aten.rsqrt %2233 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2235 = torch.aten.sub.Tensor %2232, %result1_125, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%2236 = torch.aten.mul.Tensor %2235, %2234 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%2237 = torch.aten.view %2236, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%2238 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2239 = torch.aten.unsqueeze %2238, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2240 = torch.aten.unsqueeze %2239, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2241 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2242 = torch.aten.unsqueeze %2241, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2243 = torch.aten.unsqueeze %2242, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2244 = torch.aten.mul.Tensor %2237, %2243 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%2245 = torch.aten.add.Tensor %2244, %2240, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%2246 = torch.aten._to_copy %2245, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%2247 = torch.aten.silu %2246 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%2248 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2249 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2250 = torch.aten._convolution %2247, %2249, %2248, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2251 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2252 = torch.aten._to_copy %12, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,1,1],f16>
%2253 = torch.aten._convolution %2202, %2252, %2251, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2254 = torch.aten.add.Tensor %2253, %2250, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2255 = torch.prim.ListConstruct %int24, %int24 : (!torch.int, !torch.int) -> !torch.list<int>
%2256 = torch.aten.upsample_nearest2d %2254, %2255, %float2.000000e00, %float2.000000e00 : !torch.vtensor<[2,1280,12,12],f16>, !torch.list<int>, !torch.float, !torch.float -> !torch.vtensor<[2,1280,24,24],f16>
%2257 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2258 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2259 = torch.aten._convolution %2256, %2258, %2257, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2260 = torch.prim.ListConstruct %2259, %1669 : (!torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>) -> !torch.list<vtensor>
%2261 = torch.aten.cat %2260, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,24,24],f16>
%2262 = torch.aten._to_copy %2261, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,24,24],f32>
%2263 = torch.prim.ListConstruct %int2, %int32, %int80, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2264 = torch.prim.ListConstruct %int1474560, %int46080, %int576, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2265 = torch.aten._reshape_alias %2262, %2263, %2264 : !torch.vtensor<[2,2560,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,80,576],f32>
%result0_126, %result1_127 = torch.aten.var_mean.correction %2265, %85, %int0, %true : !torch.vtensor<[2,32,80,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2266 = torch.aten.add.Tensor %result0_126, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2267 = torch.aten.rsqrt %2266 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2268 = torch.aten.sub.Tensor %2265, %result1_127, %int1 : !torch.vtensor<[2,32,80,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,576],f32>
%2269 = torch.aten.mul.Tensor %2268, %2267 : !torch.vtensor<[2,32,80,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,576],f32>
%2270 = torch.prim.ListConstruct %int2, %int2560, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2271 = torch.aten.view %2269, %2270 : !torch.vtensor<[2,32,80,576],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,24,24],f32>
%2272 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2273 = torch.aten.unsqueeze %2272, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2274 = torch.aten.unsqueeze %2273, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2275 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2276 = torch.aten.unsqueeze %2275, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2277 = torch.aten.unsqueeze %2276, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2278 = torch.aten.mul.Tensor %2271, %2277 : !torch.vtensor<[2,2560,24,24],f32>, !torch.vtensor<[1,2560,1,1],f32> -> !torch.vtensor<[2,2560,24,24],f32>
%2279 = torch.aten.add.Tensor %2278, %2274, %int1 : !torch.vtensor<[2,2560,24,24],f32>, !torch.vtensor<[1,2560,1,1],f32>, !torch.int -> !torch.vtensor<[2,2560,24,24],f32>
%2280 = torch.aten._to_copy %2279, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,2560,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,24,24],f16>
%2281 = torch.aten.silu %2280 : !torch.vtensor<[2,2560,24,24],f16> -> !torch.vtensor<[2,2560,24,24],f16>
%2282 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2283 = torch.aten._to_copy %11, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,3,3],f16>
%2284 = torch.aten._convolution %2281, %2283, %2282, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,24,24],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2285 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2286 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2287 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2288 = torch.aten.t %2287 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2289 = torch.aten.addmm %2286, %2285, %2288, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2290 = torch.aten.unsqueeze %2289, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2291 = torch.aten.unsqueeze %2290, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2292 = torch.aten.add.Tensor %2284, %2291, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2293 = torch.aten._to_copy %2292, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2294 = torch.aten._reshape_alias %2293, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_128, %result1_129 = torch.aten.var_mean.correction %2294, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2295 = torch.aten.add.Tensor %result0_128, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2296 = torch.aten.rsqrt %2295 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2297 = torch.aten.sub.Tensor %2294, %result1_129, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2298 = torch.aten.mul.Tensor %2297, %2296 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2299 = torch.aten.view %2298, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2300 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2301 = torch.aten.unsqueeze %2300, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2302 = torch.aten.unsqueeze %2301, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2303 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2304 = torch.aten.unsqueeze %2303, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2305 = torch.aten.unsqueeze %2304, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2306 = torch.aten.mul.Tensor %2299, %2305 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2307 = torch.aten.add.Tensor %2306, %2302, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2308 = torch.aten._to_copy %2307, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%2309 = torch.aten.silu %2308 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%2310 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2311 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2312 = torch.aten._convolution %2309, %2311, %2310, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2313 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2314 = torch.aten._to_copy %12, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,1,1],f16>
%2315 = torch.aten._convolution %2261, %2314, %2313, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,24,24],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2316 = torch.aten.add.Tensor %2315, %2312, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2317 = torch.aten._to_copy %2316, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2318 = torch.aten._reshape_alias %2317, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_130, %result1_131 = torch.aten.var_mean.correction %2318, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2319 = torch.aten.add.Tensor %result0_130, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2320 = torch.aten.rsqrt %2319 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2321 = torch.aten.sub.Tensor %2318, %result1_131, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2322 = torch.aten.mul.Tensor %2321, %2320 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2323 = torch.aten.view %2322, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2324 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2325 = torch.aten.unsqueeze %2324, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2326 = torch.aten.unsqueeze %2325, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2327 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2328 = torch.aten.unsqueeze %2327, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2329 = torch.aten.unsqueeze %2328, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2330 = torch.aten.mul.Tensor %2323, %2329 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2331 = torch.aten.add.Tensor %2330, %2326, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2332 = torch.aten._reshape_alias %2331, %1183, %1217 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2333 = torch.aten.permute %2332, %151 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f32>
%2334 = torch.aten._reshape_alias %2333, %1220, %1221 : !torch.vtensor<[2,24,24,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f32>
%2335 = torch.aten.clone %2334, %int0 : !torch.vtensor<[2,576,1280],f32>, !torch.int -> !torch.vtensor<[2,576,1280],f32>
%2336 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2337 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2338 = torch.aten._to_copy %2335, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2339 = torch.aten.t %2337 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2340 = torch.aten.view %2338, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2341 = torch.aten.addmm %2336, %2340, %2339, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2342 = torch.aten.view %2341, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2343 = torch.aten._to_copy %2342, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_132, %result1_133, %result2_134 = torch.aten.native_layer_norm %2343, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2344 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2345 = torch.aten._to_copy %result0_132, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2346 = torch.aten.t %2344 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2347 = torch.aten._reshape_alias %2345, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2348 = torch.aten.mm %2347, %2346 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2349 = torch.aten._unsafe_view %2348, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2350 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2351 = torch.aten._to_copy %result0_132, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2352 = torch.aten.t %2350 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2353 = torch.aten._reshape_alias %2351, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2354 = torch.aten.mm %2353, %2352 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2355 = torch.aten._unsafe_view %2354, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2356 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2357 = torch.aten._to_copy %result0_132, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2358 = torch.aten.t %2356 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2359 = torch.aten._reshape_alias %2357, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2360 = torch.aten.mm %2359, %2358 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2361 = torch.aten._unsafe_view %2360, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2362 = torch.aten._reshape_alias %2349, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2363 = torch.aten.permute %2362, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2364 = torch.aten.clone %2363, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2365 = torch.aten._unsafe_view %2364, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2366 = torch.aten._reshape_alias %2355, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2367 = torch.aten.permute %2366, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2368 = torch.aten.clone %2367, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2369 = torch.aten._unsafe_view %2368, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2370 = torch.aten._reshape_alias %2361, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2371 = torch.aten.permute %2370, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2372 = torch.aten.clone %2371, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2373 = torch.aten._unsafe_view %2372, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2374 = torch.aten.unsqueeze %2365, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2375 = torch.aten.permute %2374, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2376 = torch.aten.unsqueeze %2369, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2377 = torch.aten.permute %2376, %206 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,576,64],f16>
%2378 = torch.aten.permute %2375, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2379 = torch.aten._reshape_alias %2378, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2380 = torch.aten.permute %2377, %211 : !torch.vtensor<[40,1,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,576,1],f16>
%2381 = torch.aten._reshape_alias %2380, %1276, %1277 : !torch.vtensor<[40,64,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,576],f16>
%2382 = torch.aten.bmm %2379, %2381 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,576],f16> -> !torch.vtensor<[40,576,576],f16>
%2383 = torch.aten.view %2382, %1280 : !torch.vtensor<[40,576,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2384 = torch.aten.permute %2383, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2385 = torch.aten.view %2384, %1283 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2386 = torch.aten.mul.Tensor %2385, %0 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,576],f16>
%2387 = torch.aten._softmax %2386, %int-1, %true : !torch.vtensor<[40,576,576],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,576],f32>
%2388 = torch.aten._to_copy %2387, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,576],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,576],f16>
%2389 = torch.aten.unsqueeze %2388, %int3 : !torch.vtensor<[40,576,576],f16>, !torch.int -> !torch.vtensor<[40,576,576,1],f16>
%2390 = torch.aten.permute %2389, %203 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2391 = torch.aten.unsqueeze %2373, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2392 = torch.aten.permute %2391, %211 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,576],f16>
%2393 = torch.aten.permute %2390, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2394 = torch.aten._reshape_alias %2393, %1283, %1293 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2395 = torch.aten.permute %2392, %211 : !torch.vtensor<[40,1,64,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2396 = torch.aten._reshape_alias %2395, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2397 = torch.aten.bmm %2394, %2396 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[40,576,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2398 = torch.aten.view %2397, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2399 = torch.aten.permute %2398, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2400 = torch.aten.view %2399, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2401 = torch.aten._reshape_alias %2400, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2402 = torch.aten.permute %2401, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2403 = torch.aten.clone %2402, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2404 = torch.aten._unsafe_view %2403, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2405 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2406 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2407 = torch.aten.t %2406 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2408 = torch.aten.view %2404, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2409 = torch.aten.addmm %2405, %2408, %2407, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2410 = torch.aten.view %2409, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2411 = torch.aten.add.Tensor %2410, %2342, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2412 = torch.aten._to_copy %2411, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_135, %result1_136, %result2_137 = torch.aten.native_layer_norm %2412, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2413 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2414 = torch.aten._to_copy %result0_135, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2415 = torch.aten.t %2413 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2416 = torch.aten._reshape_alias %2414, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2417 = torch.aten.mm %2416, %2415 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2418 = torch.aten._unsafe_view %2417, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2419 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2420 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2421 = torch.aten.t %2419 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2422 = torch.aten._reshape_alias %2420, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2423 = torch.aten.mm %2422, %2421 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2424 = torch.aten._unsafe_view %2423, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2425 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2426 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2427 = torch.aten.t %2425 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2428 = torch.aten._reshape_alias %2426, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2429 = torch.aten.mm %2428, %2427 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2430 = torch.aten._unsafe_view %2429, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2431 = torch.aten._reshape_alias %2418, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2432 = torch.aten.permute %2431, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2433 = torch.aten.clone %2432, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2434 = torch.aten._unsafe_view %2433, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2435 = torch.aten._reshape_alias %2424, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2436 = torch.aten.permute %2435, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2437 = torch.aten.clone %2436, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2438 = torch.aten._unsafe_view %2437, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2439 = torch.aten._reshape_alias %2430, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2440 = torch.aten.permute %2439, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2441 = torch.aten.clone %2440, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2442 = torch.aten._unsafe_view %2441, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2443 = torch.aten.unsqueeze %2434, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2444 = torch.aten.permute %2443, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2445 = torch.aten.unsqueeze %2438, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2446 = torch.aten.permute %2445, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%2447 = torch.aten.permute %2444, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2448 = torch.aten._reshape_alias %2447, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2449 = torch.aten.permute %2446, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%2450 = torch.aten._reshape_alias %2449, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%2451 = torch.aten.bmm %2448, %2450 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,576,77],f16>
%2452 = torch.aten.view %2451, %1360 : !torch.vtensor<[40,576,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2453 = torch.aten.permute %2452, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2454 = torch.aten.view %2453, %1363 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2455 = torch.aten.mul.Tensor %2454, %0 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,77],f16>
%2456 = torch.aten._softmax %2455, %int-1, %true : !torch.vtensor<[40,576,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,77],f32>
%2457 = torch.aten._to_copy %2456, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,77],f16>
%2458 = torch.aten.unsqueeze %2457, %int3 : !torch.vtensor<[40,576,77],f16>, !torch.int -> !torch.vtensor<[40,576,77,1],f16>
%2459 = torch.aten.permute %2458, %203 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2460 = torch.aten.unsqueeze %2442, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2461 = torch.aten.permute %2460, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%2462 = torch.aten.permute %2459, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2463 = torch.aten._reshape_alias %2462, %1363, %1373 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2464 = torch.aten.permute %2461, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%2465 = torch.aten._reshape_alias %2464, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2466 = torch.aten.bmm %2463, %2465 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2467 = torch.aten.view %2466, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2468 = torch.aten.permute %2467, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2469 = torch.aten.view %2468, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2470 = torch.aten._reshape_alias %2469, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2471 = torch.aten.permute %2470, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2472 = torch.aten.clone %2471, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2473 = torch.aten._unsafe_view %2472, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2474 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2475 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2476 = torch.aten.t %2475 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2477 = torch.aten.view %2473, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2478 = torch.aten.addmm %2474, %2477, %2476, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2479 = torch.aten.view %2478, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2480 = torch.aten.add.Tensor %2479, %2411, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2481 = torch.aten._to_copy %2480, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_138, %result1_139, %result2_140 = torch.aten.native_layer_norm %2481, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2482 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%2483 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%2484 = torch.aten._to_copy %result0_138, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2485 = torch.aten.t %2483 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%2486 = torch.aten.view %2484, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2487 = torch.aten.addmm %2482, %2486, %2485, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,10240],f16>
%2488 = torch.aten.view %2487, %1399 : !torch.vtensor<[1152,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,576,10240],f16>
%2489 = torch.aten.slice.Tensor %2488, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2490 = torch.aten.slice.Tensor %2488, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2491 = torch.aten.gelu %2490, %str : !torch.vtensor<[2,576,5120],f16>, !torch.str -> !torch.vtensor<[2,576,5120],f16>
%2492 = torch.aten.mul.Tensor %2489, %2491 : !torch.vtensor<[2,576,5120],f16>, !torch.vtensor<[2,576,5120],f16> -> !torch.vtensor<[2,576,5120],f16>
%2493 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2494 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%2495 = torch.aten.t %2494 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%2496 = torch.aten.view %2492, %1408 : !torch.vtensor<[2,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[1152,5120],f16>
%2497 = torch.aten.addmm %2493, %2496, %2495, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2498 = torch.aten.view %2497, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2499 = torch.aten.add.Tensor %2498, %2480, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2500 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2501 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2502 = torch.aten.t %2501 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2503 = torch.aten.view %2499, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2504 = torch.aten.addmm %2500, %2503, %2502, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2505 = torch.aten.view %2504, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2506 = torch.aten._reshape_alias %2505, %1419, %1420 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f16>
%2507 = torch.aten.permute %2506, %206 : !torch.vtensor<[2,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%2508 = torch.aten._reshape_alias %2507, %1183, %1423 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%2509 = torch.aten.clone %2508, %int0 : !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2510 = torch.aten.add.Tensor %2509, %2316, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2511 = torch.prim.ListConstruct %2510, %1426 : (!torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>) -> !torch.list<vtensor>
%2512 = torch.aten.cat %2511, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,24,24],f16>
%2513 = torch.aten._to_copy %2512, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,24,24],f32>
%2514 = torch.aten._reshape_alias %2513, %2263, %2264 : !torch.vtensor<[2,2560,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,80,576],f32>
%result0_141, %result1_142 = torch.aten.var_mean.correction %2514, %85, %int0, %true : !torch.vtensor<[2,32,80,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2515 = torch.aten.add.Tensor %result0_141, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2516 = torch.aten.rsqrt %2515 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2517 = torch.aten.sub.Tensor %2514, %result1_142, %int1 : !torch.vtensor<[2,32,80,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,576],f32>
%2518 = torch.aten.mul.Tensor %2517, %2516 : !torch.vtensor<[2,32,80,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,576],f32>
%2519 = torch.aten.view %2518, %2270 : !torch.vtensor<[2,32,80,576],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,24,24],f32>
%2520 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2521 = torch.aten.unsqueeze %2520, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2522 = torch.aten.unsqueeze %2521, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2523 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2524 = torch.aten.unsqueeze %2523, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2525 = torch.aten.unsqueeze %2524, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2526 = torch.aten.mul.Tensor %2519, %2525 : !torch.vtensor<[2,2560,24,24],f32>, !torch.vtensor<[1,2560,1,1],f32> -> !torch.vtensor<[2,2560,24,24],f32>
%2527 = torch.aten.add.Tensor %2526, %2522, %int1 : !torch.vtensor<[2,2560,24,24],f32>, !torch.vtensor<[1,2560,1,1],f32>, !torch.int -> !torch.vtensor<[2,2560,24,24],f32>
%2528 = torch.aten._to_copy %2527, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,2560,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,24,24],f16>
%2529 = torch.aten.silu %2528 : !torch.vtensor<[2,2560,24,24],f16> -> !torch.vtensor<[2,2560,24,24],f16>
%2530 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2531 = torch.aten._to_copy %11, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,3,3],f16>
%2532 = torch.aten._convolution %2529, %2531, %2530, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,24,24],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2533 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2534 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2535 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2536 = torch.aten.t %2535 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2537 = torch.aten.addmm %2534, %2533, %2536, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2538 = torch.aten.unsqueeze %2537, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2539 = torch.aten.unsqueeze %2538, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2540 = torch.aten.add.Tensor %2532, %2539, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2541 = torch.aten._to_copy %2540, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2542 = torch.aten._reshape_alias %2541, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_143, %result1_144 = torch.aten.var_mean.correction %2542, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2543 = torch.aten.add.Tensor %result0_143, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2544 = torch.aten.rsqrt %2543 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2545 = torch.aten.sub.Tensor %2542, %result1_144, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2546 = torch.aten.mul.Tensor %2545, %2544 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2547 = torch.aten.view %2546, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2548 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2549 = torch.aten.unsqueeze %2548, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2550 = torch.aten.unsqueeze %2549, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2551 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2552 = torch.aten.unsqueeze %2551, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2553 = torch.aten.unsqueeze %2552, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2554 = torch.aten.mul.Tensor %2547, %2553 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2555 = torch.aten.add.Tensor %2554, %2550, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2556 = torch.aten._to_copy %2555, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%2557 = torch.aten.silu %2556 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%2558 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2559 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2560 = torch.aten._convolution %2557, %2559, %2558, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2561 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2562 = torch.aten._to_copy %12, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,1,1],f16>
%2563 = torch.aten._convolution %2512, %2562, %2561, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,24,24],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2564 = torch.aten.add.Tensor %2563, %2560, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2565 = torch.aten._to_copy %2564, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2566 = torch.aten._reshape_alias %2565, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_145, %result1_146 = torch.aten.var_mean.correction %2566, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2567 = torch.aten.add.Tensor %result0_145, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2568 = torch.aten.rsqrt %2567 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2569 = torch.aten.sub.Tensor %2566, %result1_146, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2570 = torch.aten.mul.Tensor %2569, %2568 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2571 = torch.aten.view %2570, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2572 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2573 = torch.aten.unsqueeze %2572, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2574 = torch.aten.unsqueeze %2573, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2575 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2576 = torch.aten.unsqueeze %2575, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2577 = torch.aten.unsqueeze %2576, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2578 = torch.aten.mul.Tensor %2571, %2577 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2579 = torch.aten.add.Tensor %2578, %2574, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2580 = torch.aten._reshape_alias %2579, %1183, %1217 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2581 = torch.aten.permute %2580, %151 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f32>
%2582 = torch.aten._reshape_alias %2581, %1220, %1221 : !torch.vtensor<[2,24,24,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f32>
%2583 = torch.aten.clone %2582, %int0 : !torch.vtensor<[2,576,1280],f32>, !torch.int -> !torch.vtensor<[2,576,1280],f32>
%2584 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2585 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2586 = torch.aten._to_copy %2583, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2587 = torch.aten.t %2585 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2588 = torch.aten.view %2586, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2589 = torch.aten.addmm %2584, %2588, %2587, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2590 = torch.aten.view %2589, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2591 = torch.aten._to_copy %2590, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_147, %result1_148, %result2_149 = torch.aten.native_layer_norm %2591, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2592 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2593 = torch.aten._to_copy %result0_147, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2594 = torch.aten.t %2592 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2595 = torch.aten._reshape_alias %2593, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2596 = torch.aten.mm %2595, %2594 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2597 = torch.aten._unsafe_view %2596, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2598 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2599 = torch.aten._to_copy %result0_147, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2600 = torch.aten.t %2598 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2601 = torch.aten._reshape_alias %2599, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2602 = torch.aten.mm %2601, %2600 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2603 = torch.aten._unsafe_view %2602, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2604 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2605 = torch.aten._to_copy %result0_147, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2606 = torch.aten.t %2604 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2607 = torch.aten._reshape_alias %2605, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2608 = torch.aten.mm %2607, %2606 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2609 = torch.aten._unsafe_view %2608, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2610 = torch.aten._reshape_alias %2597, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2611 = torch.aten.permute %2610, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2612 = torch.aten.clone %2611, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2613 = torch.aten._unsafe_view %2612, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2614 = torch.aten._reshape_alias %2603, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2615 = torch.aten.permute %2614, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2616 = torch.aten.clone %2615, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2617 = torch.aten._unsafe_view %2616, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2618 = torch.aten._reshape_alias %2609, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2619 = torch.aten.permute %2618, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2620 = torch.aten.clone %2619, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2621 = torch.aten._unsafe_view %2620, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2622 = torch.aten.unsqueeze %2613, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2623 = torch.aten.permute %2622, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2624 = torch.aten.unsqueeze %2617, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2625 = torch.aten.permute %2624, %206 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,576,64],f16>
%2626 = torch.aten.permute %2623, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2627 = torch.aten._reshape_alias %2626, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2628 = torch.aten.permute %2625, %211 : !torch.vtensor<[40,1,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,576,1],f16>
%2629 = torch.aten._reshape_alias %2628, %1276, %1277 : !torch.vtensor<[40,64,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,576],f16>
%2630 = torch.aten.bmm %2627, %2629 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,576],f16> -> !torch.vtensor<[40,576,576],f16>
%2631 = torch.aten.view %2630, %1280 : !torch.vtensor<[40,576,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2632 = torch.aten.permute %2631, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2633 = torch.aten.view %2632, %1283 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2634 = torch.aten.mul.Tensor %2633, %0 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,576],f16>
%2635 = torch.aten._softmax %2634, %int-1, %true : !torch.vtensor<[40,576,576],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,576],f32>
%2636 = torch.aten._to_copy %2635, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,576],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,576],f16>
%2637 = torch.aten.unsqueeze %2636, %int3 : !torch.vtensor<[40,576,576],f16>, !torch.int -> !torch.vtensor<[40,576,576,1],f16>
%2638 = torch.aten.permute %2637, %203 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2639 = torch.aten.unsqueeze %2621, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2640 = torch.aten.permute %2639, %211 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,576],f16>
%2641 = torch.aten.permute %2638, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2642 = torch.aten._reshape_alias %2641, %1283, %1293 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2643 = torch.aten.permute %2640, %211 : !torch.vtensor<[40,1,64,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2644 = torch.aten._reshape_alias %2643, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2645 = torch.aten.bmm %2642, %2644 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[40,576,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2646 = torch.aten.view %2645, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2647 = torch.aten.permute %2646, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2648 = torch.aten.view %2647, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2649 = torch.aten._reshape_alias %2648, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2650 = torch.aten.permute %2649, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2651 = torch.aten.clone %2650, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2652 = torch.aten._unsafe_view %2651, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2653 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2654 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2655 = torch.aten.t %2654 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2656 = torch.aten.view %2652, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2657 = torch.aten.addmm %2653, %2656, %2655, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2658 = torch.aten.view %2657, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2659 = torch.aten.add.Tensor %2658, %2590, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2660 = torch.aten._to_copy %2659, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_150, %result1_151, %result2_152 = torch.aten.native_layer_norm %2660, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2661 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2662 = torch.aten._to_copy %result0_150, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2663 = torch.aten.t %2661 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2664 = torch.aten._reshape_alias %2662, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2665 = torch.aten.mm %2664, %2663 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2666 = torch.aten._unsafe_view %2665, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2667 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2668 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2669 = torch.aten.t %2667 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2670 = torch.aten._reshape_alias %2668, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2671 = torch.aten.mm %2670, %2669 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2672 = torch.aten._unsafe_view %2671, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2673 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2674 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2675 = torch.aten.t %2673 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2676 = torch.aten._reshape_alias %2674, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2677 = torch.aten.mm %2676, %2675 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2678 = torch.aten._unsafe_view %2677, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2679 = torch.aten._reshape_alias %2666, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2680 = torch.aten.permute %2679, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2681 = torch.aten.clone %2680, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2682 = torch.aten._unsafe_view %2681, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2683 = torch.aten._reshape_alias %2672, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2684 = torch.aten.permute %2683, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2685 = torch.aten.clone %2684, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2686 = torch.aten._unsafe_view %2685, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2687 = torch.aten._reshape_alias %2678, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2688 = torch.aten.permute %2687, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2689 = torch.aten.clone %2688, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2690 = torch.aten._unsafe_view %2689, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2691 = torch.aten.unsqueeze %2682, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2692 = torch.aten.permute %2691, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2693 = torch.aten.unsqueeze %2686, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2694 = torch.aten.permute %2693, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%2695 = torch.aten.permute %2692, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2696 = torch.aten._reshape_alias %2695, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2697 = torch.aten.permute %2694, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%2698 = torch.aten._reshape_alias %2697, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%2699 = torch.aten.bmm %2696, %2698 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,576,77],f16>
%2700 = torch.aten.view %2699, %1360 : !torch.vtensor<[40,576,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2701 = torch.aten.permute %2700, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2702 = torch.aten.view %2701, %1363 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2703 = torch.aten.mul.Tensor %2702, %0 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,77],f16>
%2704 = torch.aten._softmax %2703, %int-1, %true : !torch.vtensor<[40,576,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,77],f32>
%2705 = torch.aten._to_copy %2704, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,77],f16>
%2706 = torch.aten.unsqueeze %2705, %int3 : !torch.vtensor<[40,576,77],f16>, !torch.int -> !torch.vtensor<[40,576,77,1],f16>
%2707 = torch.aten.permute %2706, %203 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2708 = torch.aten.unsqueeze %2690, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2709 = torch.aten.permute %2708, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%2710 = torch.aten.permute %2707, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2711 = torch.aten._reshape_alias %2710, %1363, %1373 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2712 = torch.aten.permute %2709, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%2713 = torch.aten._reshape_alias %2712, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2714 = torch.aten.bmm %2711, %2713 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2715 = torch.aten.view %2714, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2716 = torch.aten.permute %2715, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2717 = torch.aten.view %2716, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2718 = torch.aten._reshape_alias %2717, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2719 = torch.aten.permute %2718, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2720 = torch.aten.clone %2719, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2721 = torch.aten._unsafe_view %2720, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2722 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2723 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2724 = torch.aten.t %2723 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2725 = torch.aten.view %2721, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2726 = torch.aten.addmm %2722, %2725, %2724, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2727 = torch.aten.view %2726, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2728 = torch.aten.add.Tensor %2727, %2659, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2729 = torch.aten._to_copy %2728, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_153, %result1_154, %result2_155 = torch.aten.native_layer_norm %2729, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2730 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%2731 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%2732 = torch.aten._to_copy %result0_153, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2733 = torch.aten.t %2731 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%2734 = torch.aten.view %2732, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2735 = torch.aten.addmm %2730, %2734, %2733, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,10240],f16>
%2736 = torch.aten.view %2735, %1399 : !torch.vtensor<[1152,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,576,10240],f16>
%2737 = torch.aten.slice.Tensor %2736, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2738 = torch.aten.slice.Tensor %2736, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2739 = torch.aten.gelu %2738, %str : !torch.vtensor<[2,576,5120],f16>, !torch.str -> !torch.vtensor<[2,576,5120],f16>
%2740 = torch.aten.mul.Tensor %2737, %2739 : !torch.vtensor<[2,576,5120],f16>, !torch.vtensor<[2,576,5120],f16> -> !torch.vtensor<[2,576,5120],f16>
%2741 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2742 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%2743 = torch.aten.t %2742 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%2744 = torch.aten.view %2740, %1408 : !torch.vtensor<[2,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[1152,5120],f16>
%2745 = torch.aten.addmm %2741, %2744, %2743, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2746 = torch.aten.view %2745, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2747 = torch.aten.add.Tensor %2746, %2728, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2748 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2749 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2750 = torch.aten.t %2749 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2751 = torch.aten.view %2747, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2752 = torch.aten.addmm %2748, %2751, %2750, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2753 = torch.aten.view %2752, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2754 = torch.aten._reshape_alias %2753, %1419, %1420 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f16>
%2755 = torch.aten.permute %2754, %206 : !torch.vtensor<[2,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%2756 = torch.aten._reshape_alias %2755, %1183, %1423 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%2757 = torch.aten.clone %2756, %int0 : !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2758 = torch.aten.add.Tensor %2757, %2564, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2759 = torch.prim.ListConstruct %2758, %1143 : (!torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,640,24,24],f16>) -> !torch.list<vtensor>
%2760 = torch.aten.cat %2759, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,24,24],f16>
%2761 = torch.aten._to_copy %2760, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1920,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,24,24],f32>
%2762 = torch.prim.ListConstruct %int2, %int32, %int60, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2763 = torch.prim.ListConstruct %int1105920, %int34560, %int576, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2764 = torch.aten._reshape_alias %2761, %2762, %2763 : !torch.vtensor<[2,1920,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,60,576],f32>
%result0_156, %result1_157 = torch.aten.var_mean.correction %2764, %85, %int0, %true : !torch.vtensor<[2,32,60,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2765 = torch.aten.add.Tensor %result0_156, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2766 = torch.aten.rsqrt %2765 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2767 = torch.aten.sub.Tensor %2764, %result1_157, %int1 : !torch.vtensor<[2,32,60,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,576],f32>
%2768 = torch.aten.mul.Tensor %2767, %2766 : !torch.vtensor<[2,32,60,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,576],f32>
%2769 = torch.prim.ListConstruct %int2, %int1920, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2770 = torch.aten.view %2768, %2769 : !torch.vtensor<[2,32,60,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,24,24],f32>
%2771 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[1920],f32>, !torch.int -> !torch.vtensor<[1,1920],f32>
%2772 = torch.aten.unsqueeze %2771, %int2 : !torch.vtensor<[1,1920],f32>, !torch.int -> !torch.vtensor<[1,1920,1],f32>
%2773 = torch.aten.unsqueeze %2772, %int3 : !torch.vtensor<[1,1920,1],f32>, !torch.int -> !torch.vtensor<[1,1920,1,1],f32>
%2774 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[1920],f32>, !torch.int -> !torch.vtensor<[1,1920],f32>
%2775 = torch.aten.unsqueeze %2774, %int2 : !torch.vtensor<[1,1920],f32>, !torch.int -> !torch.vtensor<[1,1920,1],f32>
%2776 = torch.aten.unsqueeze %2775, %int3 : !torch.vtensor<[1,1920,1],f32>, !torch.int -> !torch.vtensor<[1,1920,1,1],f32>
%2777 = torch.aten.mul.Tensor %2770, %2776 : !torch.vtensor<[2,1920,24,24],f32>, !torch.vtensor<[1,1920,1,1],f32> -> !torch.vtensor<[2,1920,24,24],f32>
%2778 = torch.aten.add.Tensor %2777, %2773, %int1 : !torch.vtensor<[2,1920,24,24],f32>, !torch.vtensor<[1,1920,1,1],f32>, !torch.int -> !torch.vtensor<[2,1920,24,24],f32>
%2779 = torch.aten._to_copy %2778, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1920,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,24,24],f16>
%2780 = torch.aten.silu %2779 : !torch.vtensor<[2,1920,24,24],f16> -> !torch.vtensor<[2,1920,24,24],f16>
%2781 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2782 = torch.aten._to_copy %13, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1920,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1920,3,3],f16>
%2783 = torch.aten._convolution %2780, %2782, %2781, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,24,24],f16>, !torch.vtensor<[1280,1920,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2784 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2785 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2786 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2787 = torch.aten.t %2786 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2788 = torch.aten.addmm %2785, %2784, %2787, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2789 = torch.aten.unsqueeze %2788, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2790 = torch.aten.unsqueeze %2789, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2791 = torch.aten.add.Tensor %2783, %2790, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2792 = torch.aten._to_copy %2791, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2793 = torch.aten._reshape_alias %2792, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_158, %result1_159 = torch.aten.var_mean.correction %2793, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2794 = torch.aten.add.Tensor %result0_158, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2795 = torch.aten.rsqrt %2794 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2796 = torch.aten.sub.Tensor %2793, %result1_159, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2797 = torch.aten.mul.Tensor %2796, %2795 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2798 = torch.aten.view %2797, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2799 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2800 = torch.aten.unsqueeze %2799, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2801 = torch.aten.unsqueeze %2800, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2802 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2803 = torch.aten.unsqueeze %2802, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2804 = torch.aten.unsqueeze %2803, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2805 = torch.aten.mul.Tensor %2798, %2804 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2806 = torch.aten.add.Tensor %2805, %2801, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2807 = torch.aten._to_copy %2806, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%2808 = torch.aten.silu %2807 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%2809 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2810 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2811 = torch.aten._convolution %2808, %2810, %2809, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2812 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2813 = torch.aten._to_copy %14, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1920,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1920,1,1],f16>
%2814 = torch.aten._convolution %2760, %2813, %2812, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,24,24],f16>, !torch.vtensor<[1280,1920,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2815 = torch.aten.add.Tensor %2814, %2811, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2816 = torch.aten._to_copy %2815, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2817 = torch.aten._reshape_alias %2816, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_160, %result1_161 = torch.aten.var_mean.correction %2817, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2818 = torch.aten.add.Tensor %result0_160, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2819 = torch.aten.rsqrt %2818 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2820 = torch.aten.sub.Tensor %2817, %result1_161, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2821 = torch.aten.mul.Tensor %2820, %2819 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2822 = torch.aten.view %2821, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2823 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2824 = torch.aten.unsqueeze %2823, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2825 = torch.aten.unsqueeze %2824, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2826 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2827 = torch.aten.unsqueeze %2826, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2828 = torch.aten.unsqueeze %2827, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2829 = torch.aten.mul.Tensor %2822, %2828 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2830 = torch.aten.add.Tensor %2829, %2825, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2831 = torch.aten._reshape_alias %2830, %1183, %1217 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2832 = torch.aten.permute %2831, %151 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f32>
%2833 = torch.aten._reshape_alias %2832, %1220, %1221 : !torch.vtensor<[2,24,24,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f32>
%2834 = torch.aten.clone %2833, %int0 : !torch.vtensor<[2,576,1280],f32>, !torch.int -> !torch.vtensor<[2,576,1280],f32>
%2835 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2836 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2837 = torch.aten._to_copy %2834, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2838 = torch.aten.t %2836 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2839 = torch.aten.view %2837, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2840 = torch.aten.addmm %2835, %2839, %2838, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2841 = torch.aten.view %2840, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2842 = torch.aten._to_copy %2841, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_162, %result1_163, %result2_164 = torch.aten.native_layer_norm %2842, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2843 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2844 = torch.aten._to_copy %result0_162, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2845 = torch.aten.t %2843 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2846 = torch.aten._reshape_alias %2844, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2847 = torch.aten.mm %2846, %2845 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2848 = torch.aten._unsafe_view %2847, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2849 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2850 = torch.aten._to_copy %result0_162, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2851 = torch.aten.t %2849 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2852 = torch.aten._reshape_alias %2850, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2853 = torch.aten.mm %2852, %2851 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2854 = torch.aten._unsafe_view %2853, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2855 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2856 = torch.aten._to_copy %result0_162, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2857 = torch.aten.t %2855 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2858 = torch.aten._reshape_alias %2856, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2859 = torch.aten.mm %2858, %2857 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2860 = torch.aten._unsafe_view %2859, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2861 = torch.aten._reshape_alias %2848, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2862 = torch.aten.permute %2861, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2863 = torch.aten.clone %2862, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2864 = torch.aten._unsafe_view %2863, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2865 = torch.aten._reshape_alias %2854, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2866 = torch.aten.permute %2865, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2867 = torch.aten.clone %2866, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2868 = torch.aten._unsafe_view %2867, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2869 = torch.aten._reshape_alias %2860, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2870 = torch.aten.permute %2869, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2871 = torch.aten.clone %2870, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2872 = torch.aten._unsafe_view %2871, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2873 = torch.aten.unsqueeze %2864, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2874 = torch.aten.permute %2873, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2875 = torch.aten.unsqueeze %2868, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2876 = torch.aten.permute %2875, %206 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,576,64],f16>
%2877 = torch.aten.permute %2874, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2878 = torch.aten._reshape_alias %2877, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2879 = torch.aten.permute %2876, %211 : !torch.vtensor<[40,1,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,576,1],f16>
%2880 = torch.aten._reshape_alias %2879, %1276, %1277 : !torch.vtensor<[40,64,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,576],f16>
%2881 = torch.aten.bmm %2878, %2880 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,576],f16> -> !torch.vtensor<[40,576,576],f16>
%2882 = torch.aten.view %2881, %1280 : !torch.vtensor<[40,576,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2883 = torch.aten.permute %2882, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2884 = torch.aten.view %2883, %1283 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2885 = torch.aten.mul.Tensor %2884, %0 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,576],f16>
%2886 = torch.aten._softmax %2885, %int-1, %true : !torch.vtensor<[40,576,576],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,576],f32>
%2887 = torch.aten._to_copy %2886, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,576],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,576],f16>
%2888 = torch.aten.unsqueeze %2887, %int3 : !torch.vtensor<[40,576,576],f16>, !torch.int -> !torch.vtensor<[40,576,576,1],f16>
%2889 = torch.aten.permute %2888, %203 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2890 = torch.aten.unsqueeze %2872, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2891 = torch.aten.permute %2890, %211 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,576],f16>
%2892 = torch.aten.permute %2889, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2893 = torch.aten._reshape_alias %2892, %1283, %1293 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2894 = torch.aten.permute %2891, %211 : !torch.vtensor<[40,1,64,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2895 = torch.aten._reshape_alias %2894, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2896 = torch.aten.bmm %2893, %2895 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[40,576,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2897 = torch.aten.view %2896, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2898 = torch.aten.permute %2897, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2899 = torch.aten.view %2898, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2900 = torch.aten._reshape_alias %2899, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2901 = torch.aten.permute %2900, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2902 = torch.aten.clone %2901, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2903 = torch.aten._unsafe_view %2902, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2904 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2905 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2906 = torch.aten.t %2905 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2907 = torch.aten.view %2903, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2908 = torch.aten.addmm %2904, %2907, %2906, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2909 = torch.aten.view %2908, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2910 = torch.aten.add.Tensor %2909, %2841, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2911 = torch.aten._to_copy %2910, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_165, %result1_166, %result2_167 = torch.aten.native_layer_norm %2911, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2912 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2913 = torch.aten._to_copy %result0_165, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2914 = torch.aten.t %2912 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2915 = torch.aten._reshape_alias %2913, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2916 = torch.aten.mm %2915, %2914 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2917 = torch.aten._unsafe_view %2916, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2918 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2919 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2920 = torch.aten.t %2918 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2921 = torch.aten._reshape_alias %2919, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2922 = torch.aten.mm %2921, %2920 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2923 = torch.aten._unsafe_view %2922, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2924 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2925 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2926 = torch.aten.t %2924 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2927 = torch.aten._reshape_alias %2925, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2928 = torch.aten.mm %2927, %2926 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2929 = torch.aten._unsafe_view %2928, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2930 = torch.aten._reshape_alias %2917, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2931 = torch.aten.permute %2930, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2932 = torch.aten.clone %2931, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2933 = torch.aten._unsafe_view %2932, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2934 = torch.aten._reshape_alias %2923, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2935 = torch.aten.permute %2934, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2936 = torch.aten.clone %2935, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2937 = torch.aten._unsafe_view %2936, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2938 = torch.aten._reshape_alias %2929, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2939 = torch.aten.permute %2938, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2940 = torch.aten.clone %2939, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2941 = torch.aten._unsafe_view %2940, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2942 = torch.aten.unsqueeze %2933, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2943 = torch.aten.permute %2942, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2944 = torch.aten.unsqueeze %2937, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2945 = torch.aten.permute %2944, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%2946 = torch.aten.permute %2943, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2947 = torch.aten._reshape_alias %2946, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2948 = torch.aten.permute %2945, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%2949 = torch.aten._reshape_alias %2948, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%2950 = torch.aten.bmm %2947, %2949 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,576,77],f16>
%2951 = torch.aten.view %2950, %1360 : !torch.vtensor<[40,576,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2952 = torch.aten.permute %2951, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2953 = torch.aten.view %2952, %1363 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2954 = torch.aten.mul.Tensor %2953, %0 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,77],f16>
%2955 = torch.aten._softmax %2954, %int-1, %true : !torch.vtensor<[40,576,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,77],f32>
%2956 = torch.aten._to_copy %2955, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,77],f16>
%2957 = torch.aten.unsqueeze %2956, %int3 : !torch.vtensor<[40,576,77],f16>, !torch.int -> !torch.vtensor<[40,576,77,1],f16>
%2958 = torch.aten.permute %2957, %203 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2959 = torch.aten.unsqueeze %2941, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2960 = torch.aten.permute %2959, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%2961 = torch.aten.permute %2958, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2962 = torch.aten._reshape_alias %2961, %1363, %1373 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2963 = torch.aten.permute %2960, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%2964 = torch.aten._reshape_alias %2963, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2965 = torch.aten.bmm %2962, %2964 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2966 = torch.aten.view %2965, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2967 = torch.aten.permute %2966, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2968 = torch.aten.view %2967, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2969 = torch.aten._reshape_alias %2968, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2970 = torch.aten.permute %2969, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2971 = torch.aten.clone %2970, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2972 = torch.aten._unsafe_view %2971, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2973 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2974 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2975 = torch.aten.t %2974 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2976 = torch.aten.view %2972, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2977 = torch.aten.addmm %2973, %2976, %2975, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2978 = torch.aten.view %2977, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2979 = torch.aten.add.Tensor %2978, %2910, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2980 = torch.aten._to_copy %2979, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_168, %result1_169, %result2_170 = torch.aten.native_layer_norm %2980, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2981 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%2982 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%2983 = torch.aten._to_copy %result0_168, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2984 = torch.aten.t %2982 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%2985 = torch.aten.view %2983, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2986 = torch.aten.addmm %2981, %2985, %2984, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,10240],f16>
%2987 = torch.aten.view %2986, %1399 : !torch.vtensor<[1152,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,576,10240],f16>
%2988 = torch.aten.slice.Tensor %2987, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2989 = torch.aten.slice.Tensor %2987, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2990 = torch.aten.gelu %2989, %str : !torch.vtensor<[2,576,5120],f16>, !torch.str -> !torch.vtensor<[2,576,5120],f16>
%2991 = torch.aten.mul.Tensor %2988, %2990 : !torch.vtensor<[2,576,5120],f16>, !torch.vtensor<[2,576,5120],f16> -> !torch.vtensor<[2,576,5120],f16>
%2992 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2993 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%2994 = torch.aten.t %2993 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%2995 = torch.aten.view %2991, %1408 : !torch.vtensor<[2,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[1152,5120],f16>
%2996 = torch.aten.addmm %2992, %2995, %2994, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2997 = torch.aten.view %2996, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2998 = torch.aten.add.Tensor %2997, %2979, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2999 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%3000 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%3001 = torch.aten.t %3000 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%3002 = torch.aten.view %2998, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%3003 = torch.aten.addmm %2999, %3002, %3001, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%3004 = torch.aten.view %3003, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%3005 = torch.aten._reshape_alias %3004, %1419, %1420 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f16>
%3006 = torch.aten.permute %3005, %206 : !torch.vtensor<[2,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%3007 = torch.aten._reshape_alias %3006, %1183, %1423 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%3008 = torch.aten.clone %3007, %int0 : !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%3009 = torch.aten.add.Tensor %3008, %2815, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%3010 = torch.prim.ListConstruct %int48, %int48 : (!torch.int, !torch.int) -> !torch.list<int>
%3011 = torch.aten.upsample_nearest2d %3009, %3010, %float2.000000e00, %float2.000000e00 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.float, !torch.float -> !torch.vtensor<[2,1280,48,48],f16>
%3012 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%3013 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%3014 = torch.aten._convolution %3011, %3013, %3012, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,48,48],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,48,48],f16>
%3015 = torch.prim.ListConstruct %3014, %1140 : (!torch.vtensor<[2,1280,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>) -> !torch.list<vtensor>
%3016 = torch.aten.cat %3015, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,48,48],f16>
%3017 = torch.aten._to_copy %3016, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1920,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,48,48],f32>
%3018 = torch.prim.ListConstruct %int2, %int32, %int60, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3019 = torch.prim.ListConstruct %int4423680, %int138240, %int2304, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3020 = torch.aten._reshape_alias %3017, %3018, %3019 : !torch.vtensor<[2,1920,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,60,2304],f32>
%result0_171, %result1_172 = torch.aten.var_mean.correction %3020, %85, %int0, %true : !torch.vtensor<[2,32,60,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3021 = torch.aten.add.Tensor %result0_171, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3022 = torch.aten.rsqrt %3021 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3023 = torch.aten.sub.Tensor %3020, %result1_172, %int1 : !torch.vtensor<[2,32,60,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,2304],f32>
%3024 = torch.aten.mul.Tensor %3023, %3022 : !torch.vtensor<[2,32,60,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,2304],f32>
%3025 = torch.prim.ListConstruct %int2, %int1920, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3026 = torch.aten.view %3024, %3025 : !torch.vtensor<[2,32,60,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,48,48],f32>
%3027 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[1920],f32>, !torch.int -> !torch.vtensor<[1,1920],f32>
%3028 = torch.aten.unsqueeze %3027, %int2 : !torch.vtensor<[1,1920],f32>, !torch.int -> !torch.vtensor<[1,1920,1],f32>
%3029 = torch.aten.unsqueeze %3028, %int3 : !torch.vtensor<[1,1920,1],f32>, !torch.int -> !torch.vtensor<[1,1920,1,1],f32>
%3030 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[1920],f32>, !torch.int -> !torch.vtensor<[1,1920],f32>
%3031 = torch.aten.unsqueeze %3030, %int2 : !torch.vtensor<[1,1920],f32>, !torch.int -> !torch.vtensor<[1,1920,1],f32>
%3032 = torch.aten.unsqueeze %3031, %int3 : !torch.vtensor<[1,1920,1],f32>, !torch.int -> !torch.vtensor<[1,1920,1,1],f32>
%3033 = torch.aten.mul.Tensor %3026, %3032 : !torch.vtensor<[2,1920,48,48],f32>, !torch.vtensor<[1,1920,1,1],f32> -> !torch.vtensor<[2,1920,48,48],f32>
%3034 = torch.aten.add.Tensor %3033, %3029, %int1 : !torch.vtensor<[2,1920,48,48],f32>, !torch.vtensor<[1,1920,1,1],f32>, !torch.int -> !torch.vtensor<[2,1920,48,48],f32>
%3035 = torch.aten._to_copy %3034, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1920,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,48,48],f16>
%3036 = torch.aten.silu %3035 : !torch.vtensor<[2,1920,48,48],f16> -> !torch.vtensor<[2,1920,48,48],f16>
%3037 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3038 = torch.aten._to_copy %22, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1920,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1920,3,3],f16>
%3039 = torch.aten._convolution %3036, %3038, %3037, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,48,48],f16>, !torch.vtensor<[640,1920,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3040 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3041 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3042 = torch.aten._to_copy %28, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280],f16>
%3043 = torch.aten.t %3042 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%3044 = torch.aten.addmm %3041, %3040, %3043, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%3045 = torch.aten.unsqueeze %3044, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%3046 = torch.aten.unsqueeze %3045, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%3047 = torch.aten.add.Tensor %3039, %3046, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3048 = torch.aten._to_copy %3047, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3049 = torch.aten._reshape_alias %3048, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_173, %result1_174 = torch.aten.var_mean.correction %3049, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3050 = torch.aten.add.Tensor %result0_173, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3051 = torch.aten.rsqrt %3050 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3052 = torch.aten.sub.Tensor %3049, %result1_174, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3053 = torch.aten.mul.Tensor %3052, %3051 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3054 = torch.aten.view %3053, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3055 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3056 = torch.aten.unsqueeze %3055, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3057 = torch.aten.unsqueeze %3056, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3058 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3059 = torch.aten.unsqueeze %3058, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3060 = torch.aten.unsqueeze %3059, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3061 = torch.aten.mul.Tensor %3054, %3060 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3062 = torch.aten.add.Tensor %3061, %3057, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3063 = torch.aten._to_copy %3062, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%3064 = torch.aten.silu %3063 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
%3065 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3066 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%3067 = torch.aten._convolution %3064, %3066, %3065, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3068 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3069 = torch.aten._to_copy %23, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1920,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1920,1,1],f16>
%3070 = torch.aten._convolution %3016, %3069, %3068, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,48,48],f16>, !torch.vtensor<[640,1920,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3071 = torch.aten.add.Tensor %3070, %3067, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3072 = torch.aten._to_copy %3071, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3073 = torch.aten._reshape_alias %3072, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_175, %result1_176 = torch.aten.var_mean.correction %3073, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3074 = torch.aten.add.Tensor %result0_175, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3075 = torch.aten.rsqrt %3074 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3076 = torch.aten.sub.Tensor %3073, %result1_176, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3077 = torch.aten.mul.Tensor %3076, %3075 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3078 = torch.aten.view %3077, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3079 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3080 = torch.aten.unsqueeze %3079, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3081 = torch.aten.unsqueeze %3080, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3082 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3083 = torch.aten.unsqueeze %3082, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3084 = torch.aten.unsqueeze %3083, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3085 = torch.aten.mul.Tensor %3078, %3084 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3086 = torch.aten.add.Tensor %3085, %3081, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3087 = torch.aten._reshape_alias %3086, %654, %688 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3088 = torch.aten.permute %3087, %151 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f32>
%3089 = torch.aten._reshape_alias %3088, %691, %692 : !torch.vtensor<[2,48,48,640],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f32>
%3090 = torch.aten.clone %3089, %int0 : !torch.vtensor<[2,2304,640],f32>, !torch.int -> !torch.vtensor<[2,2304,640],f32>
%3091 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3092 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3093 = torch.aten._to_copy %3090, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3094 = torch.aten.t %3092 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3095 = torch.aten.view %3093, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3096 = torch.aten.addmm %3091, %3095, %3094, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3097 = torch.aten.view %3096, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3098 = torch.aten._to_copy %3097, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_177, %result1_178, %result2_179 = torch.aten.native_layer_norm %3098, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3099 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3100 = torch.aten._to_copy %result0_177, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3101 = torch.aten.t %3099 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3102 = torch.aten._reshape_alias %3100, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3103 = torch.aten.mm %3102, %3101 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3104 = torch.aten._unsafe_view %3103, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3105 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3106 = torch.aten._to_copy %result0_177, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3107 = torch.aten.t %3105 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3108 = torch.aten._reshape_alias %3106, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3109 = torch.aten.mm %3108, %3107 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3110 = torch.aten._unsafe_view %3109, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3111 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3112 = torch.aten._to_copy %result0_177, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3113 = torch.aten.t %3111 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3114 = torch.aten._reshape_alias %3112, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3115 = torch.aten.mm %3114, %3113 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3116 = torch.aten._unsafe_view %3115, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3117 = torch.aten._reshape_alias %3104, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3118 = torch.aten.permute %3117, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3119 = torch.aten.clone %3118, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3120 = torch.aten._unsafe_view %3119, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3121 = torch.aten._reshape_alias %3110, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3122 = torch.aten.permute %3121, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3123 = torch.aten.clone %3122, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3124 = torch.aten._unsafe_view %3123, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3125 = torch.aten._reshape_alias %3116, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3126 = torch.aten.permute %3125, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3127 = torch.aten.clone %3126, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3128 = torch.aten._unsafe_view %3127, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3129 = torch.aten.unsqueeze %3120, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3130 = torch.aten.permute %3129, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3131 = torch.aten.unsqueeze %3124, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3132 = torch.aten.permute %3131, %206 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,2304,64],f16>
%3133 = torch.aten.permute %3130, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3134 = torch.aten._reshape_alias %3133, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3135 = torch.aten.permute %3132, %211 : !torch.vtensor<[20,1,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,2304,1],f16>
%3136 = torch.aten._reshape_alias %3135, %747, %748 : !torch.vtensor<[20,64,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,2304],f16>
%3137 = torch.aten.bmm %3134, %3136 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,2304],f16> -> !torch.vtensor<[20,2304,2304],f16>
%3138 = torch.aten.view %3137, %751 : !torch.vtensor<[20,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3139 = torch.aten.permute %3138, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3140 = torch.aten.view %3139, %754 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%3141 = torch.aten.mul.Tensor %3140, %0 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,2304],f16>
%3142 = torch.aten._softmax %3141, %int-1, %true : !torch.vtensor<[20,2304,2304],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,2304],f32>
%3143 = torch.aten._to_copy %3142, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,2304],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,2304],f16>
%3144 = torch.aten.unsqueeze %3143, %int3 : !torch.vtensor<[20,2304,2304],f16>, !torch.int -> !torch.vtensor<[20,2304,2304,1],f16>
%3145 = torch.aten.permute %3144, %203 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3146 = torch.aten.unsqueeze %3128, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3147 = torch.aten.permute %3146, %211 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,2304],f16>
%3148 = torch.aten.permute %3145, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3149 = torch.aten._reshape_alias %3148, %754, %764 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%3150 = torch.aten.permute %3147, %211 : !torch.vtensor<[20,1,64,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3151 = torch.aten._reshape_alias %3150, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3152 = torch.aten.bmm %3149, %3151 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[20,2304,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3153 = torch.aten.view %3152, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3154 = torch.aten.permute %3153, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3155 = torch.aten.view %3154, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3156 = torch.aten._reshape_alias %3155, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3157 = torch.aten.permute %3156, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3158 = torch.aten.clone %3157, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3159 = torch.aten._unsafe_view %3158, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3160 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3161 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3162 = torch.aten.t %3161 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3163 = torch.aten.view %3159, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3164 = torch.aten.addmm %3160, %3163, %3162, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3165 = torch.aten.view %3164, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3166 = torch.aten.add.Tensor %3165, %3097, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3167 = torch.aten._to_copy %3166, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_180, %result1_181, %result2_182 = torch.aten.native_layer_norm %3167, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3168 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3169 = torch.aten._to_copy %result0_180, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3170 = torch.aten.t %3168 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3171 = torch.aten._reshape_alias %3169, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3172 = torch.aten.mm %3171, %3170 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3173 = torch.aten._unsafe_view %3172, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3174 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3175 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3176 = torch.aten.t %3174 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3177 = torch.aten._reshape_alias %3175, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3178 = torch.aten.mm %3177, %3176 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3179 = torch.aten._unsafe_view %3178, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%3180 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3181 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3182 = torch.aten.t %3180 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3183 = torch.aten._reshape_alias %3181, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3184 = torch.aten.mm %3183, %3182 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3185 = torch.aten._unsafe_view %3184, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%3186 = torch.aten._reshape_alias %3173, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3187 = torch.aten.permute %3186, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3188 = torch.aten.clone %3187, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3189 = torch.aten._unsafe_view %3188, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3190 = torch.aten._reshape_alias %3179, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3191 = torch.aten.permute %3190, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3192 = torch.aten.clone %3191, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3193 = torch.aten._unsafe_view %3192, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3194 = torch.aten._reshape_alias %3185, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3195 = torch.aten.permute %3194, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3196 = torch.aten.clone %3195, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3197 = torch.aten._unsafe_view %3196, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3198 = torch.aten.unsqueeze %3189, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3199 = torch.aten.permute %3198, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3200 = torch.aten.unsqueeze %3193, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3201 = torch.aten.permute %3200, %206 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,77,64],f16>
%3202 = torch.aten.permute %3199, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3203 = torch.aten._reshape_alias %3202, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3204 = torch.aten.permute %3201, %211 : !torch.vtensor<[20,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,77,1],f16>
%3205 = torch.aten._reshape_alias %3204, %828, %297 : !torch.vtensor<[20,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,77],f16>
%3206 = torch.aten.bmm %3203, %3205 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,77],f16> -> !torch.vtensor<[20,2304,77],f16>
%3207 = torch.aten.view %3206, %831 : !torch.vtensor<[20,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3208 = torch.aten.permute %3207, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3209 = torch.aten.view %3208, %834 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%3210 = torch.aten.mul.Tensor %3209, %0 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,77],f16>
%3211 = torch.aten._softmax %3210, %int-1, %true : !torch.vtensor<[20,2304,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,77],f32>
%3212 = torch.aten._to_copy %3211, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,77],f16>
%3213 = torch.aten.unsqueeze %3212, %int3 : !torch.vtensor<[20,2304,77],f16>, !torch.int -> !torch.vtensor<[20,2304,77,1],f16>
%3214 = torch.aten.permute %3213, %203 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3215 = torch.aten.unsqueeze %3197, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3216 = torch.aten.permute %3215, %211 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,77],f16>
%3217 = torch.aten.permute %3214, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3218 = torch.aten._reshape_alias %3217, %834, %844 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%3219 = torch.aten.permute %3216, %211 : !torch.vtensor<[20,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64,1],f16>
%3220 = torch.aten._reshape_alias %3219, %815, %316 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3221 = torch.aten.bmm %3218, %3220 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3222 = torch.aten.view %3221, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3223 = torch.aten.permute %3222, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3224 = torch.aten.view %3223, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3225 = torch.aten._reshape_alias %3224, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3226 = torch.aten.permute %3225, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3227 = torch.aten.clone %3226, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3228 = torch.aten._unsafe_view %3227, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3229 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3230 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3231 = torch.aten.t %3230 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3232 = torch.aten.view %3228, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3233 = torch.aten.addmm %3229, %3232, %3231, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3234 = torch.aten.view %3233, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3235 = torch.aten.add.Tensor %3234, %3166, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3236 = torch.aten._to_copy %3235, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_183, %result1_184, %result2_185 = torch.aten.native_layer_norm %3236, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3237 = torch.aten._to_copy %31, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120],f16>
%3238 = torch.aten._to_copy %32, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120,640],f16>
%3239 = torch.aten._to_copy %result0_183, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3240 = torch.aten.t %3238 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%3241 = torch.aten.view %3239, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3242 = torch.aten.addmm %3237, %3241, %3240, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,5120],f16>
%3243 = torch.aten.view %3242, %870 : !torch.vtensor<[4608,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,5120],f16>
%3244 = torch.aten.slice.Tensor %3243, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3245 = torch.aten.slice.Tensor %3243, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3246 = torch.aten.gelu %3245, %str : !torch.vtensor<[2,2304,2560],f16>, !torch.str -> !torch.vtensor<[2,2304,2560],f16>
%3247 = torch.aten.mul.Tensor %3244, %3246 : !torch.vtensor<[2,2304,2560],f16>, !torch.vtensor<[2,2304,2560],f16> -> !torch.vtensor<[2,2304,2560],f16>
%3248 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3249 = torch.aten._to_copy %33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,2560],f16>
%3250 = torch.aten.t %3249 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%3251 = torch.aten.view %3247, %879 : !torch.vtensor<[2,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4608,2560],f16>
%3252 = torch.aten.addmm %3248, %3251, %3250, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3253 = torch.aten.view %3252, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3254 = torch.aten.add.Tensor %3253, %3235, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3255 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3256 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3257 = torch.aten.t %3256 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3258 = torch.aten.view %3254, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3259 = torch.aten.addmm %3255, %3258, %3257, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3260 = torch.aten.view %3259, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3261 = torch.aten._reshape_alias %3260, %890, %891 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f16>
%3262 = torch.aten.permute %3261, %206 : !torch.vtensor<[2,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3263 = torch.aten._reshape_alias %3262, %654, %894 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3264 = torch.aten.clone %3263, %int0 : !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3265 = torch.aten.add.Tensor %3264, %3071, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3266 = torch.prim.ListConstruct %3265, %897 : (!torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>) -> !torch.list<vtensor>
%3267 = torch.aten.cat %3266, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1280,48,48],f16>
%3268 = torch.aten._to_copy %3267, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,48,48],f32>
%3269 = torch.prim.ListConstruct %int2, %int32, %int40, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3270 = torch.prim.ListConstruct %int2949120, %int92160, %int2304, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3271 = torch.aten._reshape_alias %3268, %3269, %3270 : !torch.vtensor<[2,1280,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,2304],f32>
%result0_186, %result1_187 = torch.aten.var_mean.correction %3271, %85, %int0, %true : !torch.vtensor<[2,32,40,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3272 = torch.aten.add.Tensor %result0_186, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3273 = torch.aten.rsqrt %3272 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3274 = torch.aten.sub.Tensor %3271, %result1_187, %int1 : !torch.vtensor<[2,32,40,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,2304],f32>
%3275 = torch.aten.mul.Tensor %3274, %3273 : !torch.vtensor<[2,32,40,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,2304],f32>
%3276 = torch.prim.ListConstruct %int2, %int1280, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3277 = torch.aten.view %3275, %3276 : !torch.vtensor<[2,32,40,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,48,48],f32>
%3278 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%3279 = torch.aten.unsqueeze %3278, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%3280 = torch.aten.unsqueeze %3279, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%3281 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%3282 = torch.aten.unsqueeze %3281, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%3283 = torch.aten.unsqueeze %3282, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%3284 = torch.aten.mul.Tensor %3277, %3283 : !torch.vtensor<[2,1280,48,48],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,48,48],f32>
%3285 = torch.aten.add.Tensor %3284, %3280, %int1 : !torch.vtensor<[2,1280,48,48],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,48,48],f32>
%3286 = torch.aten._to_copy %3285, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,48,48],f16>
%3287 = torch.aten.silu %3286 : !torch.vtensor<[2,1280,48,48],f16> -> !torch.vtensor<[2,1280,48,48],f16>
%3288 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3289 = torch.aten._to_copy %25, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280,3,3],f16>
%3290 = torch.aten._convolution %3287, %3289, %3288, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,48,48],f16>, !torch.vtensor<[640,1280,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3291 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3292 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3293 = torch.aten._to_copy %28, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280],f16>
%3294 = torch.aten.t %3293 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%3295 = torch.aten.addmm %3292, %3291, %3294, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%3296 = torch.aten.unsqueeze %3295, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%3297 = torch.aten.unsqueeze %3296, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%3298 = torch.aten.add.Tensor %3290, %3297, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3299 = torch.aten._to_copy %3298, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3300 = torch.aten._reshape_alias %3299, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_188, %result1_189 = torch.aten.var_mean.correction %3300, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3301 = torch.aten.add.Tensor %result0_188, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3302 = torch.aten.rsqrt %3301 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3303 = torch.aten.sub.Tensor %3300, %result1_189, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3304 = torch.aten.mul.Tensor %3303, %3302 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3305 = torch.aten.view %3304, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3306 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3307 = torch.aten.unsqueeze %3306, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3308 = torch.aten.unsqueeze %3307, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3309 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3310 = torch.aten.unsqueeze %3309, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3311 = torch.aten.unsqueeze %3310, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3312 = torch.aten.mul.Tensor %3305, %3311 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3313 = torch.aten.add.Tensor %3312, %3308, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3314 = torch.aten._to_copy %3313, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%3315 = torch.aten.silu %3314 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
%3316 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3317 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%3318 = torch.aten._convolution %3315, %3317, %3316, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3319 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3320 = torch.aten._to_copy %26, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280,1,1],f16>
%3321 = torch.aten._convolution %3267, %3320, %3319, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,48,48],f16>, !torch.vtensor<[640,1280,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3322 = torch.aten.add.Tensor %3321, %3318, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3323 = torch.aten._to_copy %3322, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3324 = torch.aten._reshape_alias %3323, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_190, %result1_191 = torch.aten.var_mean.correction %3324, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3325 = torch.aten.add.Tensor %result0_190, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3326 = torch.aten.rsqrt %3325 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3327 = torch.aten.sub.Tensor %3324, %result1_191, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3328 = torch.aten.mul.Tensor %3327, %3326 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3329 = torch.aten.view %3328, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3330 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3331 = torch.aten.unsqueeze %3330, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3332 = torch.aten.unsqueeze %3331, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3333 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3334 = torch.aten.unsqueeze %3333, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3335 = torch.aten.unsqueeze %3334, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3336 = torch.aten.mul.Tensor %3329, %3335 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3337 = torch.aten.add.Tensor %3336, %3332, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3338 = torch.aten._reshape_alias %3337, %654, %688 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3339 = torch.aten.permute %3338, %151 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f32>
%3340 = torch.aten._reshape_alias %3339, %691, %692 : !torch.vtensor<[2,48,48,640],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f32>
%3341 = torch.aten.clone %3340, %int0 : !torch.vtensor<[2,2304,640],f32>, !torch.int -> !torch.vtensor<[2,2304,640],f32>
%3342 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3343 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3344 = torch.aten._to_copy %3341, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3345 = torch.aten.t %3343 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3346 = torch.aten.view %3344, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3347 = torch.aten.addmm %3342, %3346, %3345, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3348 = torch.aten.view %3347, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3349 = torch.aten._to_copy %3348, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_192, %result1_193, %result2_194 = torch.aten.native_layer_norm %3349, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3350 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3351 = torch.aten._to_copy %result0_192, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3352 = torch.aten.t %3350 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3353 = torch.aten._reshape_alias %3351, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3354 = torch.aten.mm %3353, %3352 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3355 = torch.aten._unsafe_view %3354, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3356 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3357 = torch.aten._to_copy %result0_192, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3358 = torch.aten.t %3356 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3359 = torch.aten._reshape_alias %3357, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3360 = torch.aten.mm %3359, %3358 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3361 = torch.aten._unsafe_view %3360, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3362 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3363 = torch.aten._to_copy %result0_192, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
// --- Self-attention (attn1) input projections ---
// 640-dim attention over 2304 (= 48x48) spatial tokens, batch 2, split into
// 10 heads of 64 below. Value projection computed as x @ W^T, lowered to
// transpose + flatten-to-2D + mm + reshape-back.
%3364 = torch.aten.t %3362 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3365 = torch.aten._reshape_alias %3363, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3366 = torch.aten.mm %3365, %3364 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3367 = torch.aten._unsafe_view %3366, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// Head split for query (%3355), key (%3361) and value (%3367):
// [2,2304,640] -> [2,2304,10,64] -> permute -> [2,10,2304,64] -> [20,2304,64].
// clone + _unsafe_view materializes the permuted layout contiguously.
%3368 = torch.aten._reshape_alias %3355, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3369 = torch.aten.permute %3368, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3370 = torch.aten.clone %3369, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3371 = torch.aten._unsafe_view %3370, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3372 = torch.aten._reshape_alias %3361, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3373 = torch.aten.permute %3372, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3374 = torch.aten.clone %3373, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3375 = torch.aten._unsafe_view %3374, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3376 = torch.aten._reshape_alias %3367, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3377 = torch.aten.permute %3376, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3378 = torch.aten.clone %3377, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3379 = torch.aten._unsafe_view %3378, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
// --- Self-attention core: softmax(q @ k^T * scale) @ v ---
// The unsqueeze/permute/reshape pairs below are the trace-level lowering of
// batched matmul broadcasting; they only shuffle layout, not data.
%3380 = torch.aten.unsqueeze %3371, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3381 = torch.aten.permute %3380, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3382 = torch.aten.unsqueeze %3375, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3383 = torch.aten.permute %3382, %206 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,2304,64],f16>
%3384 = torch.aten.permute %3381, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3385 = torch.aten._reshape_alias %3384, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3386 = torch.aten.permute %3383, %211 : !torch.vtensor<[20,1,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,2304,1],f16>
%3387 = torch.aten._reshape_alias %3386, %747, %748 : !torch.vtensor<[20,64,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,2304],f16>
// Attention scores: [20,2304,64] @ [20,64,2304] -> [20,2304,2304].
%3388 = torch.aten.bmm %3385, %3387 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,2304],f16> -> !torch.vtensor<[20,2304,2304],f16>
%3389 = torch.aten.view %3388, %751 : !torch.vtensor<[20,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3390 = torch.aten.permute %3389, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3391 = torch.aten.view %3390, %754 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
// Scale by %0 = 0.125 (= 1/sqrt(64), the per-head dimension).
%3392 = torch.aten.mul.Tensor %3391, %0 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,2304],f16>
// Softmax with half_to_float=%true: computed in f32 for stability, then
// cast back to f16.
%3393 = torch.aten._softmax %3392, %int-1, %true : !torch.vtensor<[20,2304,2304],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,2304],f32>
%3394 = torch.aten._to_copy %3393, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,2304],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,2304],f16>
// Weighted sum over values: [20,2304,2304] @ [20,2304,64] -> [20,2304,64].
%3395 = torch.aten.unsqueeze %3394, %int3 : !torch.vtensor<[20,2304,2304],f16>, !torch.int -> !torch.vtensor<[20,2304,2304,1],f16>
%3396 = torch.aten.permute %3395, %203 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3397 = torch.aten.unsqueeze %3379, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3398 = torch.aten.permute %3397, %211 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,2304],f16>
%3399 = torch.aten.permute %3396, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3400 = torch.aten._reshape_alias %3399, %754, %764 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%3401 = torch.aten.permute %3398, %211 : !torch.vtensor<[20,1,64,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3402 = torch.aten._reshape_alias %3401, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3403 = torch.aten.bmm %3400, %3402 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[20,2304,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3404 = torch.aten.view %3403, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3405 = torch.aten.permute %3404, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3406 = torch.aten.view %3405, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
// Merge heads back: [20,2304,64] -> [2,10,2304,64] -> [2,2304,10,64] -> [2,2304,640].
%3407 = torch.aten._reshape_alias %3406, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3408 = torch.aten.permute %3407, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3409 = torch.aten.clone %3408, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3410 = torch.aten._unsafe_view %3409, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// --- attn1 output projection, residual, layer norm ---
// Cast the f32 master weights (%39 bias, %34 640x640 weight) to f16, apply
// the linear as addmm over flattened tokens, add the pre-attention residual
// (%3348), then layer-norm in f32 (eps 1e-5).
// NOTE(review): %39 appears as both layer-norm weight and bias (and as the
// linear bias) — presumably distinct elided dense_resource constants were
// deduplicated in this dump; verify against the original checkpoint.
%3411 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3412 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3413 = torch.aten.t %3412 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3414 = torch.aten.view %3410, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3415 = torch.aten.addmm %3411, %3414, %3413, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3416 = torch.aten.view %3415, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3417 = torch.aten.add.Tensor %3416, %3348, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3418 = torch.aten._to_copy %3417, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_195, %result1_196, %result2_197 = torch.aten.native_layer_norm %3418, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
// --- Cross-attention (attn2) projections ---
// Query from the layer-normed hidden states; key and value from %arg2, the
// [2,77,1024] encoder hidden states (presumably CLIP text embeddings, 77
// tokens — TODO confirm against the caller).
%3419 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3420 = torch.aten._to_copy %result0_195, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3421 = torch.aten.t %3419 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3422 = torch.aten._reshape_alias %3420, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3423 = torch.aten.mm %3422, %3421 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3424 = torch.aten._unsafe_view %3423, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// Key projection: 1024 -> 640.
// NOTE(review): the same weight %30 feeds both the key and value projections
// below — likely an artifact of elided/deduplicated constants in this dump.
%3425 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3426 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3427 = torch.aten.t %3425 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3428 = torch.aten._reshape_alias %3426, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3429 = torch.aten.mm %3428, %3427 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3430 = torch.aten._unsafe_view %3429, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
// Value projection: same shape pipeline as the key projection.
%3431 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3432 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3433 = torch.aten.t %3431 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3434 = torch.aten._reshape_alias %3432, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3435 = torch.aten.mm %3434, %3433 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3436 = torch.aten._unsafe_view %3435, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
// Head split: q -> [20,2304,64]; k, v -> [20,77,64] (10 heads of 64).
%3437 = torch.aten._reshape_alias %3424, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3438 = torch.aten.permute %3437, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3439 = torch.aten.clone %3438, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3440 = torch.aten._unsafe_view %3439, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3441 = torch.aten._reshape_alias %3430, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3442 = torch.aten.permute %3441, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3443 = torch.aten.clone %3442, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3444 = torch.aten._unsafe_view %3443, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3445 = torch.aten._reshape_alias %3436, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3446 = torch.aten.permute %3445, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3447 = torch.aten.clone %3446, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3448 = torch.aten._unsafe_view %3447, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
// --- Cross-attention core: softmax(q @ k^T * scale) @ v over 77 context tokens ---
// Layout shuffles below mirror the self-attention lowering; only the key/value
// sequence length (77) differs.
%3449 = torch.aten.unsqueeze %3440, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3450 = torch.aten.permute %3449, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3451 = torch.aten.unsqueeze %3444, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3452 = torch.aten.permute %3451, %206 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,77,64],f16>
%3453 = torch.aten.permute %3450, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3454 = torch.aten._reshape_alias %3453, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3455 = torch.aten.permute %3452, %211 : !torch.vtensor<[20,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,77,1],f16>
%3456 = torch.aten._reshape_alias %3455, %828, %297 : !torch.vtensor<[20,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,77],f16>
// Scores: [20,2304,64] @ [20,64,77] -> [20,2304,77].
%3457 = torch.aten.bmm %3454, %3456 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,77],f16> -> !torch.vtensor<[20,2304,77],f16>
%3458 = torch.aten.view %3457, %831 : !torch.vtensor<[20,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3459 = torch.aten.permute %3458, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3460 = torch.aten.view %3459, %834 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
// Scale by %0 = 0.125 (= 1/sqrt(64)), then f32 softmax, cast back to f16.
%3461 = torch.aten.mul.Tensor %3460, %0 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,77],f16>
%3462 = torch.aten._softmax %3461, %int-1, %true : !torch.vtensor<[20,2304,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,77],f32>
%3463 = torch.aten._to_copy %3462, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,77],f16>
// Weighted sum: [20,2304,77] @ [20,77,64] -> [20,2304,64], then merge heads
// back to [2,2304,640].
%3464 = torch.aten.unsqueeze %3463, %int3 : !torch.vtensor<[20,2304,77],f16>, !torch.int -> !torch.vtensor<[20,2304,77,1],f16>
%3465 = torch.aten.permute %3464, %203 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3466 = torch.aten.unsqueeze %3448, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3467 = torch.aten.permute %3466, %211 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,77],f16>
%3468 = torch.aten.permute %3465, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3469 = torch.aten._reshape_alias %3468, %834, %844 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%3470 = torch.aten.permute %3467, %211 : !torch.vtensor<[20,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64,1],f16>
%3471 = torch.aten._reshape_alias %3470, %815, %316 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3472 = torch.aten.bmm %3469, %3471 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3473 = torch.aten.view %3472, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3474 = torch.aten.permute %3473, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3475 = torch.aten.view %3474, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3476 = torch.aten._reshape_alias %3475, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3477 = torch.aten.permute %3476, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3478 = torch.aten.clone %3477, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3479 = torch.aten._unsafe_view %3478, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// --- attn2 output projection + residual + layer norm ---
%3480 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3481 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3482 = torch.aten.t %3481 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3483 = torch.aten.view %3479, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3484 = torch.aten.addmm %3480, %3483, %3482, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3485 = torch.aten.view %3484, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// Residual with the post-attn1 hidden states (%3417), then f32 layer norm.
%3486 = torch.aten.add.Tensor %3485, %3417, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3487 = torch.aten._to_copy %3486, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_198, %result1_199, %result2_200 = torch.aten.native_layer_norm %3487, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
// --- Feed-forward with gated GELU ---
// Linear 640 -> 5120, split into two 2560-wide halves, gelu the second half
// and multiply elementwise (GEGLU-style gating, as the slice/gelu/mul below
// shows), then project 2560 -> 640.
%3488 = torch.aten._to_copy %31, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120],f16>
%3489 = torch.aten._to_copy %32, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120,640],f16>
%3490 = torch.aten._to_copy %result0_198, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3491 = torch.aten.t %3489 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%3492 = torch.aten.view %3490, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3493 = torch.aten.addmm %3488, %3492, %3491, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,5120],f16>
%3494 = torch.aten.view %3493, %870 : !torch.vtensor<[4608,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,5120],f16>
%3495 = torch.aten.slice.Tensor %3494, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3496 = torch.aten.slice.Tensor %3494, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3497 = torch.aten.gelu %3496, %str : !torch.vtensor<[2,2304,2560],f16>, !torch.str -> !torch.vtensor<[2,2304,2560],f16>
%3498 = torch.aten.mul.Tensor %3495, %3497 : !torch.vtensor<[2,2304,2560],f16>, !torch.vtensor<[2,2304,2560],f16> -> !torch.vtensor<[2,2304,2560],f16>
%3499 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3500 = torch.aten._to_copy %33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,2560],f16>
%3501 = torch.aten.t %3500 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%3502 = torch.aten.view %3498, %879 : !torch.vtensor<[2,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4608,2560],f16>
%3503 = torch.aten.addmm %3499, %3502, %3501, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3504 = torch.aten.view %3503, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// Feed-forward residual with the post-attn2 hidden states (%3486).
%3505 = torch.aten.add.Tensor %3504, %3486, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
// --- Transformer-block output projection and return to NCHW ---
// Final 640 -> 640 linear, reshape tokens [2,2304,640] -> [2,48,48,640],
// permute to channels-first [2,640,48,48], and add the block input (%3322)
// as a residual.
%3506 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3507 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3508 = torch.aten.t %3507 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3509 = torch.aten.view %3505, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3510 = torch.aten.addmm %3506, %3509, %3508, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3511 = torch.aten.view %3510, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3512 = torch.aten._reshape_alias %3511, %890, %891 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f16>
%3513 = torch.aten.permute %3512, %206 : !torch.vtensor<[2,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3514 = torch.aten._reshape_alias %3513, %654, %894 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3515 = torch.aten.clone %3514, %int0 : !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3516 = torch.aten.add.Tensor %3515, %3322, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
// Channel-concat with a [2,320,48,48] skip tensor (%614) -> [2,960,48,48]
// (UNet decoder-style skip connection — presumed; confirm against the full
// graph), then cast to f32 for the group norm that follows.
%3517 = torch.prim.ListConstruct %3516, %614 : (!torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,320,48,48],f16>) -> !torch.list<vtensor>
%3518 = torch.aten.cat %3517, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,48,48],f16>
%3519 = torch.aten._to_copy %3518, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,960,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,48,48],f32>
// --- ResNet block, first half: group norm (32 groups) + silu + 3x3 conv ---
// Group norm decomposed inline: view 960 channels as 32 groups of 30
// ([2,32,30,2304]), take biased var/mean per group, normalize with
// eps %2 = 1e-5, view back, then apply per-channel scale/shift (%36).
%3520 = torch.prim.ListConstruct %int2, %int32, %int30, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3521 = torch.prim.ListConstruct %int2211840, %int69120, %int2304, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3522 = torch.aten._reshape_alias %3519, %3520, %3521 : !torch.vtensor<[2,960,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,30,2304],f32>
%result0_201, %result1_202 = torch.aten.var_mean.correction %3522, %85, %int0, %true : !torch.vtensor<[2,32,30,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3523 = torch.aten.add.Tensor %result0_201, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3524 = torch.aten.rsqrt %3523 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3525 = torch.aten.sub.Tensor %3522, %result1_202, %int1 : !torch.vtensor<[2,32,30,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,2304],f32>
%3526 = torch.aten.mul.Tensor %3525, %3524 : !torch.vtensor<[2,32,30,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,2304],f32>
%3527 = torch.prim.ListConstruct %int2, %int960, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3528 = torch.aten.view %3526, %3527 : !torch.vtensor<[2,32,30,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,960,48,48],f32>
// NOTE(review): %36 is used as both the affine scale and the shift — likely
// deduplicated elided constants rather than genuinely tied parameters.
%3529 = torch.aten.unsqueeze %36, %int0 : !torch.vtensor<[960],f32>, !torch.int -> !torch.vtensor<[1,960],f32>
%3530 = torch.aten.unsqueeze %3529, %int2 : !torch.vtensor<[1,960],f32>, !torch.int -> !torch.vtensor<[1,960,1],f32>
%3531 = torch.aten.unsqueeze %3530, %int3 : !torch.vtensor<[1,960,1],f32>, !torch.int -> !torch.vtensor<[1,960,1,1],f32>
%3532 = torch.aten.unsqueeze %36, %int0 : !torch.vtensor<[960],f32>, !torch.int -> !torch.vtensor<[1,960],f32>
%3533 = torch.aten.unsqueeze %3532, %int2 : !torch.vtensor<[1,960],f32>, !torch.int -> !torch.vtensor<[1,960,1],f32>
%3534 = torch.aten.unsqueeze %3533, %int3 : !torch.vtensor<[1,960,1],f32>, !torch.int -> !torch.vtensor<[1,960,1,1],f32>
%3535 = torch.aten.mul.Tensor %3528, %3534 : !torch.vtensor<[2,960,48,48],f32>, !torch.vtensor<[1,960,1,1],f32> -> !torch.vtensor<[2,960,48,48],f32>
%3536 = torch.aten.add.Tensor %3535, %3531, %int1 : !torch.vtensor<[2,960,48,48],f32>, !torch.vtensor<[1,960,1,1],f32>, !torch.int -> !torch.vtensor<[2,960,48,48],f32>
// Cast back to f16 (on device %cuda3A0), silu, then 3x3 conv 960 -> 640.
%3537 = torch.aten._to_copy %3536, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,960,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,48,48],f16>
%3538 = torch.aten.silu %3537 : !torch.vtensor<[2,960,48,48],f16> -> !torch.vtensor<[2,960,48,48],f16>
%3539 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3540 = torch.aten._to_copy %27, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,960,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,960,3,3],f16>
%3541 = torch.aten._convolution %3538, %3540, %3539, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,48,48],f16>, !torch.vtensor<[640,960,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
// Timestep-embedding branch: silu on the [2,1280] embedding (%74), linear
// 1280 -> 640, then broadcast-add over the spatial dims.
%3542 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3543 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3544 = torch.aten._to_copy %28, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280],f16>
%3545 = torch.aten.t %3544 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%3546 = torch.aten.addmm %3543, %3542, %3545, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%3547 = torch.aten.unsqueeze %3546, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%3548 = torch.aten.unsqueeze %3547, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%3549 = torch.aten.add.Tensor %3541, %3548, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
// --- ResNet block, second half: group norm + silu + 3x3 conv + 1x1 shortcut ---
// Same inline group-norm decomposition, now on 640 channels viewed as
// 32 groups of 20 ([2,32,20,2304]), eps %2 = 1e-5, affine params %39.
%3550 = torch.aten._to_copy %3549, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3551 = torch.aten._reshape_alias %3550, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_203, %result1_204 = torch.aten.var_mean.correction %3551, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3552 = torch.aten.add.Tensor %result0_203, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3553 = torch.aten.rsqrt %3552 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3554 = torch.aten.sub.Tensor %3551, %result1_204, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3555 = torch.aten.mul.Tensor %3554, %3553 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3556 = torch.aten.view %3555, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3557 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3558 = torch.aten.unsqueeze %3557, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3559 = torch.aten.unsqueeze %3558, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3560 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3561 = torch.aten.unsqueeze %3560, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3562 = torch.aten.unsqueeze %3561, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3563 = torch.aten.mul.Tensor %3556, %3562 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3564 = torch.aten.add.Tensor %3563, %3559, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3565 = torch.aten._to_copy %3564, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%3566 = torch.aten.silu %3565 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
// Main path: 3x3 conv 640 -> 640 (%35).
%3567 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3568 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%3569 = torch.aten._convolution %3566, %3568, %3567, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
// Shortcut path: 1x1 conv on the 960-channel concat input (%3518) to match
// 640 channels, then add to the main path (residual output of the block).
%3570 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3571 = torch.aten._to_copy %29, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,960,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,960,1,1],f16>
%3572 = torch.aten._convolution %3518, %3571, %3570, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,48,48],f16>, !torch.vtensor<[640,960,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3573 = torch.aten.add.Tensor %3572, %3569, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
// --- Next transformer block: input group norm and flatten to tokens ---
// Group norm over [2,640,48,48] (32 groups of 20), but with eps %1 ~= 1e-6
// (the transformer-block norms use 1e-6 where the resnet norms above use
// %2 = 1e-5), then affine (%39) and reshape/permute NCHW -> [2,2304,640]
// so the spatial grid becomes a token sequence for the attention layers.
%3574 = torch.aten._to_copy %3573, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3575 = torch.aten._reshape_alias %3574, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_205, %result1_206 = torch.aten.var_mean.correction %3575, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3576 = torch.aten.add.Tensor %result0_205, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3577 = torch.aten.rsqrt %3576 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3578 = torch.aten.sub.Tensor %3575, %result1_206, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3579 = torch.aten.mul.Tensor %3578, %3577 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3580 = torch.aten.view %3579, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3581 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3582 = torch.aten.unsqueeze %3581, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3583 = torch.aten.unsqueeze %3582, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3584 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3585 = torch.aten.unsqueeze %3584, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3586 = torch.aten.unsqueeze %3585, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3587 = torch.aten.mul.Tensor %3580, %3586 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3588 = torch.aten.add.Tensor %3587, %3583, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3589 = torch.aten._reshape_alias %3588, %654, %688 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3590 = torch.aten.permute %3589, %151 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f32>
%3591 = torch.aten._reshape_alias %3590, %691, %692 : !torch.vtensor<[2,48,48,640],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f32>
%3592 = torch.aten.clone %3591, %int0 : !torch.vtensor<[2,2304,640],f32>, !torch.int -> !torch.vtensor<[2,2304,640],f32>
%3593 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3594 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3595 = torch.aten._to_copy %3592, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3596 = torch.aten.t %3594 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3597 = torch.aten.view %3595, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3598 = torch.aten.addmm %3593, %3597, %3596, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3599 = torch.aten.view %3598, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3600 = torch.aten._to_copy %3599, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_207, %result1_208, %result2_209 = torch.aten.native_layer_norm %3600, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3601 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3602 = torch.aten._to_copy %result0_207, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3603 = torch.aten.t %3601 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3604 = torch.aten._reshape_alias %3602, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3605 = torch.aten.mm %3604, %3603 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3606 = torch.aten._unsafe_view %3605, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3607 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3608 = torch.aten._to_copy %result0_207, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3609 = torch.aten.t %3607 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3610 = torch.aten._reshape_alias %3608, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3611 = torch.aten.mm %3610, %3609 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3612 = torch.aten._unsafe_view %3611, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3613 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3614 = torch.aten._to_copy %result0_207, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3615 = torch.aten.t %3613 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3616 = torch.aten._reshape_alias %3614, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3617 = torch.aten.mm %3616, %3615 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3618 = torch.aten._unsafe_view %3617, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3619 = torch.aten._reshape_alias %3606, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3620 = torch.aten.permute %3619, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3621 = torch.aten.clone %3620, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3622 = torch.aten._unsafe_view %3621, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3623 = torch.aten._reshape_alias %3612, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3624 = torch.aten.permute %3623, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3625 = torch.aten.clone %3624, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3626 = torch.aten._unsafe_view %3625, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3627 = torch.aten._reshape_alias %3618, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3628 = torch.aten.permute %3627, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3629 = torch.aten.clone %3628, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3630 = torch.aten._unsafe_view %3629, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3631 = torch.aten.unsqueeze %3622, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3632 = torch.aten.permute %3631, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3633 = torch.aten.unsqueeze %3626, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3634 = torch.aten.permute %3633, %206 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,2304,64],f16>
%3635 = torch.aten.permute %3632, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3636 = torch.aten._reshape_alias %3635, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3637 = torch.aten.permute %3634, %211 : !torch.vtensor<[20,1,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,2304,1],f16>
%3638 = torch.aten._reshape_alias %3637, %747, %748 : !torch.vtensor<[20,64,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,2304],f16>
%3639 = torch.aten.bmm %3636, %3638 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,2304],f16> -> !torch.vtensor<[20,2304,2304],f16>
%3640 = torch.aten.view %3639, %751 : !torch.vtensor<[20,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3641 = torch.aten.permute %3640, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3642 = torch.aten.view %3641, %754 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%3643 = torch.aten.mul.Tensor %3642, %0 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,2304],f16>
%3644 = torch.aten._softmax %3643, %int-1, %true : !torch.vtensor<[20,2304,2304],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,2304],f32>
%3645 = torch.aten._to_copy %3644, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,2304],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,2304],f16>
%3646 = torch.aten.unsqueeze %3645, %int3 : !torch.vtensor<[20,2304,2304],f16>, !torch.int -> !torch.vtensor<[20,2304,2304,1],f16>
%3647 = torch.aten.permute %3646, %203 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3648 = torch.aten.unsqueeze %3630, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3649 = torch.aten.permute %3648, %211 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,2304],f16>
%3650 = torch.aten.permute %3647, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3651 = torch.aten._reshape_alias %3650, %754, %764 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%3652 = torch.aten.permute %3649, %211 : !torch.vtensor<[20,1,64,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3653 = torch.aten._reshape_alias %3652, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3654 = torch.aten.bmm %3651, %3653 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[20,2304,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3655 = torch.aten.view %3654, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3656 = torch.aten.permute %3655, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3657 = torch.aten.view %3656, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3658 = torch.aten._reshape_alias %3657, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3659 = torch.aten.permute %3658, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3660 = torch.aten.clone %3659, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3661 = torch.aten._unsafe_view %3660, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3662 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3663 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3664 = torch.aten.t %3663 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3665 = torch.aten.view %3661, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3666 = torch.aten.addmm %3662, %3665, %3664, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3667 = torch.aten.view %3666, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3668 = torch.aten.add.Tensor %3667, %3599, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3669 = torch.aten._to_copy %3668, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_210, %result1_211, %result2_212 = torch.aten.native_layer_norm %3669, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3670 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3671 = torch.aten._to_copy %result0_210, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3672 = torch.aten.t %3670 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3673 = torch.aten._reshape_alias %3671, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3674 = torch.aten.mm %3673, %3672 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3675 = torch.aten._unsafe_view %3674, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3676 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3677 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3678 = torch.aten.t %3676 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3679 = torch.aten._reshape_alias %3677, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3680 = torch.aten.mm %3679, %3678 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3681 = torch.aten._unsafe_view %3680, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%3682 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3683 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3684 = torch.aten.t %3682 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3685 = torch.aten._reshape_alias %3683, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3686 = torch.aten.mm %3685, %3684 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3687 = torch.aten._unsafe_view %3686, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%3688 = torch.aten._reshape_alias %3675, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3689 = torch.aten.permute %3688, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3690 = torch.aten.clone %3689, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3691 = torch.aten._unsafe_view %3690, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3692 = torch.aten._reshape_alias %3681, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3693 = torch.aten.permute %3692, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3694 = torch.aten.clone %3693, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3695 = torch.aten._unsafe_view %3694, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3696 = torch.aten._reshape_alias %3687, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3697 = torch.aten.permute %3696, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3698 = torch.aten.clone %3697, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3699 = torch.aten._unsafe_view %3698, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3700 = torch.aten.unsqueeze %3691, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3701 = torch.aten.permute %3700, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3702 = torch.aten.unsqueeze %3695, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3703 = torch.aten.permute %3702, %206 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,77,64],f16>
%3704 = torch.aten.permute %3701, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3705 = torch.aten._reshape_alias %3704, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3706 = torch.aten.permute %3703, %211 : !torch.vtensor<[20,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,77,1],f16>
%3707 = torch.aten._reshape_alias %3706, %828, %297 : !torch.vtensor<[20,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,77],f16>
%3708 = torch.aten.bmm %3705, %3707 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,77],f16> -> !torch.vtensor<[20,2304,77],f16>
%3709 = torch.aten.view %3708, %831 : !torch.vtensor<[20,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3710 = torch.aten.permute %3709, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3711 = torch.aten.view %3710, %834 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%3712 = torch.aten.mul.Tensor %3711, %0 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,77],f16>
%3713 = torch.aten._softmax %3712, %int-1, %true : !torch.vtensor<[20,2304,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,77],f32>
%3714 = torch.aten._to_copy %3713, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,77],f16>
%3715 = torch.aten.unsqueeze %3714, %int3 : !torch.vtensor<[20,2304,77],f16>, !torch.int -> !torch.vtensor<[20,2304,77,1],f16>
%3716 = torch.aten.permute %3715, %203 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3717 = torch.aten.unsqueeze %3699, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3718 = torch.aten.permute %3717, %211 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,77],f16>
%3719 = torch.aten.permute %3716, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3720 = torch.aten._reshape_alias %3719, %834, %844 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%3721 = torch.aten.permute %3718, %211 : !torch.vtensor<[20,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64,1],f16>
%3722 = torch.aten._reshape_alias %3721, %815, %316 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3723 = torch.aten.bmm %3720, %3722 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3724 = torch.aten.view %3723, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3725 = torch.aten.permute %3724, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3726 = torch.aten.view %3725, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3727 = torch.aten._reshape_alias %3726, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3728 = torch.aten.permute %3727, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3729 = torch.aten.clone %3728, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3730 = torch.aten._unsafe_view %3729, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3731 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3732 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3733 = torch.aten.t %3732 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3734 = torch.aten.view %3730, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3735 = torch.aten.addmm %3731, %3734, %3733, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3736 = torch.aten.view %3735, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3737 = torch.aten.add.Tensor %3736, %3668, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3738 = torch.aten._to_copy %3737, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_213, %result1_214, %result2_215 = torch.aten.native_layer_norm %3738, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3739 = torch.aten._to_copy %31, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120],f16>
%3740 = torch.aten._to_copy %32, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120,640],f16>
%3741 = torch.aten._to_copy %result0_213, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3742 = torch.aten.t %3740 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%3743 = torch.aten.view %3741, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3744 = torch.aten.addmm %3739, %3743, %3742, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,5120],f16>
%3745 = torch.aten.view %3744, %870 : !torch.vtensor<[4608,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,5120],f16>
%3746 = torch.aten.slice.Tensor %3745, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3747 = torch.aten.slice.Tensor %3745, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3748 = torch.aten.gelu %3747, %str : !torch.vtensor<[2,2304,2560],f16>, !torch.str -> !torch.vtensor<[2,2304,2560],f16>
%3749 = torch.aten.mul.Tensor %3746, %3748 : !torch.vtensor<[2,2304,2560],f16>, !torch.vtensor<[2,2304,2560],f16> -> !torch.vtensor<[2,2304,2560],f16>
%3750 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3751 = torch.aten._to_copy %33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,2560],f16>
%3752 = torch.aten.t %3751 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%3753 = torch.aten.view %3749, %879 : !torch.vtensor<[2,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4608,2560],f16>
%3754 = torch.aten.addmm %3750, %3753, %3752, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3755 = torch.aten.view %3754, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3756 = torch.aten.add.Tensor %3755, %3737, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3757 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3758 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3759 = torch.aten.t %3758 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3760 = torch.aten.view %3756, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3761 = torch.aten.addmm %3757, %3760, %3759, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3762 = torch.aten.view %3761, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3763 = torch.aten._reshape_alias %3762, %890, %891 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f16>
%3764 = torch.aten.permute %3763, %206 : !torch.vtensor<[2,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3765 = torch.aten._reshape_alias %3764, %654, %894 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3766 = torch.aten.clone %3765, %int0 : !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3767 = torch.aten.add.Tensor %3766, %3573, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3768 = torch.prim.ListConstruct %int96, %int96 : (!torch.int, !torch.int) -> !torch.list<int>
%3769 = torch.aten.upsample_nearest2d %3767, %3768, %float2.000000e00, %float2.000000e00 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.float, !torch.float -> !torch.vtensor<[2,640,96,96],f16>
%3770 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3771 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%3772 = torch.aten._convolution %3769, %3771, %3770, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,96,96],f16>
%3773 = torch.prim.ListConstruct %3772, %610 : (!torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>) -> !torch.list<vtensor>
%3774 = torch.aten.cat %3773, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,96,96],f16>
%3775 = torch.aten._to_copy %3774, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,960,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,96,96],f32>
%3776 = torch.prim.ListConstruct %int2, %int32, %int30, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3777 = torch.prim.ListConstruct %int8847360, %int276480, %int9216, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3778 = torch.aten._reshape_alias %3775, %3776, %3777 : !torch.vtensor<[2,960,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,30,9216],f32>
%result0_216, %result1_217 = torch.aten.var_mean.correction %3778, %85, %int0, %true : !torch.vtensor<[2,32,30,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3779 = torch.aten.add.Tensor %result0_216, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3780 = torch.aten.rsqrt %3779 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3781 = torch.aten.sub.Tensor %3778, %result1_217, %int1 : !torch.vtensor<[2,32,30,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,9216],f32>
%3782 = torch.aten.mul.Tensor %3781, %3780 : !torch.vtensor<[2,32,30,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,9216],f32>
%3783 = torch.prim.ListConstruct %int2, %int960, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3784 = torch.aten.view %3782, %3783 : !torch.vtensor<[2,32,30,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,960,96,96],f32>
%3785 = torch.aten.unsqueeze %36, %int0 : !torch.vtensor<[960],f32>, !torch.int -> !torch.vtensor<[1,960],f32>
%3786 = torch.aten.unsqueeze %3785, %int2 : !torch.vtensor<[1,960],f32>, !torch.int -> !torch.vtensor<[1,960,1],f32>
%3787 = torch.aten.unsqueeze %3786, %int3 : !torch.vtensor<[1,960,1],f32>, !torch.int -> !torch.vtensor<[1,960,1,1],f32>
%3788 = torch.aten.unsqueeze %36, %int0 : !torch.vtensor<[960],f32>, !torch.int -> !torch.vtensor<[1,960],f32>
%3789 = torch.aten.unsqueeze %3788, %int2 : !torch.vtensor<[1,960],f32>, !torch.int -> !torch.vtensor<[1,960,1],f32>
%3790 = torch.aten.unsqueeze %3789, %int3 : !torch.vtensor<[1,960,1],f32>, !torch.int -> !torch.vtensor<[1,960,1,1],f32>
%3791 = torch.aten.mul.Tensor %3784, %3790 : !torch.vtensor<[2,960,96,96],f32>, !torch.vtensor<[1,960,1,1],f32> -> !torch.vtensor<[2,960,96,96],f32>
%3792 = torch.aten.add.Tensor %3791, %3787, %int1 : !torch.vtensor<[2,960,96,96],f32>, !torch.vtensor<[1,960,1,1],f32>, !torch.int -> !torch.vtensor<[2,960,96,96],f32>
%3793 = torch.aten._to_copy %3792, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,960,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,96,96],f16>
%3794 = torch.aten.silu %3793 : !torch.vtensor<[2,960,96,96],f16> -> !torch.vtensor<[2,960,96,96],f16>
%3795 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3796 = torch.aten._to_copy %37, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,960,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,960,3,3],f16>
%3797 = torch.aten._convolution %3794, %3796, %3795, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,96,96],f16>, !torch.vtensor<[320,960,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%3798 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3799 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3800 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%3801 = torch.aten.t %3800 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%3802 = torch.aten.addmm %3799, %3798, %3801, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%3803 = torch.aten.unsqueeze %3802, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%3804 = torch.aten.unsqueeze %3803, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%3805 = torch.aten.add.Tensor %3797, %3804, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%3806 = torch.aten._to_copy %3805, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%3807 = torch.aten._reshape_alias %3806, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_218, %result1_219 = torch.aten.var_mean.correction %3807, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3808 = torch.aten.add.Tensor %result0_218, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3809 = torch.aten.rsqrt %3808 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3810 = torch.aten.sub.Tensor %3807, %result1_219, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%3811 = torch.aten.mul.Tensor %3810, %3809 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%3812 = torch.aten.view %3811, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%3813 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%3814 = torch.aten.unsqueeze %3813, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%3815 = torch.aten.unsqueeze %3814, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%3816 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%3817 = torch.aten.unsqueeze %3816, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%3818 = torch.aten.unsqueeze %3817, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%3819 = torch.aten.mul.Tensor %3812, %3818 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%3820 = torch.aten.add.Tensor %3819, %3815, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%3821 = torch.aten._to_copy %3820, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%3822 = torch.aten.silu %3821 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%3823 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3824 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%3825 = torch.aten._convolution %3822, %3824, %3823, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%3826 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3827 = torch.aten._to_copy %38, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,960,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,960,1,1],f16>
%3828 = torch.aten._convolution %3774, %3827, %3826, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,96,96],f16>, !torch.vtensor<[320,960,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%3829 = torch.aten.add.Tensor %3828, %3825, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%3830 = torch.aten._to_copy %3829, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%3831 = torch.aten._reshape_alias %3830, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_220, %result1_221 = torch.aten.var_mean.correction %3831, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3832 = torch.aten.add.Tensor %result0_220, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3833 = torch.aten.rsqrt %3832 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3834 = torch.aten.sub.Tensor %3831, %result1_221, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%3835 = torch.aten.mul.Tensor %3834, %3833 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%3836 = torch.aten.view %3835, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%3837 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%3838 = torch.aten.unsqueeze %3837, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%3839 = torch.aten.unsqueeze %3838, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%3840 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%3841 = torch.aten.unsqueeze %3840, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%3842 = torch.aten.unsqueeze %3841, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%3843 = torch.aten.mul.Tensor %3836, %3842 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%3844 = torch.aten.add.Tensor %3843, %3839, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%3845 = torch.aten._reshape_alias %3844, %90, %149 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%3846 = torch.aten.permute %3845, %151 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f32>
%3847 = torch.aten._reshape_alias %3846, %153, %154 : !torch.vtensor<[2,96,96,320],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f32>
%3848 = torch.aten.clone %3847, %int0 : !torch.vtensor<[2,9216,320],f32>, !torch.int -> !torch.vtensor<[2,9216,320],f32>
%3849 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3850 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3851 = torch.aten._to_copy %3848, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3852 = torch.aten.t %3850 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3853 = torch.aten.view %3851, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3854 = torch.aten.addmm %3849, %3853, %3852, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%3855 = torch.aten.view %3854, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3856 = torch.aten._to_copy %3855, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_222, %result1_223, %result2_224 = torch.aten.native_layer_norm %3856, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%3857 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3858 = torch.aten._to_copy %result0_222, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3859 = torch.aten.t %3857 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3860 = torch.aten._reshape_alias %3858, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3861 = torch.aten.mm %3860, %3859 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%3862 = torch.aten._unsafe_view %3861, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3863 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3864 = torch.aten._to_copy %result0_222, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3865 = torch.aten.t %3863 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3866 = torch.aten._reshape_alias %3864, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3867 = torch.aten.mm %3866, %3865 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%3868 = torch.aten._unsafe_view %3867, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3869 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3870 = torch.aten._to_copy %result0_222, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3871 = torch.aten.t %3869 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3872 = torch.aten._reshape_alias %3870, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3873 = torch.aten.mm %3872, %3871 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%3874 = torch.aten._unsafe_view %3873, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3875 = torch.aten._reshape_alias %3862, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3876 = torch.aten.permute %3875, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3877 = torch.aten.clone %3876, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%3878 = torch.aten._unsafe_view %3877, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3879 = torch.aten._reshape_alias %3868, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3880 = torch.aten.permute %3879, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3881 = torch.aten.clone %3880, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%3882 = torch.aten._unsafe_view %3881, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3883 = torch.aten._reshape_alias %3874, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3884 = torch.aten.permute %3883, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3885 = torch.aten.clone %3884, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%3886 = torch.aten._unsafe_view %3885, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3887 = torch.aten.unsqueeze %3878, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%3888 = torch.aten.permute %3887, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%3889 = torch.aten.unsqueeze %3882, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%3890 = torch.aten.permute %3889, %206 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,9216,64],f16>
%3891 = torch.aten.permute %3888, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%3892 = torch.aten._reshape_alias %3891, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3893 = torch.aten.permute %3890, %211 : !torch.vtensor<[10,1,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,9216,1],f16>
%3894 = torch.aten._reshape_alias %3893, %213, %214 : !torch.vtensor<[10,64,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,9216],f16>
%3895 = torch.aten.bmm %3892, %3894 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,9216],f16> -> !torch.vtensor<[10,9216,9216],f16>
%3896 = torch.aten.view %3895, %217 : !torch.vtensor<[10,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%3897 = torch.aten.permute %3896, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%3898 = torch.aten.view %3897, %220 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%3899 = torch.aten.mul.Tensor %3898, %0 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,9216],f16>
%3900 = torch.aten._softmax %3899, %int-1, %true : !torch.vtensor<[10,9216,9216],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,9216],f32>
%3901 = torch.aten._to_copy %3900, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,9216],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,9216],f16>
%3902 = torch.aten.unsqueeze %3901, %int3 : !torch.vtensor<[10,9216,9216],f16>, !torch.int -> !torch.vtensor<[10,9216,9216,1],f16>
%3903 = torch.aten.permute %3902, %203 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%3904 = torch.aten.unsqueeze %3886, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%3905 = torch.aten.permute %3904, %211 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,9216],f16>
%3906 = torch.aten.permute %3903, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%3907 = torch.aten._reshape_alias %3906, %220, %230 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%3908 = torch.aten.permute %3905, %211 : !torch.vtensor<[10,1,64,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%3909 = torch.aten._reshape_alias %3908, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3910 = torch.aten.bmm %3907, %3909 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[10,9216,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%3911 = torch.aten.view %3910, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%3912 = torch.aten.permute %3911, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%3913 = torch.aten.view %3912, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3914 = torch.aten._reshape_alias %3913, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3915 = torch.aten.permute %3914, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3916 = torch.aten.clone %3915, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%3917 = torch.aten._unsafe_view %3916, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3918 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3919 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3920 = torch.aten.t %3919 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3921 = torch.aten.view %3917, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3922 = torch.aten.addmm %3918, %3921, %3920, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%3923 = torch.aten.view %3922, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3924 = torch.aten.add.Tensor %3923, %3855, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%3925 = torch.aten._to_copy %3924, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_225, %result1_226, %result2_227 = torch.aten.native_layer_norm %3925, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%3926 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3927 = torch.aten._to_copy %result0_225, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3928 = torch.aten.t %3926 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3929 = torch.aten._reshape_alias %3927, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3930 = torch.aten.mm %3929, %3928 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%3931 = torch.aten._unsafe_view %3930, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3932 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%3933 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3934 = torch.aten.t %3932 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%3935 = torch.aten._reshape_alias %3933, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3936 = torch.aten.mm %3935, %3934 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%3937 = torch.aten._unsafe_view %3936, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%3938 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%3939 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3940 = torch.aten.t %3938 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%3941 = torch.aten._reshape_alias %3939, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3942 = torch.aten.mm %3941, %3940 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%3943 = torch.aten._unsafe_view %3942, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%3944 = torch.aten._reshape_alias %3931, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3945 = torch.aten.permute %3944, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3946 = torch.aten.clone %3945, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%3947 = torch.aten._unsafe_view %3946, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3948 = torch.aten._reshape_alias %3937, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%3949 = torch.aten.permute %3948, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%3950 = torch.aten.clone %3949, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%3951 = torch.aten._unsafe_view %3950, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%3952 = torch.aten._reshape_alias %3943, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%3953 = torch.aten.permute %3952, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%3954 = torch.aten.clone %3953, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%3955 = torch.aten._unsafe_view %3954, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%3956 = torch.aten.unsqueeze %3947, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%3957 = torch.aten.permute %3956, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%3958 = torch.aten.unsqueeze %3951, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%3959 = torch.aten.permute %3958, %206 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,77,64],f16>
%3960 = torch.aten.permute %3957, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%3961 = torch.aten._reshape_alias %3960, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3962 = torch.aten.permute %3959, %211 : !torch.vtensor<[10,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,77,1],f16>
%3963 = torch.aten._reshape_alias %3962, %296, %297 : !torch.vtensor<[10,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,77],f16>
%3964 = torch.aten.bmm %3961, %3963 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,77],f16> -> !torch.vtensor<[10,9216,77],f16>
%3965 = torch.aten.view %3964, %300 : !torch.vtensor<[10,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%3966 = torch.aten.permute %3965, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%3967 = torch.aten.view %3966, %303 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%3968 = torch.aten.mul.Tensor %3967, %0 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,77],f16>
%3969 = torch.aten._softmax %3968, %int-1, %true : !torch.vtensor<[10,9216,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,77],f32>
%3970 = torch.aten._to_copy %3969, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,77],f16>
%3971 = torch.aten.unsqueeze %3970, %int3 : !torch.vtensor<[10,9216,77],f16>, !torch.int -> !torch.vtensor<[10,9216,77,1],f16>
%3972 = torch.aten.permute %3971, %203 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%3973 = torch.aten.unsqueeze %3955, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%3974 = torch.aten.permute %3973, %211 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,77],f16>
%3975 = torch.aten.permute %3972, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%3976 = torch.aten._reshape_alias %3975, %303, %313 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%3977 = torch.aten.permute %3974, %211 : !torch.vtensor<[10,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64,1],f16>
%3978 = torch.aten._reshape_alias %3977, %283, %316 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%3979 = torch.aten.bmm %3976, %3978 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[10,77,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%3980 = torch.aten.view %3979, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%3981 = torch.aten.permute %3980, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%3982 = torch.aten.view %3981, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3983 = torch.aten._reshape_alias %3982, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3984 = torch.aten.permute %3983, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3985 = torch.aten.clone %3984, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%3986 = torch.aten._unsafe_view %3985, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3987 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3988 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3989 = torch.aten.t %3988 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3990 = torch.aten.view %3986, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3991 = torch.aten.addmm %3987, %3990, %3989, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%3992 = torch.aten.view %3991, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3993 = torch.aten.add.Tensor %3992, %3924, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%3994 = torch.aten._to_copy %3993, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_228, %result1_229, %result2_230 = torch.aten.native_layer_norm %3994, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%3995 = torch.aten._to_copy %44, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560],f16>
%3996 = torch.aten._to_copy %45, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560,320],f16>
%3997 = torch.aten._to_copy %result0_228, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3998 = torch.aten.t %3996 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%3999 = torch.aten.view %3997, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4000 = torch.aten.addmm %3995, %3999, %3998, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,2560],f16>
%4001 = torch.aten.view %4000, %340 : !torch.vtensor<[18432,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,2560],f16>
%4002 = torch.aten.slice.Tensor %4001, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4003 = torch.aten.slice.Tensor %4001, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4004 = torch.aten.gelu %4003, %str : !torch.vtensor<[2,9216,1280],f16>, !torch.str -> !torch.vtensor<[2,9216,1280],f16>
%4005 = torch.aten.mul.Tensor %4002, %4004 : !torch.vtensor<[2,9216,1280],f16>, !torch.vtensor<[2,9216,1280],f16> -> !torch.vtensor<[2,9216,1280],f16>
%4006 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4007 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%4008 = torch.aten.t %4007 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%4009 = torch.aten.view %4005, %349 : !torch.vtensor<[2,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[18432,1280],f16>
%4010 = torch.aten.addmm %4006, %4009, %4008, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4011 = torch.aten.view %4010, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4012 = torch.aten.add.Tensor %4011, %3993, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4013 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4014 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4015 = torch.aten.t %4014 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4016 = torch.aten.view %4012, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4017 = torch.aten.addmm %4013, %4016, %4015, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4018 = torch.aten.view %4017, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4019 = torch.aten._reshape_alias %4018, %360, %361 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f16>
%4020 = torch.aten.permute %4019, %206 : !torch.vtensor<[2,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4021 = torch.aten._reshape_alias %4020, %90, %364 : !torch.vtensor<[2,320,96,96],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4022 = torch.aten.clone %4021, %int0 : !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4023 = torch.aten.add.Tensor %4022, %3829, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4024 = torch.prim.ListConstruct %4023, %367 : (!torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>) -> !torch.list<vtensor>
%4025 = torch.aten.cat %4024, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,96,96],f16>
%4026 = torch.aten._to_copy %4025, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,96,96],f32>
%4027 = torch.prim.ListConstruct %int2, %int32, %int20, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4028 = torch.prim.ListConstruct %int5898240, %int184320, %int9216, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4029 = torch.aten._reshape_alias %4026, %4027, %4028 : !torch.vtensor<[2,640,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,9216],f32>
%result0_231, %result1_232 = torch.aten.var_mean.correction %4029, %85, %int0, %true : !torch.vtensor<[2,32,20,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4030 = torch.aten.add.Tensor %result0_231, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4031 = torch.aten.rsqrt %4030 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4032 = torch.aten.sub.Tensor %4029, %result1_232, %int1 : !torch.vtensor<[2,32,20,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,9216],f32>
%4033 = torch.aten.mul.Tensor %4032, %4031 : !torch.vtensor<[2,32,20,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,9216],f32>
%4034 = torch.prim.ListConstruct %int2, %int640, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4035 = torch.aten.view %4033, %4034 : !torch.vtensor<[2,32,20,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,640,96,96],f32>
%4036 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%4037 = torch.aten.unsqueeze %4036, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%4038 = torch.aten.unsqueeze %4037, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%4039 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%4040 = torch.aten.unsqueeze %4039, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%4041 = torch.aten.unsqueeze %4040, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%4042 = torch.aten.mul.Tensor %4035, %4041 : !torch.vtensor<[2,640,96,96],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,96,96],f32>
%4043 = torch.aten.add.Tensor %4042, %4038, %int1 : !torch.vtensor<[2,640,96,96],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,96,96],f32>
%4044 = torch.aten._to_copy %4043, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,96,96],f16>
%4045 = torch.aten.silu %4044 : !torch.vtensor<[2,640,96,96],f16> -> !torch.vtensor<[2,640,96,96],f16>
%4046 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4047 = torch.aten._to_copy %40, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,640,3,3],f16>
%4048 = torch.aten._convolution %4045, %4047, %4046, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4049 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4050 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4051 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%4052 = torch.aten.t %4051 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%4053 = torch.aten.addmm %4050, %4049, %4052, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%4054 = torch.aten.unsqueeze %4053, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%4055 = torch.aten.unsqueeze %4054, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%4056 = torch.aten.add.Tensor %4048, %4055, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4057 = torch.aten._to_copy %4056, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%4058 = torch.aten._reshape_alias %4057, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_233, %result1_234 = torch.aten.var_mean.correction %4058, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4059 = torch.aten.add.Tensor %result0_233, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4060 = torch.aten.rsqrt %4059 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4061 = torch.aten.sub.Tensor %4058, %result1_234, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%4062 = torch.aten.mul.Tensor %4061, %4060 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%4063 = torch.aten.view %4062, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4064 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4065 = torch.aten.unsqueeze %4064, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4066 = torch.aten.unsqueeze %4065, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4067 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4068 = torch.aten.unsqueeze %4067, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4069 = torch.aten.unsqueeze %4068, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4070 = torch.aten.mul.Tensor %4063, %4069 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4071 = torch.aten.add.Tensor %4070, %4066, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%4072 = torch.aten._to_copy %4071, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%4073 = torch.aten.silu %4072 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%4074 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4075 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%4076 = torch.aten._convolution %4073, %4075, %4074, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4077 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4078 = torch.aten._to_copy %42, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,640,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,640,1,1],f16>
%4079 = torch.aten._convolution %4025, %4078, %4077, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4080 = torch.aten.add.Tensor %4079, %4076, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4081 = torch.aten._to_copy %4080, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%4082 = torch.aten._reshape_alias %4081, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_235, %result1_236 = torch.aten.var_mean.correction %4082, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4083 = torch.aten.add.Tensor %result0_235, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4084 = torch.aten.rsqrt %4083 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4085 = torch.aten.sub.Tensor %4082, %result1_236, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%4086 = torch.aten.mul.Tensor %4085, %4084 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%4087 = torch.aten.view %4086, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4088 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4089 = torch.aten.unsqueeze %4088, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4090 = torch.aten.unsqueeze %4089, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4091 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4092 = torch.aten.unsqueeze %4091, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4093 = torch.aten.unsqueeze %4092, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4094 = torch.aten.mul.Tensor %4087, %4093 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4095 = torch.aten.add.Tensor %4094, %4090, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%4096 = torch.aten._reshape_alias %4095, %90, %149 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4097 = torch.aten.permute %4096, %151 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f32>
%4098 = torch.aten._reshape_alias %4097, %153, %154 : !torch.vtensor<[2,96,96,320],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f32>
%4099 = torch.aten.clone %4098, %int0 : !torch.vtensor<[2,9216,320],f32>, !torch.int -> !torch.vtensor<[2,9216,320],f32>
%4100 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4101 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4102 = torch.aten._to_copy %4099, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4103 = torch.aten.t %4101 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4104 = torch.aten.view %4102, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4105 = torch.aten.addmm %4100, %4104, %4103, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4106 = torch.aten.view %4105, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4107 = torch.aten._to_copy %4106, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_237, %result1_238, %result2_239 = torch.aten.native_layer_norm %4107, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4108 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4109 = torch.aten._to_copy %result0_237, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4110 = torch.aten.t %4108 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4111 = torch.aten._reshape_alias %4109, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4112 = torch.aten.mm %4111, %4110 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4113 = torch.aten._unsafe_view %4112, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4114 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4115 = torch.aten._to_copy %result0_237, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4116 = torch.aten.t %4114 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4117 = torch.aten._reshape_alias %4115, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4118 = torch.aten.mm %4117, %4116 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4119 = torch.aten._unsafe_view %4118, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4120 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4121 = torch.aten._to_copy %result0_237, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4122 = torch.aten.t %4120 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4123 = torch.aten._reshape_alias %4121, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4124 = torch.aten.mm %4123, %4122 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4125 = torch.aten._unsafe_view %4124, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4126 = torch.aten._reshape_alias %4113, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4127 = torch.aten.permute %4126, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4128 = torch.aten.clone %4127, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4129 = torch.aten._unsafe_view %4128, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4130 = torch.aten._reshape_alias %4119, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4131 = torch.aten.permute %4130, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4132 = torch.aten.clone %4131, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4133 = torch.aten._unsafe_view %4132, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4134 = torch.aten._reshape_alias %4125, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4135 = torch.aten.permute %4134, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4136 = torch.aten.clone %4135, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4137 = torch.aten._unsafe_view %4136, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4138 = torch.aten.unsqueeze %4129, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4139 = torch.aten.permute %4138, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4140 = torch.aten.unsqueeze %4133, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4141 = torch.aten.permute %4140, %206 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,9216,64],f16>
%4142 = torch.aten.permute %4139, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4143 = torch.aten._reshape_alias %4142, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4144 = torch.aten.permute %4141, %211 : !torch.vtensor<[10,1,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,9216,1],f16>
%4145 = torch.aten._reshape_alias %4144, %213, %214 : !torch.vtensor<[10,64,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,9216],f16>
%4146 = torch.aten.bmm %4143, %4145 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,9216],f16> -> !torch.vtensor<[10,9216,9216],f16>
%4147 = torch.aten.view %4146, %217 : !torch.vtensor<[10,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%4148 = torch.aten.permute %4147, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%4149 = torch.aten.view %4148, %220 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%4150 = torch.aten.mul.Tensor %4149, %0 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,9216],f16>
%4151 = torch.aten._softmax %4150, %int-1, %true : !torch.vtensor<[10,9216,9216],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,9216],f32>
%4152 = torch.aten._to_copy %4151, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,9216],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,9216],f16>
%4153 = torch.aten.unsqueeze %4152, %int3 : !torch.vtensor<[10,9216,9216],f16>, !torch.int -> !torch.vtensor<[10,9216,9216,1],f16>
%4154 = torch.aten.permute %4153, %203 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%4155 = torch.aten.unsqueeze %4137, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4156 = torch.aten.permute %4155, %211 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,9216],f16>
%4157 = torch.aten.permute %4154, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%4158 = torch.aten._reshape_alias %4157, %220, %230 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%4159 = torch.aten.permute %4156, %211 : !torch.vtensor<[10,1,64,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4160 = torch.aten._reshape_alias %4159, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4161 = torch.aten.bmm %4158, %4160 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[10,9216,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%4162 = torch.aten.view %4161, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4163 = torch.aten.permute %4162, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4164 = torch.aten.view %4163, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4165 = torch.aten._reshape_alias %4164, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4166 = torch.aten.permute %4165, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4167 = torch.aten.clone %4166, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%4168 = torch.aten._unsafe_view %4167, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4169 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4170 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4171 = torch.aten.t %4170 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4172 = torch.aten.view %4168, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4173 = torch.aten.addmm %4169, %4172, %4171, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4174 = torch.aten.view %4173, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4175 = torch.aten.add.Tensor %4174, %4106, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4176 = torch.aten._to_copy %4175, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_240, %result1_241, %result2_242 = torch.aten.native_layer_norm %4176, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4177 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4178 = torch.aten._to_copy %result0_240, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4179 = torch.aten.t %4177 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4180 = torch.aten._reshape_alias %4178, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4181 = torch.aten.mm %4180, %4179 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4182 = torch.aten._unsafe_view %4181, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4183 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%4184 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%4185 = torch.aten.t %4183 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%4186 = torch.aten._reshape_alias %4184, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%4187 = torch.aten.mm %4186, %4185 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%4188 = torch.aten._unsafe_view %4187, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%4189 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%4190 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%4191 = torch.aten.t %4189 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%4192 = torch.aten._reshape_alias %4190, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%4193 = torch.aten.mm %4192, %4191 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%4194 = torch.aten._unsafe_view %4193, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%4195 = torch.aten._reshape_alias %4182, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4196 = torch.aten.permute %4195, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4197 = torch.aten.clone %4196, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4198 = torch.aten._unsafe_view %4197, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4199 = torch.aten._reshape_alias %4188, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%4200 = torch.aten.permute %4199, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%4201 = torch.aten.clone %4200, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%4202 = torch.aten._unsafe_view %4201, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4203 = torch.aten._reshape_alias %4194, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%4204 = torch.aten.permute %4203, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%4205 = torch.aten.clone %4204, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%4206 = torch.aten._unsafe_view %4205, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4207 = torch.aten.unsqueeze %4198, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4208 = torch.aten.permute %4207, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4209 = torch.aten.unsqueeze %4202, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%4210 = torch.aten.permute %4209, %206 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,77,64],f16>
%4211 = torch.aten.permute %4208, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4212 = torch.aten._reshape_alias %4211, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4213 = torch.aten.permute %4210, %211 : !torch.vtensor<[10,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,77,1],f16>
%4214 = torch.aten._reshape_alias %4213, %296, %297 : !torch.vtensor<[10,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,77],f16>
%4215 = torch.aten.bmm %4212, %4214 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,77],f16> -> !torch.vtensor<[10,9216,77],f16>
%4216 = torch.aten.view %4215, %300 : !torch.vtensor<[10,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%4217 = torch.aten.permute %4216, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%4218 = torch.aten.view %4217, %303 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%4219 = torch.aten.mul.Tensor %4218, %0 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,77],f16>
%4220 = torch.aten._softmax %4219, %int-1, %true : !torch.vtensor<[10,9216,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,77],f32>
%4221 = torch.aten._to_copy %4220, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,77],f16>
%4222 = torch.aten.unsqueeze %4221, %int3 : !torch.vtensor<[10,9216,77],f16>, !torch.int -> !torch.vtensor<[10,9216,77,1],f16>
%4223 = torch.aten.permute %4222, %203 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%4224 = torch.aten.unsqueeze %4206, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%4225 = torch.aten.permute %4224, %211 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,77],f16>
%4226 = torch.aten.permute %4223, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%4227 = torch.aten._reshape_alias %4226, %303, %313 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%4228 = torch.aten.permute %4225, %211 : !torch.vtensor<[10,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64,1],f16>
%4229 = torch.aten._reshape_alias %4228, %283, %316 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4230 = torch.aten.bmm %4227, %4229 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[10,77,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%4231 = torch.aten.view %4230, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4232 = torch.aten.permute %4231, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4233 = torch.aten.view %4232, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4234 = torch.aten._reshape_alias %4233, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4235 = torch.aten.permute %4234, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4236 = torch.aten.clone %4235, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%4237 = torch.aten._unsafe_view %4236, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4238 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4239 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4240 = torch.aten.t %4239 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4241 = torch.aten.view %4237, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4242 = torch.aten.addmm %4238, %4241, %4240, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4243 = torch.aten.view %4242, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4244 = torch.aten.add.Tensor %4243, %4175, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4245 = torch.aten._to_copy %4244, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_243, %result1_244, %result2_245 = torch.aten.native_layer_norm %4245, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4246 = torch.aten._to_copy %44, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560],f16>
%4247 = torch.aten._to_copy %45, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560,320],f16>
%4248 = torch.aten._to_copy %result0_243, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4249 = torch.aten.t %4247 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%4250 = torch.aten.view %4248, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4251 = torch.aten.addmm %4246, %4250, %4249, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,2560],f16>
%4252 = torch.aten.view %4251, %340 : !torch.vtensor<[18432,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,2560],f16>
%4253 = torch.aten.slice.Tensor %4252, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4254 = torch.aten.slice.Tensor %4252, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4255 = torch.aten.gelu %4254, %str : !torch.vtensor<[2,9216,1280],f16>, !torch.str -> !torch.vtensor<[2,9216,1280],f16>
%4256 = torch.aten.mul.Tensor %4253, %4255 : !torch.vtensor<[2,9216,1280],f16>, !torch.vtensor<[2,9216,1280],f16> -> !torch.vtensor<[2,9216,1280],f16>
%4257 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4258 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%4259 = torch.aten.t %4258 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%4260 = torch.aten.view %4256, %349 : !torch.vtensor<[2,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[18432,1280],f16>
%4261 = torch.aten.addmm %4257, %4260, %4259, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4262 = torch.aten.view %4261, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4263 = torch.aten.add.Tensor %4262, %4244, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4264 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4265 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4266 = torch.aten.t %4265 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4267 = torch.aten.view %4263, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4268 = torch.aten.addmm %4264, %4267, %4266, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4269 = torch.aten.view %4268, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4270 = torch.aten._reshape_alias %4269, %360, %361 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f16>
%4271 = torch.aten.permute %4270, %206 : !torch.vtensor<[2,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4272 = torch.aten._reshape_alias %4271, %90, %364 : !torch.vtensor<[2,320,96,96],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4273 = torch.aten.clone %4272, %int0 : !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4274 = torch.aten.add.Tensor %4273, %4080, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4275 = torch.prim.ListConstruct %4274, %80 : (!torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>) -> !torch.list<vtensor>
%4276 = torch.aten.cat %4275, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,96,96],f16>
%4277 = torch.aten._to_copy %4276, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,96,96],f32>
%4278 = torch.aten._reshape_alias %4277, %4027, %4028 : !torch.vtensor<[2,640,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,9216],f32>
%result0_246, %result1_247 = torch.aten.var_mean.correction %4278, %85, %int0, %true : !torch.vtensor<[2,32,20,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4279 = torch.aten.add.Tensor %result0_246, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4280 = torch.aten.rsqrt %4279 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4281 = torch.aten.sub.Tensor %4278, %result1_247, %int1 : !torch.vtensor<[2,32,20,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,9216],f32>
%4282 = torch.aten.mul.Tensor %4281, %4280 : !torch.vtensor<[2,32,20,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,9216],f32>
%4283 = torch.aten.view %4282, %4034 : !torch.vtensor<[2,32,20,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,640,96,96],f32>
%4284 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%4285 = torch.aten.unsqueeze %4284, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%4286 = torch.aten.unsqueeze %4285, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%4287 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%4288 = torch.aten.unsqueeze %4287, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%4289 = torch.aten.unsqueeze %4288, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%4290 = torch.aten.mul.Tensor %4283, %4289 : !torch.vtensor<[2,640,96,96],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,96,96],f32>
%4291 = torch.aten.add.Tensor %4290, %4286, %int1 : !torch.vtensor<[2,640,96,96],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,96,96],f32>
%4292 = torch.aten._to_copy %4291, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,96,96],f16>
%4293 = torch.aten.silu %4292 : !torch.vtensor<[2,640,96,96],f16> -> !torch.vtensor<[2,640,96,96],f16>
%4294 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4295 = torch.aten._to_copy %40, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,640,3,3],f16>
%4296 = torch.aten._convolution %4293, %4295, %4294, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4297 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4298 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4299 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%4300 = torch.aten.t %4299 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%4301 = torch.aten.addmm %4298, %4297, %4300, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%4302 = torch.aten.unsqueeze %4301, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%4303 = torch.aten.unsqueeze %4302, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%4304 = torch.aten.add.Tensor %4296, %4303, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4305 = torch.aten._to_copy %4304, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%4306 = torch.aten._reshape_alias %4305, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_248, %result1_249 = torch.aten.var_mean.correction %4306, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4307 = torch.aten.add.Tensor %result0_248, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4308 = torch.aten.rsqrt %4307 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4309 = torch.aten.sub.Tensor %4306, %result1_249, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%4310 = torch.aten.mul.Tensor %4309, %4308 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%4311 = torch.aten.view %4310, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4312 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4313 = torch.aten.unsqueeze %4312, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4314 = torch.aten.unsqueeze %4313, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4315 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4316 = torch.aten.unsqueeze %4315, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4317 = torch.aten.unsqueeze %4316, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4318 = torch.aten.mul.Tensor %4311, %4317 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4319 = torch.aten.add.Tensor %4318, %4314, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%4320 = torch.aten._to_copy %4319, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%4321 = torch.aten.silu %4320 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%4322 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4323 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%4324 = torch.aten._convolution %4321, %4323, %4322, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4325 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4326 = torch.aten._to_copy %42, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,640,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,640,1,1],f16>
%4327 = torch.aten._convolution %4276, %4326, %4325, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4328 = torch.aten.add.Tensor %4327, %4324, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4329 = torch.aten._to_copy %4328, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%4330 = torch.aten._reshape_alias %4329, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_250, %result1_251 = torch.aten.var_mean.correction %4330, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4331 = torch.aten.add.Tensor %result0_250, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4332 = torch.aten.rsqrt %4331 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4333 = torch.aten.sub.Tensor %4330, %result1_251, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%4334 = torch.aten.mul.Tensor %4333, %4332 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%4335 = torch.aten.view %4334, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4336 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4337 = torch.aten.unsqueeze %4336, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4338 = torch.aten.unsqueeze %4337, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4339 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4340 = torch.aten.unsqueeze %4339, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4341 = torch.aten.unsqueeze %4340, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4342 = torch.aten.mul.Tensor %4335, %4341 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4343 = torch.aten.add.Tensor %4342, %4338, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%4344 = torch.aten._reshape_alias %4343, %90, %149 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4345 = torch.aten.permute %4344, %151 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f32>
%4346 = torch.aten._reshape_alias %4345, %153, %154 : !torch.vtensor<[2,96,96,320],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f32>
%4347 = torch.aten.clone %4346, %int0 : !torch.vtensor<[2,9216,320],f32>, !torch.int -> !torch.vtensor<[2,9216,320],f32>
%4348 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4349 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4350 = torch.aten._to_copy %4347, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4351 = torch.aten.t %4349 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4352 = torch.aten.view %4350, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4353 = torch.aten.addmm %4348, %4352, %4351, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4354 = torch.aten.view %4353, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4355 = torch.aten._to_copy %4354, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_252, %result1_253, %result2_254 = torch.aten.native_layer_norm %4355, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4356 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4357 = torch.aten._to_copy %result0_252, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4358 = torch.aten.t %4356 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4359 = torch.aten._reshape_alias %4357, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4360 = torch.aten.mm %4359, %4358 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4361 = torch.aten._unsafe_view %4360, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4362 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4363 = torch.aten._to_copy %result0_252, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4364 = torch.aten.t %4362 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4365 = torch.aten._reshape_alias %4363, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4366 = torch.aten.mm %4365, %4364 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4367 = torch.aten._unsafe_view %4366, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4368 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4369 = torch.aten._to_copy %result0_252, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4370 = torch.aten.t %4368 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4371 = torch.aten._reshape_alias %4369, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4372 = torch.aten.mm %4371, %4370 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4373 = torch.aten._unsafe_view %4372, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4374 = torch.aten._reshape_alias %4361, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4375 = torch.aten.permute %4374, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4376 = torch.aten.clone %4375, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4377 = torch.aten._unsafe_view %4376, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4378 = torch.aten._reshape_alias %4367, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4379 = torch.aten.permute %4378, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4380 = torch.aten.clone %4379, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4381 = torch.aten._unsafe_view %4380, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4382 = torch.aten._reshape_alias %4373, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4383 = torch.aten.permute %4382, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4384 = torch.aten.clone %4383, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4385 = torch.aten._unsafe_view %4384, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4386 = torch.aten.unsqueeze %4377, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4387 = torch.aten.permute %4386, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4388 = torch.aten.unsqueeze %4381, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4389 = torch.aten.permute %4388, %206 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,9216,64],f16>
%4390 = torch.aten.permute %4387, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4391 = torch.aten._reshape_alias %4390, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4392 = torch.aten.permute %4389, %211 : !torch.vtensor<[10,1,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,9216,1],f16>
%4393 = torch.aten._reshape_alias %4392, %213, %214 : !torch.vtensor<[10,64,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,9216],f16>
%4394 = torch.aten.bmm %4391, %4393 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,9216],f16> -> !torch.vtensor<[10,9216,9216],f16>
%4395 = torch.aten.view %4394, %217 : !torch.vtensor<[10,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%4396 = torch.aten.permute %4395, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%4397 = torch.aten.view %4396, %220 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%4398 = torch.aten.mul.Tensor %4397, %0 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,9216],f16>
%4399 = torch.aten._softmax %4398, %int-1, %true : !torch.vtensor<[10,9216,9216],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,9216],f32>
%4400 = torch.aten._to_copy %4399, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,9216],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,9216],f16>
%4401 = torch.aten.unsqueeze %4400, %int3 : !torch.vtensor<[10,9216,9216],f16>, !torch.int -> !torch.vtensor<[10,9216,9216,1],f16>
%4402 = torch.aten.permute %4401, %203 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%4403 = torch.aten.unsqueeze %4385, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4404 = torch.aten.permute %4403, %211 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,9216],f16>
%4405 = torch.aten.permute %4402, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%4406 = torch.aten._reshape_alias %4405, %220, %230 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%4407 = torch.aten.permute %4404, %211 : !torch.vtensor<[10,1,64,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4408 = torch.aten._reshape_alias %4407, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4409 = torch.aten.bmm %4406, %4408 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[10,9216,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%4410 = torch.aten.view %4409, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4411 = torch.aten.permute %4410, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4412 = torch.aten.view %4411, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4413 = torch.aten._reshape_alias %4412, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4414 = torch.aten.permute %4413, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4415 = torch.aten.clone %4414, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%4416 = torch.aten._unsafe_view %4415, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4417 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4418 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4419 = torch.aten.t %4418 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4420 = torch.aten.view %4416, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4421 = torch.aten.addmm %4417, %4420, %4419, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4422 = torch.aten.view %4421, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4423 = torch.aten.add.Tensor %4422, %4354, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4424 = torch.aten._to_copy %4423, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_255, %result1_256, %result2_257 = torch.aten.native_layer_norm %4424, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4425 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4426 = torch.aten._to_copy %result0_255, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4427 = torch.aten.t %4425 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4428 = torch.aten._reshape_alias %4426, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4429 = torch.aten.mm %4428, %4427 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4430 = torch.aten._unsafe_view %4429, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4431 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%4432 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%4433 = torch.aten.t %4431 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%4434 = torch.aten._reshape_alias %4432, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%4435 = torch.aten.mm %4434, %4433 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%4436 = torch.aten._unsafe_view %4435, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%4437 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%4438 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%4439 = torch.aten.t %4437 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%4440 = torch.aten._reshape_alias %4438, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%4441 = torch.aten.mm %4440, %4439 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%4442 = torch.aten._unsafe_view %4441, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%4443 = torch.aten._reshape_alias %4430, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4444 = torch.aten.permute %4443, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4445 = torch.aten.clone %4444, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4446 = torch.aten._unsafe_view %4445, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4447 = torch.aten._reshape_alias %4436, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%4448 = torch.aten.permute %4447, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%4449 = torch.aten.clone %4448, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%4450 = torch.aten._unsafe_view %4449, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4451 = torch.aten._reshape_alias %4442, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%4452 = torch.aten.permute %4451, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%4453 = torch.aten.clone %4452, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%4454 = torch.aten._unsafe_view %4453, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4455 = torch.aten.unsqueeze %4446, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4456 = torch.aten.permute %4455, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4457 = torch.aten.unsqueeze %4450, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%4458 = torch.aten.permute %4457, %206 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,77,64],f16>
%4459 = torch.aten.permute %4456, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4460 = torch.aten._reshape_alias %4459, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4461 = torch.aten.permute %4458, %211 : !torch.vtensor<[10,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,77,1],f16>
%4462 = torch.aten._reshape_alias %4461, %296, %297 : !torch.vtensor<[10,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,77],f16>
%4463 = torch.aten.bmm %4460, %4462 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,77],f16> -> !torch.vtensor<[10,9216,77],f16>
%4464 = torch.aten.view %4463, %300 : !torch.vtensor<[10,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%4465 = torch.aten.permute %4464, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%4466 = torch.aten.view %4465, %303 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%4467 = torch.aten.mul.Tensor %4466, %0 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,77],f16>
%4468 = torch.aten._softmax %4467, %int-1, %true : !torch.vtensor<[10,9216,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,77],f32>
%4469 = torch.aten._to_copy %4468, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,77],f16>
%4470 = torch.aten.unsqueeze %4469, %int3 : !torch.vtensor<[10,9216,77],f16>, !torch.int -> !torch.vtensor<[10,9216,77,1],f16>
%4471 = torch.aten.permute %4470, %203 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%4472 = torch.aten.unsqueeze %4454, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%4473 = torch.aten.permute %4472, %211 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,77],f16>
%4474 = torch.aten.permute %4471, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%4475 = torch.aten._reshape_alias %4474, %303, %313 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%4476 = torch.aten.permute %4473, %211 : !torch.vtensor<[10,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64,1],f16>
%4477 = torch.aten._reshape_alias %4476, %283, %316 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4478 = torch.aten.bmm %4475, %4477 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[10,77,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%4479 = torch.aten.view %4478, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4480 = torch.aten.permute %4479, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4481 = torch.aten.view %4480, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4482 = torch.aten._reshape_alias %4481, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4483 = torch.aten.permute %4482, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4484 = torch.aten.clone %4483, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%4485 = torch.aten._unsafe_view %4484, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4486 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4487 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4488 = torch.aten.t %4487 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4489 = torch.aten.view %4485, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4490 = torch.aten.addmm %4486, %4489, %4488, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4491 = torch.aten.view %4490, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4492 = torch.aten.add.Tensor %4491, %4423, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4493 = torch.aten._to_copy %4492, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_258, %result1_259, %result2_260 = torch.aten.native_layer_norm %4493, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4494 = torch.aten._to_copy %44, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560],f16>
%4495 = torch.aten._to_copy %45, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560,320],f16>
%4496 = torch.aten._to_copy %result0_258, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4497 = torch.aten.t %4495 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%4498 = torch.aten.view %4496, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4499 = torch.aten.addmm %4494, %4498, %4497, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,2560],f16>
%4500 = torch.aten.view %4499, %340 : !torch.vtensor<[18432,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,2560],f16>
%4501 = torch.aten.slice.Tensor %4500, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4502 = torch.aten.slice.Tensor %4500, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4503 = torch.aten.gelu %4502, %str : !torch.vtensor<[2,9216,1280],f16>, !torch.str -> !torch.vtensor<[2,9216,1280],f16>
%4504 = torch.aten.mul.Tensor %4501, %4503 : !torch.vtensor<[2,9216,1280],f16>, !torch.vtensor<[2,9216,1280],f16> -> !torch.vtensor<[2,9216,1280],f16>
%4505 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4506 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%4507 = torch.aten.t %4506 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%4508 = torch.aten.view %4504, %349 : !torch.vtensor<[2,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[18432,1280],f16>
%4509 = torch.aten.addmm %4505, %4508, %4507, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4510 = torch.aten.view %4509, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4511 = torch.aten.add.Tensor %4510, %4492, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4512 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4513 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4514 = torch.aten.t %4513 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4515 = torch.aten.view %4511, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4516 = torch.aten.addmm %4512, %4515, %4514, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4517 = torch.aten.view %4516, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4518 = torch.aten._reshape_alias %4517, %360, %361 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f16>
%4519 = torch.aten.permute %4518, %206 : !torch.vtensor<[2,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4520 = torch.aten._reshape_alias %4519, %90, %364 : !torch.vtensor<[2,320,96,96],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4521 = torch.aten.clone %4520, %int0 : !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4522 = torch.aten.add.Tensor %4521, %4328, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4523 = torch.aten._to_copy %4522, %int6, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%4524 = torch.aten._reshape_alias %4523, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_261, %result1_262 = torch.aten.var_mean.correction %4524, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4525 = torch.aten.add.Tensor %result0_261, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4526 = torch.aten.rsqrt %4525 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4527 = torch.aten.sub.Tensor %4524, %result1_262, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%4528 = torch.aten.mul.Tensor %4527, %4526 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%4529 = torch.aten.view %4528, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4530 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4531 = torch.aten.unsqueeze %4530, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4532 = torch.aten.unsqueeze %4531, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4533 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4534 = torch.aten.unsqueeze %4533, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4535 = torch.aten.unsqueeze %4534, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4536 = torch.aten.mul.Tensor %4529, %4535 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4537 = torch.aten.add.Tensor %4536, %4532, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%4538 = torch.aten.silu %4537 : !torch.vtensor<[2,320,96,96],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4539 = torch.aten._to_copy %49, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[4],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[4],f16>
%4540 = torch.aten._to_copy %50, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[4,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[4,320,3,3],f16>
%4541 = torch.aten._to_copy %4538, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%4542 = torch.aten._convolution %4541, %4540, %4539, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[4,320,3,3],f16>, !torch.vtensor<[4],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,4,96,96],f16>
return %4542 : !torch.vtensor<[2,4,96,96],f16>
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment