Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created November 24, 2022 15:20
Show Gist options
  • Save pashu123/7f554ba72a542a0a209010b5fa2c6a07 to your computer and use it in GitHub Desktop.
func.func @forward(%arg0: !torch.vtensor<[2,4,96,96],f32>, %arg1: !torch.vtensor<[2],si64>, %arg2: !torch.vtensor<[2,77,1024],f32>) -> !torch.vtensor<[2,4,96,96],f16> {
%int160 = torch.constant.int 160
%0 = torch.vtensor.literal(dense<1.250000e-01> : tensor<f64>) : !torch.vtensor<[],f64>
%1 = torch.vtensor.literal(dense<9.9999999999999995E-7> : tensor<f64>) : !torch.vtensor<[],f64>
%2 = torch.vtensor.literal(dense<1.000000e-05> : tensor<f64>) : !torch.vtensor<[],f64>
%3 = torch.vtensor.literal(dense<160> : tensor<si64>) : !torch.vtensor<[],si64>
%4 = torch.vtensor.literal(dense<-9.2103403719761836> : tensor<f64>) : !torch.vtensor<[],f64>
%5 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320xf32>) : !torch.vtensor<[1280,320],f32>
%6 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x4x3x3xf32>) : !torch.vtensor<[320,4,3,3],f32>
%7 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x3x3xf32>) : !torch.vtensor<[640,320,3,3],f32>
%8 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x1x1xf32>) : !torch.vtensor<[640,320,1,1],f32>
%9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x3x3xf32>) : !torch.vtensor<[1280,640,3,3],f32>
%10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x1x1xf32>) : !torch.vtensor<[1280,640,1,1],f32>
%11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf32>) : !torch.vtensor<[1280,2560,3,3],f32>
%12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf32>) : !torch.vtensor<[1280,2560,1,1],f32>
%13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x3x3xf32>) : !torch.vtensor<[1280,1920,3,3],f32>
%14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x1x1xf32>) : !torch.vtensor<[1280,1920,1,1],f32>
%15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf32>) : !torch.vtensor<[1280,1024],f32>
%16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf32>) : !torch.vtensor<[10240],f32>
%17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf32>) : !torch.vtensor<[10240,1280],f32>
%18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf32>) : !torch.vtensor<[1280,5120],f32>
%19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf32>) : !torch.vtensor<[1280,1280],f32>
%20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf32>) : !torch.vtensor<[1280,1280,3,3],f32>
%21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf32>) : !torch.vtensor<[1920],f32>
%22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x3x3xf32>) : !torch.vtensor<[640,1920,3,3],f32>
%23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x1x1xf32>) : !torch.vtensor<[640,1920,1,1],f32>
%24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf32>) : !torch.vtensor<[1280],f32>
%25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x3x3xf32>) : !torch.vtensor<[640,1280,3,3],f32>
%26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x1x1xf32>) : !torch.vtensor<[640,1280,1,1],f32>
%27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x3x3xf32>) : !torch.vtensor<[640,960,3,3],f32>
%28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf32>) : !torch.vtensor<[640,1280],f32>
%29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x1x1xf32>) : !torch.vtensor<[640,960,1,1],f32>
%30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf32>) : !torch.vtensor<[640,1024],f32>
%31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf32>) : !torch.vtensor<[5120],f32>
%32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf32>) : !torch.vtensor<[5120,640],f32>
%33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf32>) : !torch.vtensor<[640,2560],f32>
%34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf32>) : !torch.vtensor<[640,640],f32>
%35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf32>) : !torch.vtensor<[640,640,3,3],f32>
%36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf32>) : !torch.vtensor<[960],f32>
%37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x3x3xf32>) : !torch.vtensor<[320,960,3,3],f32>
%38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x1x1xf32>) : !torch.vtensor<[320,960,1,1],f32>
%39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf32>) : !torch.vtensor<[640],f32>
%40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xf32>) : !torch.vtensor<[320,640,3,3],f32>
%41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf32>) : !torch.vtensor<[320,320,3,3],f32>
%42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xf32>) : !torch.vtensor<[320,640,1,1],f32>
%43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf32>) : !torch.vtensor<[320,1024],f32>
%44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf32>) : !torch.vtensor<[2560],f32>
%45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf32>) : !torch.vtensor<[2560,320],f32>
%46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf32>) : !torch.vtensor<[320,1280],f32>
%47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf32>) : !torch.vtensor<[320,320],f32>
%48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf32>) : !torch.vtensor<[320],f32>
%49 = torch.vtensor.literal(dense<[-0.0333971679, 0.0151019702, -0.0098297568, -0.00283672824]> : tensor<4xf32>) : !torch.vtensor<[4],f32>
%50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4x320x3x3xf32>) : !torch.vtensor<[4,320,3,3],f32>
%int0 = torch.constant.int 0
%int6 = torch.constant.int 6
%none = torch.constant.none
%false = torch.constant.bool false
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int1 = torch.constant.int 1
%int-1 = torch.constant.int -1
%int5 = torch.constant.int 5
%true = torch.constant.bool true
%int2 = torch.constant.int 2
%int32 = torch.constant.int 32
%int10 = torch.constant.int 10
%int9216 = torch.constant.int 9216
%int2949120 = torch.constant.int 2949120
%int92160 = torch.constant.int 92160
%int3 = torch.constant.int 3
%int320 = torch.constant.int 320
%int96 = torch.constant.int 96
%int18432 = torch.constant.int 18432
%float1.000000e-05 = torch.constant.float 1.000000e-05
%int64 = torch.constant.int 64
%int589824 = torch.constant.int 589824
%int84934656 = torch.constant.int 84934656
%int154 = torch.constant.int 154
%int1024 = torch.constant.int 1024
%int77 = torch.constant.int 77
%int24640 = torch.constant.int 24640
%int4928 = torch.constant.int 4928
%int709632 = torch.constant.int 709632
%int2560 = torch.constant.int 2560
%int1280 = torch.constant.int 1280
%str = torch.constant.str "none"
%int30720 = torch.constant.int 30720
%int2304 = torch.constant.int 2304
%int737280 = torch.constant.int 737280
%int23040 = torch.constant.int 23040
%int48 = torch.constant.int 48
%int20 = torch.constant.int 20
%int1474560 = torch.constant.int 1474560
%int46080 = torch.constant.int 46080
%int640 = torch.constant.int 640
%int4608 = torch.constant.int 4608
%int147456 = torch.constant.int 147456
%int5308416 = torch.constant.int 5308416
%int49280 = torch.constant.int 49280
%int177408 = torch.constant.int 177408
%int5120 = torch.constant.int 5120
%int576 = torch.constant.int 576
%int368640 = torch.constant.int 368640
%int11520 = torch.constant.int 11520
%int24 = torch.constant.int 24
%int40 = torch.constant.int 40
%int1152 = torch.constant.int 1152
%int36864 = torch.constant.int 36864
%int331776 = torch.constant.int 331776
%int98560 = torch.constant.int 98560
%int44352 = torch.constant.int 44352
%int10240 = torch.constant.int 10240
%int144 = torch.constant.int 144
%int184320 = torch.constant.int 184320
%int5760 = torch.constant.int 5760
%int12 = torch.constant.int 12
%int288 = torch.constant.int 288
%int20736 = torch.constant.int 20736
%int11088 = torch.constant.int 11088
%int15360 = torch.constant.int 15360
%int80 = torch.constant.int 80
%float2.000000e00 = torch.constant.float 2.000000e+00
%int60 = torch.constant.int 60
%int1105920 = torch.constant.int 1105920
%int34560 = torch.constant.int 34560
%int1920 = torch.constant.int 1920
%int4423680 = torch.constant.int 4423680
%int138240 = torch.constant.int 138240
%int30 = torch.constant.int 30
%int2211840 = torch.constant.int 2211840
%int69120 = torch.constant.int 69120
%int960 = torch.constant.int 960
%int8847360 = torch.constant.int 8847360
%int276480 = torch.constant.int 276480
%int5898240 = torch.constant.int 5898240
%cpu = torch.constant.device "cpu"
%51 = torch.aten.arange.start %int0, %int160, %int6, %none, %cpu, %false : !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[160],f32>
%52 = torch.aten.mul.Tensor %51, %4 : !torch.vtensor<[160],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[160],f32>
%53 = torch.aten.div.Tensor %52, %3 : !torch.vtensor<[160],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[160],f32>
%54 = torch.aten.exp %53 : !torch.vtensor<[160],f32> -> !torch.vtensor<[160],f32>
%cuda3A0 = torch.constant.device "cuda:0"
%55 = torch.aten._to_copy %54, %int6, %int0, %cuda3A0, %none, %false, %none : !torch.vtensor<[160],f32>, !torch.int, !torch.int, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[160],f32>
%56 = torch.aten.slice.Tensor %arg1, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],si64>
%57 = torch.aten.unsqueeze %56, %int1 : !torch.vtensor<[2],si64>, !torch.int -> !torch.vtensor<[2,1],si64>
%58 = torch.aten._to_copy %57, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1],si64>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1],f32>
%59 = torch.aten.unsqueeze %55, %int0 : !torch.vtensor<[160],f32>, !torch.int -> !torch.vtensor<[1,160],f32>
%60 = torch.aten.mul.Tensor %58, %59 : !torch.vtensor<[2,1],f32>, !torch.vtensor<[1,160],f32> -> !torch.vtensor<[2,160],f32>
%61 = torch.aten.cos %60 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
%62 = torch.aten.sin %60 : !torch.vtensor<[2,160],f32> -> !torch.vtensor<[2,160],f32>
%63 = torch.prim.ListConstruct %61, %62 : (!torch.vtensor<[2,160],f32>, !torch.vtensor<[2,160],f32>) -> !torch.list<vtensor>
%64 = torch.aten.cat %63, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f32>
%65 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%66 = torch.aten._to_copy %5, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,320],f16>
%67 = torch.aten._to_copy %64, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320],f16>
%68 = torch.aten.t %66 : !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[320,1280],f16>
%69 = torch.aten.addmm %65, %67, %68, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,320],f16>, !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%70 = torch.aten.silu %69 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%71 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%72 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%73 = torch.aten.t %72 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%74 = torch.aten.addmm %71, %70, %73, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%75 = torch.aten._to_copy %arg0, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,4,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,4,96,96],f16>
%76 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%77 = torch.aten._to_copy %6, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,4,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,4,3,3],f16>
%78 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%79 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
%80 = torch.aten._convolution %75, %77, %76, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,4,96,96],f16>, !torch.vtensor<[320,4,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%81 = torch.aten._to_copy %80, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%82 = torch.prim.ListConstruct %int2, %int32, %int10, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%83 = torch.prim.ListConstruct %int2949120, %int92160, %int9216, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%84 = torch.aten._reshape_alias %81, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%85 = torch.prim.ListConstruct %int2, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
%result0, %result1 = torch.aten.var_mean.correction %84, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%86 = torch.aten.add.Tensor %result0, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%87 = torch.aten.rsqrt %86 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%88 = torch.aten.sub.Tensor %84, %result1, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%89 = torch.aten.mul.Tensor %88, %87 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%90 = torch.prim.ListConstruct %int2, %int320, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%91 = torch.aten.view %89, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%92 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%93 = torch.aten.unsqueeze %92, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%94 = torch.aten.unsqueeze %93, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%95 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%96 = torch.aten.unsqueeze %95, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%97 = torch.aten.unsqueeze %96, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%98 = torch.aten.mul.Tensor %91, %97 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%99 = torch.aten.add.Tensor %98, %94, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%100 = torch.aten._to_copy %99, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%101 = torch.aten.silu %100 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%102 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%103 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%104 = torch.aten._convolution %101, %103, %102, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%105 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%106 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%107 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%108 = torch.aten.t %107 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%109 = torch.aten.addmm %106, %105, %108, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%110 = torch.aten.unsqueeze %109, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%111 = torch.aten.unsqueeze %110, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%112 = torch.aten.add.Tensor %104, %111, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%113 = torch.aten._to_copy %112, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%114 = torch.aten._reshape_alias %113, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_0, %result1_1 = torch.aten.var_mean.correction %114, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%115 = torch.aten.add.Tensor %result0_0, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%116 = torch.aten.rsqrt %115 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%117 = torch.aten.sub.Tensor %114, %result1_1, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%118 = torch.aten.mul.Tensor %117, %116 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%119 = torch.aten.view %118, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%120 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%121 = torch.aten.unsqueeze %120, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%122 = torch.aten.unsqueeze %121, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%123 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%124 = torch.aten.unsqueeze %123, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%125 = torch.aten.unsqueeze %124, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%126 = torch.aten.mul.Tensor %119, %125 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%127 = torch.aten.add.Tensor %126, %122, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%128 = torch.aten._to_copy %127, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%129 = torch.aten.silu %128 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%130 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%131 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%132 = torch.aten._convolution %129, %131, %130, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%133 = torch.aten.add.Tensor %80, %132, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%134 = torch.aten._to_copy %133, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%135 = torch.aten._reshape_alias %134, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_2, %result1_3 = torch.aten.var_mean.correction %135, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%136 = torch.aten.add.Tensor %result0_2, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%137 = torch.aten.rsqrt %136 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%138 = torch.aten.sub.Tensor %135, %result1_3, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%139 = torch.aten.mul.Tensor %138, %137 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%140 = torch.aten.view %139, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%141 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%142 = torch.aten.unsqueeze %141, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%143 = torch.aten.unsqueeze %142, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%144 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%145 = torch.aten.unsqueeze %144, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%146 = torch.aten.unsqueeze %145, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%147 = torch.aten.mul.Tensor %140, %146 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%148 = torch.aten.add.Tensor %147, %143, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%149 = torch.prim.ListConstruct %int2949120, %int9216, %int96, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%150 = torch.aten._reshape_alias %148, %90, %149 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%151 = torch.prim.ListConstruct %int0, %int2, %int3, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%152 = torch.aten.permute %150, %151 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f32>
%153 = torch.prim.ListConstruct %int2, %int9216, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%154 = torch.prim.ListConstruct %int2949120, %int1, %int9216 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%155 = torch.aten._reshape_alias %152, %153, %154 : !torch.vtensor<[2,96,96,320],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f32>
%156 = torch.aten.clone %155, %int0 : !torch.vtensor<[2,9216,320],f32>, !torch.int -> !torch.vtensor<[2,9216,320],f32>
%157 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%158 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%159 = torch.aten._to_copy %156, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%160 = torch.aten.t %158 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%161 = torch.prim.ListConstruct %int18432, %int320 : (!torch.int, !torch.int) -> !torch.list<int>
%162 = torch.aten.view %159, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%163 = torch.aten.addmm %157, %162, %160, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%164 = torch.aten.view %163, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%165 = torch.aten._to_copy %164, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%166 = torch.prim.ListConstruct %int320 : (!torch.int) -> !torch.list<int>
%result0_4, %result1_5, %result2 = torch.aten.native_layer_norm %165, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%167 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%168 = torch.aten._to_copy %result0_4, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%169 = torch.aten.t %167 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%170 = torch.prim.ListConstruct %int320, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%171 = torch.aten._reshape_alias %168, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%172 = torch.aten.mm %171, %169 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%173 = torch.aten._unsafe_view %172, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%174 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%175 = torch.aten._to_copy %result0_4, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%176 = torch.aten.t %174 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%177 = torch.aten._reshape_alias %175, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%178 = torch.aten.mm %177, %176 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%179 = torch.aten._unsafe_view %178, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%180 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%181 = torch.aten._to_copy %result0_4, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%182 = torch.aten.t %180 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%183 = torch.aten._reshape_alias %181, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%184 = torch.aten.mm %183, %182 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%185 = torch.aten._unsafe_view %184, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%186 = torch.prim.ListConstruct %int2, %int9216, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%187 = torch.prim.ListConstruct %int2949120, %int320, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%188 = torch.aten._reshape_alias %173, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%189 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%190 = torch.aten.permute %188, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%191 = torch.aten.clone %190, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%192 = torch.prim.ListConstruct %int10, %int9216, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%193 = torch.aten._unsafe_view %191, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%194 = torch.aten._reshape_alias %179, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%195 = torch.aten.permute %194, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%196 = torch.aten.clone %195, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%197 = torch.aten._unsafe_view %196, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%198 = torch.aten._reshape_alias %185, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%199 = torch.aten.permute %198, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%200 = torch.aten.clone %199, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%201 = torch.aten._unsafe_view %200, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%202 = torch.aten.unsqueeze %193, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%203 = torch.prim.ListConstruct %int0, %int1, %int3, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%204 = torch.aten.permute %202, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%205 = torch.aten.unsqueeze %197, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%206 = torch.prim.ListConstruct %int0, %int3, %int1, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%207 = torch.aten.permute %205, %206 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,9216,64],f16>
%208 = torch.aten.permute %204, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%209 = torch.prim.ListConstruct %int589824, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%210 = torch.aten._reshape_alias %208, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%211 = torch.prim.ListConstruct %int0, %int3, %int2, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%212 = torch.aten.permute %207, %211 : !torch.vtensor<[10,1,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,9216,1],f16>
%213 = torch.prim.ListConstruct %int10, %int64, %int9216 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%214 = torch.prim.ListConstruct %int589824, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%215 = torch.aten._reshape_alias %212, %213, %214 : !torch.vtensor<[10,64,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,9216],f16>
%216 = torch.aten.bmm %210, %215 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,9216],f16> -> !torch.vtensor<[10,9216,9216],f16>
%217 = torch.prim.ListConstruct %int10, %int9216, %int1, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%218 = torch.aten.view %216, %217 : !torch.vtensor<[10,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%219 = torch.aten.permute %218, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%220 = torch.prim.ListConstruct %int10, %int9216, %int9216 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%221 = torch.aten.view %219, %220 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%222 = torch.aten.mul.Tensor %221, %0 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,9216],f16>
%223 = torch.aten._softmax %222, %int-1, %true : !torch.vtensor<[10,9216,9216],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,9216],f32>
%224 = torch.aten._to_copy %223, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,9216],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,9216],f16>
%225 = torch.aten.unsqueeze %224, %int3 : !torch.vtensor<[10,9216,9216],f16>, !torch.int -> !torch.vtensor<[10,9216,9216,1],f16>
%226 = torch.aten.permute %225, %203 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%227 = torch.aten.unsqueeze %201, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%228 = torch.aten.permute %227, %211 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,9216],f16>
%229 = torch.aten.permute %226, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%230 = torch.prim.ListConstruct %int84934656, %int9216, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%231 = torch.aten._reshape_alias %229, %220, %230 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%232 = torch.aten.permute %228, %211 : !torch.vtensor<[10,1,64,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%233 = torch.aten._reshape_alias %232, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%234 = torch.aten.bmm %231, %233 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[10,9216,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%235 = torch.prim.ListConstruct %int10, %int9216, %int1, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%236 = torch.aten.view %234, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%237 = torch.aten.permute %236, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%238 = torch.aten.view %237, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%239 = torch.prim.ListConstruct %int2, %int5, %int9216, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%240 = torch.prim.ListConstruct %int2949120, %int589824, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%241 = torch.aten._reshape_alias %238, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%242 = torch.aten.permute %241, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%243 = torch.aten.clone %242, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%244 = torch.aten._unsafe_view %243, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%245 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%246 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%247 = torch.aten.t %246 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%248 = torch.aten.view %244, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%249 = torch.aten.addmm %245, %248, %247, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%250 = torch.aten.view %249, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%251 = torch.aten.add.Tensor %250, %164, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%252 = torch.aten._to_copy %251, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_6, %result1_7, %result2_8 = torch.aten.native_layer_norm %252, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%253 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%254 = torch.aten._to_copy %result0_6, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%255 = torch.aten.t %253 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%256 = torch.aten._reshape_alias %254, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%257 = torch.aten.mm %256, %255 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%258 = torch.aten._unsafe_view %257, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%259 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%260 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%261 = torch.aten.t %259 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%262 = torch.prim.ListConstruct %int154, %int1024 : (!torch.int, !torch.int) -> !torch.list<int>
%263 = torch.prim.ListConstruct %int1024, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%264 = torch.aten._reshape_alias %260, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%265 = torch.aten.mm %264, %261 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%266 = torch.prim.ListConstruct %int2, %int77, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%267 = torch.aten._unsafe_view %265, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%268 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%269 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%270 = torch.aten.t %268 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%271 = torch.aten._reshape_alias %269, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%272 = torch.aten.mm %271, %270 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%273 = torch.aten._unsafe_view %272, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%274 = torch.aten._reshape_alias %258, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%275 = torch.aten.permute %274, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%276 = torch.aten.clone %275, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%277 = torch.aten._unsafe_view %276, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%278 = torch.prim.ListConstruct %int2, %int77, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%279 = torch.prim.ListConstruct %int24640, %int320, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%280 = torch.aten._reshape_alias %267, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%281 = torch.aten.permute %280, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%282 = torch.aten.clone %281, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%283 = torch.prim.ListConstruct %int10, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%284 = torch.aten._unsafe_view %282, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%285 = torch.aten._reshape_alias %273, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%286 = torch.aten.permute %285, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%287 = torch.aten.clone %286, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%288 = torch.aten._unsafe_view %287, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%289 = torch.aten.unsqueeze %277, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%290 = torch.aten.permute %289, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%291 = torch.aten.unsqueeze %284, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%292 = torch.aten.permute %291, %206 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,77,64],f16>
%293 = torch.aten.permute %290, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%294 = torch.aten._reshape_alias %293, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%295 = torch.aten.permute %292, %211 : !torch.vtensor<[10,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,77,1],f16>
%296 = torch.prim.ListConstruct %int10, %int64, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%297 = torch.prim.ListConstruct %int4928, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%298 = torch.aten._reshape_alias %295, %296, %297 : !torch.vtensor<[10,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,77],f16>
%299 = torch.aten.bmm %294, %298 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,77],f16> -> !torch.vtensor<[10,9216,77],f16>
%300 = torch.prim.ListConstruct %int10, %int9216, %int1, %int77 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%301 = torch.aten.view %299, %300 : !torch.vtensor<[10,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%302 = torch.aten.permute %301, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%303 = torch.prim.ListConstruct %int10, %int9216, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%304 = torch.aten.view %302, %303 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%305 = torch.aten.mul.Tensor %304, %0 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,77],f16>
%306 = torch.aten._softmax %305, %int-1, %true : !torch.vtensor<[10,9216,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,77],f32>
%307 = torch.aten._to_copy %306, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,77],f16>
%308 = torch.aten.unsqueeze %307, %int3 : !torch.vtensor<[10,9216,77],f16>, !torch.int -> !torch.vtensor<[10,9216,77,1],f16>
%309 = torch.aten.permute %308, %203 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%310 = torch.aten.unsqueeze %288, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%311 = torch.aten.permute %310, %211 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,77],f16>
%312 = torch.aten.permute %309, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%313 = torch.prim.ListConstruct %int709632, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%314 = torch.aten._reshape_alias %312, %303, %313 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%315 = torch.aten.permute %311, %211 : !torch.vtensor<[10,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64,1],f16>
%316 = torch.prim.ListConstruct %int4928, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%317 = torch.aten._reshape_alias %315, %283, %316 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%318 = torch.aten.bmm %314, %317 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[10,77,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%319 = torch.aten.view %318, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%320 = torch.aten.permute %319, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%321 = torch.aten.view %320, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%322 = torch.aten._reshape_alias %321, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%323 = torch.aten.permute %322, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%324 = torch.aten.clone %323, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%325 = torch.aten._unsafe_view %324, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%326 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%327 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%328 = torch.aten.t %327 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%329 = torch.aten.view %325, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%330 = torch.aten.addmm %326, %329, %328, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%331 = torch.aten.view %330, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%332 = torch.aten.add.Tensor %331, %251, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%333 = torch.aten._to_copy %332, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_9, %result1_10, %result2_11 = torch.aten.native_layer_norm %333, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%334 = torch.aten._to_copy %44, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560],f16>
%335 = torch.aten._to_copy %45, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560,320],f16>
%336 = torch.aten._to_copy %result0_9, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%337 = torch.aten.t %335 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%338 = torch.aten.view %336, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%339 = torch.aten.addmm %334, %338, %337, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,2560],f16>
%340 = torch.prim.ListConstruct %int2, %int9216, %int2560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%341 = torch.aten.view %339, %340 : !torch.vtensor<[18432,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,2560],f16>
%342 = torch.aten.slice.Tensor %341, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%343 = torch.aten.slice.Tensor %341, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%344 = torch.aten.gelu %343, %str : !torch.vtensor<[2,9216,1280],f16>, !torch.str -> !torch.vtensor<[2,9216,1280],f16>
%345 = torch.aten.mul.Tensor %342, %344 : !torch.vtensor<[2,9216,1280],f16>, !torch.vtensor<[2,9216,1280],f16> -> !torch.vtensor<[2,9216,1280],f16>
%346 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%347 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%348 = torch.aten.t %347 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%349 = torch.prim.ListConstruct %int18432, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%350 = torch.aten.view %345, %349 : !torch.vtensor<[2,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[18432,1280],f16>
%351 = torch.aten.addmm %346, %350, %348, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%352 = torch.aten.view %351, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%353 = torch.aten.add.Tensor %352, %332, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%354 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%355 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%356 = torch.aten.t %355 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%357 = torch.aten.view %353, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%358 = torch.aten.addmm %354, %357, %356, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%359 = torch.aten.view %358, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%360 = torch.prim.ListConstruct %int2, %int96, %int96, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%361 = torch.prim.ListConstruct %int2949120, %int30720, %int320, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%362 = torch.aten._reshape_alias %359, %360, %361 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f16>
%363 = torch.aten.permute %362, %206 : !torch.vtensor<[2,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%364 = torch.prim.ListConstruct %int2949120, %int1, %int30720, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%365 = torch.aten._reshape_alias %363, %90, %364 : !torch.vtensor<[2,320,96,96],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%366 = torch.aten.clone %365, %int0 : !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%367 = torch.aten.add.Tensor %366, %133, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%368 = torch.aten._to_copy %367, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%369 = torch.aten._reshape_alias %368, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_12, %result1_13 = torch.aten.var_mean.correction %369, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%370 = torch.aten.add.Tensor %result0_12, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%371 = torch.aten.rsqrt %370 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%372 = torch.aten.sub.Tensor %369, %result1_13, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%373 = torch.aten.mul.Tensor %372, %371 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%374 = torch.aten.view %373, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%375 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%376 = torch.aten.unsqueeze %375, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%377 = torch.aten.unsqueeze %376, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%378 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%379 = torch.aten.unsqueeze %378, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%380 = torch.aten.unsqueeze %379, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%381 = torch.aten.mul.Tensor %374, %380 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%382 = torch.aten.add.Tensor %381, %377, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%383 = torch.aten._to_copy %382, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%384 = torch.aten.silu %383 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%385 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%386 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%387 = torch.aten._convolution %384, %386, %385, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%388 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%389 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%390 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%391 = torch.aten.t %390 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%392 = torch.aten.addmm %389, %388, %391, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%393 = torch.aten.unsqueeze %392, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%394 = torch.aten.unsqueeze %393, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%395 = torch.aten.add.Tensor %387, %394, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%396 = torch.aten._to_copy %395, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%397 = torch.aten._reshape_alias %396, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_14, %result1_15 = torch.aten.var_mean.correction %397, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%398 = torch.aten.add.Tensor %result0_14, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%399 = torch.aten.rsqrt %398 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%400 = torch.aten.sub.Tensor %397, %result1_15, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%401 = torch.aten.mul.Tensor %400, %399 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%402 = torch.aten.view %401, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%403 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%404 = torch.aten.unsqueeze %403, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%405 = torch.aten.unsqueeze %404, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%406 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%407 = torch.aten.unsqueeze %406, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%408 = torch.aten.unsqueeze %407, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%409 = torch.aten.mul.Tensor %402, %408 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%410 = torch.aten.add.Tensor %409, %405, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%411 = torch.aten._to_copy %410, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%412 = torch.aten.silu %411 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%413 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%414 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%415 = torch.aten._convolution %412, %414, %413, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%416 = torch.aten.add.Tensor %367, %415, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%417 = torch.aten._to_copy %416, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%418 = torch.aten._reshape_alias %417, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_16, %result1_17 = torch.aten.var_mean.correction %418, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%419 = torch.aten.add.Tensor %result0_16, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%420 = torch.aten.rsqrt %419 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%421 = torch.aten.sub.Tensor %418, %result1_17, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%422 = torch.aten.mul.Tensor %421, %420 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%423 = torch.aten.view %422, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%424 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%425 = torch.aten.unsqueeze %424, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%426 = torch.aten.unsqueeze %425, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%427 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%428 = torch.aten.unsqueeze %427, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%429 = torch.aten.unsqueeze %428, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%430 = torch.aten.mul.Tensor %423, %429 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%431 = torch.aten.add.Tensor %430, %426, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%432 = torch.aten._reshape_alias %431, %90, %149 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%433 = torch.aten.permute %432, %151 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f32>
%434 = torch.aten._reshape_alias %433, %153, %154 : !torch.vtensor<[2,96,96,320],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f32>
%435 = torch.aten.clone %434, %int0 : !torch.vtensor<[2,9216,320],f32>, !torch.int -> !torch.vtensor<[2,9216,320],f32>
%436 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%437 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%438 = torch.aten._to_copy %435, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%439 = torch.aten.t %437 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%440 = torch.aten.view %438, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%441 = torch.aten.addmm %436, %440, %439, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%442 = torch.aten.view %441, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%443 = torch.aten._to_copy %442, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_18, %result1_19, %result2_20 = torch.aten.native_layer_norm %443, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%444 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%445 = torch.aten._to_copy %result0_18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%446 = torch.aten.t %444 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%447 = torch.aten._reshape_alias %445, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%448 = torch.aten.mm %447, %446 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%449 = torch.aten._unsafe_view %448, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%450 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%451 = torch.aten._to_copy %result0_18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%452 = torch.aten.t %450 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%453 = torch.aten._reshape_alias %451, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%454 = torch.aten.mm %453, %452 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%455 = torch.aten._unsafe_view %454, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%456 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%457 = torch.aten._to_copy %result0_18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%458 = torch.aten.t %456 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%459 = torch.aten._reshape_alias %457, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%460 = torch.aten.mm %459, %458 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%461 = torch.aten._unsafe_view %460, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%462 = torch.aten._reshape_alias %449, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%463 = torch.aten.permute %462, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%464 = torch.aten.clone %463, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%465 = torch.aten._unsafe_view %464, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%466 = torch.aten._reshape_alias %455, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%467 = torch.aten.permute %466, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%468 = torch.aten.clone %467, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%469 = torch.aten._unsafe_view %468, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%470 = torch.aten._reshape_alias %461, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%471 = torch.aten.permute %470, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%472 = torch.aten.clone %471, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%473 = torch.aten._unsafe_view %472, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%474 = torch.aten.unsqueeze %465, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%475 = torch.aten.permute %474, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%476 = torch.aten.unsqueeze %469, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%477 = torch.aten.permute %476, %206 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,9216,64],f16>
%478 = torch.aten.permute %475, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%479 = torch.aten._reshape_alias %478, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%480 = torch.aten.permute %477, %211 : !torch.vtensor<[10,1,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,9216,1],f16>
%481 = torch.aten._reshape_alias %480, %213, %214 : !torch.vtensor<[10,64,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,9216],f16>
%482 = torch.aten.bmm %479, %481 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,9216],f16> -> !torch.vtensor<[10,9216,9216],f16>
%483 = torch.aten.view %482, %217 : !torch.vtensor<[10,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%484 = torch.aten.permute %483, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%485 = torch.aten.view %484, %220 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%486 = torch.aten.mul.Tensor %485, %0 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,9216],f16>
%487 = torch.aten._softmax %486, %int-1, %true : !torch.vtensor<[10,9216,9216],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,9216],f32>
%488 = torch.aten._to_copy %487, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,9216],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,9216],f16>
%489 = torch.aten.unsqueeze %488, %int3 : !torch.vtensor<[10,9216,9216],f16>, !torch.int -> !torch.vtensor<[10,9216,9216,1],f16>
%490 = torch.aten.permute %489, %203 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%491 = torch.aten.unsqueeze %473, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%492 = torch.aten.permute %491, %211 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,9216],f16>
%493 = torch.aten.permute %490, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%494 = torch.aten._reshape_alias %493, %220, %230 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%495 = torch.aten.permute %492, %211 : !torch.vtensor<[10,1,64,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%496 = torch.aten._reshape_alias %495, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%497 = torch.aten.bmm %494, %496 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[10,9216,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%498 = torch.aten.view %497, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%499 = torch.aten.permute %498, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%500 = torch.aten.view %499, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%501 = torch.aten._reshape_alias %500, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%502 = torch.aten.permute %501, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%503 = torch.aten.clone %502, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%504 = torch.aten._unsafe_view %503, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%505 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%506 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%507 = torch.aten.t %506 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%508 = torch.aten.view %504, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%509 = torch.aten.addmm %505, %508, %507, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%510 = torch.aten.view %509, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%511 = torch.aten.add.Tensor %510, %442, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%512 = torch.aten._to_copy %511, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_21, %result1_22, %result2_23 = torch.aten.native_layer_norm %512, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%513 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%514 = torch.aten._to_copy %result0_21, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%515 = torch.aten.t %513 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%516 = torch.aten._reshape_alias %514, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%517 = torch.aten.mm %516, %515 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%518 = torch.aten._unsafe_view %517, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%519 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%520 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%521 = torch.aten.t %519 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%522 = torch.aten._reshape_alias %520, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%523 = torch.aten.mm %522, %521 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%524 = torch.aten._unsafe_view %523, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%525 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%526 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%527 = torch.aten.t %525 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%528 = torch.aten._reshape_alias %526, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%529 = torch.aten.mm %528, %527 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%530 = torch.aten._unsafe_view %529, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%531 = torch.aten._reshape_alias %518, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%532 = torch.aten.permute %531, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%533 = torch.aten.clone %532, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%534 = torch.aten._unsafe_view %533, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%535 = torch.aten._reshape_alias %524, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%536 = torch.aten.permute %535, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%537 = torch.aten.clone %536, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%538 = torch.aten._unsafe_view %537, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%539 = torch.aten._reshape_alias %530, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%540 = torch.aten.permute %539, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%541 = torch.aten.clone %540, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%542 = torch.aten._unsafe_view %541, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%543 = torch.aten.unsqueeze %534, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%544 = torch.aten.permute %543, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%545 = torch.aten.unsqueeze %538, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%546 = torch.aten.permute %545, %206 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,77,64],f16>
%547 = torch.aten.permute %544, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%548 = torch.aten._reshape_alias %547, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%549 = torch.aten.permute %546, %211 : !torch.vtensor<[10,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,77,1],f16>
%550 = torch.aten._reshape_alias %549, %296, %297 : !torch.vtensor<[10,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,77],f16>
%551 = torch.aten.bmm %548, %550 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,77],f16> -> !torch.vtensor<[10,9216,77],f16>
%552 = torch.aten.view %551, %300 : !torch.vtensor<[10,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%553 = torch.aten.permute %552, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%554 = torch.aten.view %553, %303 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%555 = torch.aten.mul.Tensor %554, %0 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,77],f16>
%556 = torch.aten._softmax %555, %int-1, %true : !torch.vtensor<[10,9216,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,77],f32>
%557 = torch.aten._to_copy %556, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,77],f16>
%558 = torch.aten.unsqueeze %557, %int3 : !torch.vtensor<[10,9216,77],f16>, !torch.int -> !torch.vtensor<[10,9216,77,1],f16>
%559 = torch.aten.permute %558, %203 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%560 = torch.aten.unsqueeze %542, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%561 = torch.aten.permute %560, %211 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,77],f16>
%562 = torch.aten.permute %559, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%563 = torch.aten._reshape_alias %562, %303, %313 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%564 = torch.aten.permute %561, %211 : !torch.vtensor<[10,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64,1],f16>
%565 = torch.aten._reshape_alias %564, %283, %316 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%566 = torch.aten.bmm %563, %565 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[10,77,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%567 = torch.aten.view %566, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%568 = torch.aten.permute %567, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%569 = torch.aten.view %568, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%570 = torch.aten._reshape_alias %569, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%571 = torch.aten.permute %570, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%572 = torch.aten.clone %571, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%573 = torch.aten._unsafe_view %572, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%574 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%575 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%576 = torch.aten.t %575 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%577 = torch.aten.view %573, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%578 = torch.aten.addmm %574, %577, %576, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%579 = torch.aten.view %578, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%580 = torch.aten.add.Tensor %579, %511, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%581 = torch.aten._to_copy %580, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %581, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%582 = torch.aten._to_copy %44, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560],f16>
%583 = torch.aten._to_copy %45, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560,320],f16>
%584 = torch.aten._to_copy %result0_24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%585 = torch.aten.t %583 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%586 = torch.aten.view %584, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%587 = torch.aten.addmm %582, %586, %585, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,2560],f16>
%588 = torch.aten.view %587, %340 : !torch.vtensor<[18432,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,2560],f16>
%589 = torch.aten.slice.Tensor %588, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%590 = torch.aten.slice.Tensor %588, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%591 = torch.aten.gelu %590, %str : !torch.vtensor<[2,9216,1280],f16>, !torch.str -> !torch.vtensor<[2,9216,1280],f16>
%592 = torch.aten.mul.Tensor %589, %591 : !torch.vtensor<[2,9216,1280],f16>, !torch.vtensor<[2,9216,1280],f16> -> !torch.vtensor<[2,9216,1280],f16>
%593 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%594 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%595 = torch.aten.t %594 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%596 = torch.aten.view %592, %349 : !torch.vtensor<[2,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[18432,1280],f16>
%597 = torch.aten.addmm %593, %596, %595, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%598 = torch.aten.view %597, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%599 = torch.aten.add.Tensor %598, %580, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%600 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%601 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%602 = torch.aten.t %601 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%603 = torch.aten.view %599, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%604 = torch.aten.addmm %600, %603, %602, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%605 = torch.aten.view %604, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%606 = torch.aten._reshape_alias %605, %360, %361 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f16>
%607 = torch.aten.permute %606, %206 : !torch.vtensor<[2,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%608 = torch.aten._reshape_alias %607, %90, %364 : !torch.vtensor<[2,320,96,96],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%609 = torch.aten.clone %608, %int0 : !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%610 = torch.aten.add.Tensor %609, %416, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%611 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%612 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%613 = torch.prim.ListConstruct %int2, %int2 : (!torch.int, !torch.int) -> !torch.list<int>
%614 = torch.aten._convolution %610, %612, %611, %613, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,48,48],f16>
%615 = torch.aten._to_copy %614, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,48,48],f32>
%616 = torch.prim.ListConstruct %int2, %int32, %int10, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%617 = torch.prim.ListConstruct %int737280, %int23040, %int2304, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%618 = torch.aten._reshape_alias %615, %616, %617 : !torch.vtensor<[2,320,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,2304],f32>
%result0_27, %result1_28 = torch.aten.var_mean.correction %618, %85, %int0, %true : !torch.vtensor<[2,32,10,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%619 = torch.aten.add.Tensor %result0_27, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%620 = torch.aten.rsqrt %619 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%621 = torch.aten.sub.Tensor %618, %result1_28, %int1 : !torch.vtensor<[2,32,10,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,2304],f32>
%622 = torch.aten.mul.Tensor %621, %620 : !torch.vtensor<[2,32,10,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,2304],f32>
%623 = torch.prim.ListConstruct %int2, %int320, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%624 = torch.aten.view %622, %623 : !torch.vtensor<[2,32,10,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,320,48,48],f32>
%625 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%626 = torch.aten.unsqueeze %625, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%627 = torch.aten.unsqueeze %626, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%628 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%629 = torch.aten.unsqueeze %628, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%630 = torch.aten.unsqueeze %629, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%631 = torch.aten.mul.Tensor %624, %630 : !torch.vtensor<[2,320,48,48],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,48,48],f32>
%632 = torch.aten.add.Tensor %631, %627, %int1 : !torch.vtensor<[2,320,48,48],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,48,48],f32>
%633 = torch.aten._to_copy %632, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,48,48],f16>
%634 = torch.aten.silu %633 : !torch.vtensor<[2,320,48,48],f16> -> !torch.vtensor<[2,320,48,48],f16>
%635 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%636 = torch.aten._to_copy %7, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,320,3,3],f16>
%637 = torch.aten._convolution %634, %636, %635, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,48,48],f16>, !torch.vtensor<[640,320,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%638 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%639 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%640 = torch.aten._to_copy %28, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280],f16>
%641 = torch.aten.t %640 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%642 = torch.aten.addmm %639, %638, %641, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%643 = torch.aten.unsqueeze %642, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%644 = torch.aten.unsqueeze %643, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%645 = torch.aten.add.Tensor %637, %644, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%646 = torch.aten._to_copy %645, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%647 = torch.prim.ListConstruct %int2, %int32, %int20, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%648 = torch.prim.ListConstruct %int1474560, %int46080, %int2304, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%649 = torch.aten._reshape_alias %646, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_29, %result1_30 = torch.aten.var_mean.correction %649, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%650 = torch.aten.add.Tensor %result0_29, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%651 = torch.aten.rsqrt %650 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%652 = torch.aten.sub.Tensor %649, %result1_30, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%653 = torch.aten.mul.Tensor %652, %651 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%654 = torch.prim.ListConstruct %int2, %int640, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%655 = torch.aten.view %653, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%656 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%657 = torch.aten.unsqueeze %656, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%658 = torch.aten.unsqueeze %657, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%659 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%660 = torch.aten.unsqueeze %659, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%661 = torch.aten.unsqueeze %660, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%662 = torch.aten.mul.Tensor %655, %661 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%663 = torch.aten.add.Tensor %662, %658, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%664 = torch.aten._to_copy %663, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%665 = torch.aten.silu %664 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
%666 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%667 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%668 = torch.aten._convolution %665, %667, %666, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%669 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%670 = torch.aten._to_copy %8, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,320,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,320,1,1],f16>
%671 = torch.aten._convolution %614, %670, %669, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,48,48],f16>, !torch.vtensor<[640,320,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%672 = torch.aten.add.Tensor %671, %668, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%673 = torch.aten._to_copy %672, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%674 = torch.aten._reshape_alias %673, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_31, %result1_32 = torch.aten.var_mean.correction %674, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%675 = torch.aten.add.Tensor %result0_31, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%676 = torch.aten.rsqrt %675 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%677 = torch.aten.sub.Tensor %674, %result1_32, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%678 = torch.aten.mul.Tensor %677, %676 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%679 = torch.aten.view %678, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%680 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%681 = torch.aten.unsqueeze %680, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%682 = torch.aten.unsqueeze %681, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%683 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%684 = torch.aten.unsqueeze %683, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%685 = torch.aten.unsqueeze %684, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%686 = torch.aten.mul.Tensor %679, %685 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%687 = torch.aten.add.Tensor %686, %682, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%688 = torch.prim.ListConstruct %int1474560, %int2304, %int48, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%689 = torch.aten._reshape_alias %687, %654, %688 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%690 = torch.aten.permute %689, %151 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f32>
%691 = torch.prim.ListConstruct %int2, %int2304, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%692 = torch.prim.ListConstruct %int1474560, %int1, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%693 = torch.aten._reshape_alias %690, %691, %692 : !torch.vtensor<[2,48,48,640],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f32>
%694 = torch.aten.clone %693, %int0 : !torch.vtensor<[2,2304,640],f32>, !torch.int -> !torch.vtensor<[2,2304,640],f32>
%695 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%696 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%697 = torch.aten._to_copy %694, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%698 = torch.aten.t %696 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%699 = torch.prim.ListConstruct %int4608, %int640 : (!torch.int, !torch.int) -> !torch.list<int>
%700 = torch.aten.view %697, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%701 = torch.aten.addmm %695, %700, %698, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%702 = torch.aten.view %701, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%703 = torch.aten._to_copy %702, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%704 = torch.prim.ListConstruct %int640 : (!torch.int) -> !torch.list<int>
%result0_33, %result1_34, %result2_35 = torch.aten.native_layer_norm %703, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%705 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%706 = torch.aten._to_copy %result0_33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%707 = torch.aten.t %705 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%708 = torch.prim.ListConstruct %int640, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%709 = torch.aten._reshape_alias %706, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%710 = torch.aten.mm %709, %707 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%711 = torch.aten._unsafe_view %710, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%712 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%713 = torch.aten._to_copy %result0_33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%714 = torch.aten.t %712 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%715 = torch.aten._reshape_alias %713, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%716 = torch.aten.mm %715, %714 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%717 = torch.aten._unsafe_view %716, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%718 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%719 = torch.aten._to_copy %result0_33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%720 = torch.aten.t %718 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%721 = torch.aten._reshape_alias %719, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%722 = torch.aten.mm %721, %720 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%723 = torch.aten._unsafe_view %722, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%724 = torch.prim.ListConstruct %int2, %int2304, %int10, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%725 = torch.prim.ListConstruct %int1474560, %int640, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%726 = torch.aten._reshape_alias %711, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%727 = torch.aten.permute %726, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%728 = torch.aten.clone %727, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%729 = torch.prim.ListConstruct %int20, %int2304, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%730 = torch.aten._unsafe_view %728, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%731 = torch.aten._reshape_alias %717, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%732 = torch.aten.permute %731, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%733 = torch.aten.clone %732, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%734 = torch.aten._unsafe_view %733, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%735 = torch.aten._reshape_alias %723, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%736 = torch.aten.permute %735, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%737 = torch.aten.clone %736, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%738 = torch.aten._unsafe_view %737, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%739 = torch.aten.unsqueeze %730, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%740 = torch.aten.permute %739, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%741 = torch.aten.unsqueeze %734, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%742 = torch.aten.permute %741, %206 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,2304,64],f16>
%743 = torch.aten.permute %740, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%744 = torch.prim.ListConstruct %int147456, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%745 = torch.aten._reshape_alias %743, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%746 = torch.aten.permute %742, %211 : !torch.vtensor<[20,1,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,2304,1],f16>
%747 = torch.prim.ListConstruct %int20, %int64, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%748 = torch.prim.ListConstruct %int147456, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%749 = torch.aten._reshape_alias %746, %747, %748 : !torch.vtensor<[20,64,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,2304],f16>
%750 = torch.aten.bmm %745, %749 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,2304],f16> -> !torch.vtensor<[20,2304,2304],f16>
%751 = torch.prim.ListConstruct %int20, %int2304, %int1, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%752 = torch.aten.view %750, %751 : !torch.vtensor<[20,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%753 = torch.aten.permute %752, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%754 = torch.prim.ListConstruct %int20, %int2304, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%755 = torch.aten.view %753, %754 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%756 = torch.aten.mul.Tensor %755, %0 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,2304],f16>
%757 = torch.aten._softmax %756, %int-1, %true : !torch.vtensor<[20,2304,2304],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,2304],f32>
%758 = torch.aten._to_copy %757, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,2304],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,2304],f16>
%759 = torch.aten.unsqueeze %758, %int3 : !torch.vtensor<[20,2304,2304],f16>, !torch.int -> !torch.vtensor<[20,2304,2304,1],f16>
%760 = torch.aten.permute %759, %203 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%761 = torch.aten.unsqueeze %738, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%762 = torch.aten.permute %761, %211 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,2304],f16>
%763 = torch.aten.permute %760, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%764 = torch.prim.ListConstruct %int5308416, %int2304, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%765 = torch.aten._reshape_alias %763, %754, %764 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%766 = torch.aten.permute %762, %211 : !torch.vtensor<[20,1,64,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%767 = torch.aten._reshape_alias %766, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%768 = torch.aten.bmm %765, %767 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[20,2304,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%769 = torch.prim.ListConstruct %int20, %int2304, %int1, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%770 = torch.aten.view %768, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%771 = torch.aten.permute %770, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%772 = torch.aten.view %771, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%773 = torch.prim.ListConstruct %int2, %int10, %int2304, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%774 = torch.prim.ListConstruct %int1474560, %int147456, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%775 = torch.aten._reshape_alias %772, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%776 = torch.aten.permute %775, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%777 = torch.aten.clone %776, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%778 = torch.aten._unsafe_view %777, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%779 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%780 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%781 = torch.aten.t %780 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%782 = torch.aten.view %778, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%783 = torch.aten.addmm %779, %782, %781, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%784 = torch.aten.view %783, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%785 = torch.aten.add.Tensor %784, %702, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%786 = torch.aten._to_copy %785, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_36, %result1_37, %result2_38 = torch.aten.native_layer_norm %786, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%787 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%788 = torch.aten._to_copy %result0_36, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%789 = torch.aten.t %787 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%790 = torch.aten._reshape_alias %788, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%791 = torch.aten.mm %790, %789 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%792 = torch.aten._unsafe_view %791, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%793 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%794 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%795 = torch.aten.t %793 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%796 = torch.aten._reshape_alias %794, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%797 = torch.aten.mm %796, %795 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%798 = torch.prim.ListConstruct %int2, %int77, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%799 = torch.aten._unsafe_view %797, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%800 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%801 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%802 = torch.aten.t %800 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%803 = torch.aten._reshape_alias %801, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%804 = torch.aten.mm %803, %802 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%805 = torch.aten._unsafe_view %804, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%806 = torch.aten._reshape_alias %792, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%807 = torch.aten.permute %806, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%808 = torch.aten.clone %807, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%809 = torch.aten._unsafe_view %808, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%810 = torch.prim.ListConstruct %int2, %int77, %int10, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%811 = torch.prim.ListConstruct %int49280, %int640, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%812 = torch.aten._reshape_alias %799, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%813 = torch.aten.permute %812, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%814 = torch.aten.clone %813, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%815 = torch.prim.ListConstruct %int20, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%816 = torch.aten._unsafe_view %814, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%817 = torch.aten._reshape_alias %805, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%818 = torch.aten.permute %817, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%819 = torch.aten.clone %818, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%820 = torch.aten._unsafe_view %819, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%821 = torch.aten.unsqueeze %809, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%822 = torch.aten.permute %821, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%823 = torch.aten.unsqueeze %816, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%824 = torch.aten.permute %823, %206 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,77,64],f16>
%825 = torch.aten.permute %822, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%826 = torch.aten._reshape_alias %825, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%827 = torch.aten.permute %824, %211 : !torch.vtensor<[20,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,77,1],f16>
%828 = torch.prim.ListConstruct %int20, %int64, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%829 = torch.aten._reshape_alias %827, %828, %297 : !torch.vtensor<[20,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,77],f16>
%830 = torch.aten.bmm %826, %829 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,77],f16> -> !torch.vtensor<[20,2304,77],f16>
%831 = torch.prim.ListConstruct %int20, %int2304, %int1, %int77 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%832 = torch.aten.view %830, %831 : !torch.vtensor<[20,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%833 = torch.aten.permute %832, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%834 = torch.prim.ListConstruct %int20, %int2304, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%835 = torch.aten.view %833, %834 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%836 = torch.aten.mul.Tensor %835, %0 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,77],f16>
%837 = torch.aten._softmax %836, %int-1, %true : !torch.vtensor<[20,2304,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,77],f32>
%838 = torch.aten._to_copy %837, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,77],f16>
%839 = torch.aten.unsqueeze %838, %int3 : !torch.vtensor<[20,2304,77],f16>, !torch.int -> !torch.vtensor<[20,2304,77,1],f16>
%840 = torch.aten.permute %839, %203 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%841 = torch.aten.unsqueeze %820, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%842 = torch.aten.permute %841, %211 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,77],f16>
%843 = torch.aten.permute %840, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%844 = torch.prim.ListConstruct %int177408, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%845 = torch.aten._reshape_alias %843, %834, %844 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%846 = torch.aten.permute %842, %211 : !torch.vtensor<[20,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64,1],f16>
%847 = torch.aten._reshape_alias %846, %815, %316 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%848 = torch.aten.bmm %845, %847 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%849 = torch.aten.view %848, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%850 = torch.aten.permute %849, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%851 = torch.aten.view %850, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%852 = torch.aten._reshape_alias %851, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%853 = torch.aten.permute %852, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%854 = torch.aten.clone %853, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%855 = torch.aten._unsafe_view %854, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%856 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%857 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%858 = torch.aten.t %857 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%859 = torch.aten.view %855, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%860 = torch.aten.addmm %856, %859, %858, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%861 = torch.aten.view %860, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%862 = torch.aten.add.Tensor %861, %785, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%863 = torch.aten._to_copy %862, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_39, %result1_40, %result2_41 = torch.aten.native_layer_norm %863, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%864 = torch.aten._to_copy %31, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120],f16>
%865 = torch.aten._to_copy %32, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120,640],f16>
%866 = torch.aten._to_copy %result0_39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%867 = torch.aten.t %865 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%868 = torch.aten.view %866, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%869 = torch.aten.addmm %864, %868, %867, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,5120],f16>
%870 = torch.prim.ListConstruct %int2, %int2304, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%871 = torch.aten.view %869, %870 : !torch.vtensor<[4608,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,5120],f16>
%872 = torch.aten.slice.Tensor %871, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%873 = torch.aten.slice.Tensor %871, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%874 = torch.aten.gelu %873, %str : !torch.vtensor<[2,2304,2560],f16>, !torch.str -> !torch.vtensor<[2,2304,2560],f16>
%875 = torch.aten.mul.Tensor %872, %874 : !torch.vtensor<[2,2304,2560],f16>, !torch.vtensor<[2,2304,2560],f16> -> !torch.vtensor<[2,2304,2560],f16>
%876 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%877 = torch.aten._to_copy %33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,2560],f16>
%878 = torch.aten.t %877 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%879 = torch.prim.ListConstruct %int4608, %int2560 : (!torch.int, !torch.int) -> !torch.list<int>
%880 = torch.aten.view %875, %879 : !torch.vtensor<[2,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4608,2560],f16>
%881 = torch.aten.addmm %876, %880, %878, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%882 = torch.aten.view %881, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%883 = torch.aten.add.Tensor %882, %862, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%884 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%885 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%886 = torch.aten.t %885 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%887 = torch.aten.view %883, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%888 = torch.aten.addmm %884, %887, %886, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%889 = torch.aten.view %888, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%890 = torch.prim.ListConstruct %int2, %int48, %int48, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%891 = torch.prim.ListConstruct %int1474560, %int30720, %int640, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%892 = torch.aten._reshape_alias %889, %890, %891 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f16>
%893 = torch.aten.permute %892, %206 : !torch.vtensor<[2,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%894 = torch.prim.ListConstruct %int1474560, %int1, %int30720, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%895 = torch.aten._reshape_alias %893, %654, %894 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%896 = torch.aten.clone %895, %int0 : !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%897 = torch.aten.add.Tensor %896, %672, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%898 = torch.aten._to_copy %897, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%899 = torch.aten._reshape_alias %898, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_42, %result1_43 = torch.aten.var_mean.correction %899, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%900 = torch.aten.add.Tensor %result0_42, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%901 = torch.aten.rsqrt %900 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%902 = torch.aten.sub.Tensor %899, %result1_43, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%903 = torch.aten.mul.Tensor %902, %901 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%904 = torch.aten.view %903, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%905 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%906 = torch.aten.unsqueeze %905, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%907 = torch.aten.unsqueeze %906, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%908 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%909 = torch.aten.unsqueeze %908, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%910 = torch.aten.unsqueeze %909, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%911 = torch.aten.mul.Tensor %904, %910 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%912 = torch.aten.add.Tensor %911, %907, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%913 = torch.aten._to_copy %912, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%914 = torch.aten.silu %913 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
%915 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%916 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%917 = torch.aten._convolution %914, %916, %915, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%918 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%919 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%920 = torch.aten._to_copy %28, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280],f16>
%921 = torch.aten.t %920 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%922 = torch.aten.addmm %919, %918, %921, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%923 = torch.aten.unsqueeze %922, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%924 = torch.aten.unsqueeze %923, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%925 = torch.aten.add.Tensor %917, %924, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%926 = torch.aten._to_copy %925, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%927 = torch.aten._reshape_alias %926, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_44, %result1_45 = torch.aten.var_mean.correction %927, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%928 = torch.aten.add.Tensor %result0_44, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%929 = torch.aten.rsqrt %928 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%930 = torch.aten.sub.Tensor %927, %result1_45, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%931 = torch.aten.mul.Tensor %930, %929 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%932 = torch.aten.view %931, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%933 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%934 = torch.aten.unsqueeze %933, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%935 = torch.aten.unsqueeze %934, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%936 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%937 = torch.aten.unsqueeze %936, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%938 = torch.aten.unsqueeze %937, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%939 = torch.aten.mul.Tensor %932, %938 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%940 = torch.aten.add.Tensor %939, %935, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%941 = torch.aten._to_copy %940, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%942 = torch.aten.silu %941 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
%943 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%944 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%945 = torch.aten._convolution %942, %944, %943, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%946 = torch.aten.add.Tensor %897, %945, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%947 = torch.aten._to_copy %946, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%948 = torch.aten._reshape_alias %947, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_46, %result1_47 = torch.aten.var_mean.correction %948, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%949 = torch.aten.add.Tensor %result0_46, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%950 = torch.aten.rsqrt %949 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%951 = torch.aten.sub.Tensor %948, %result1_47, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%952 = torch.aten.mul.Tensor %951, %950 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%953 = torch.aten.view %952, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%954 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%955 = torch.aten.unsqueeze %954, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%956 = torch.aten.unsqueeze %955, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%957 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%958 = torch.aten.unsqueeze %957, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%959 = torch.aten.unsqueeze %958, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%960 = torch.aten.mul.Tensor %953, %959 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%961 = torch.aten.add.Tensor %960, %956, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%962 = torch.aten._reshape_alias %961, %654, %688 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%963 = torch.aten.permute %962, %151 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f32>
%964 = torch.aten._reshape_alias %963, %691, %692 : !torch.vtensor<[2,48,48,640],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f32>
%965 = torch.aten.clone %964, %int0 : !torch.vtensor<[2,2304,640],f32>, !torch.int -> !torch.vtensor<[2,2304,640],f32>
%966 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%967 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%968 = torch.aten._to_copy %965, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%969 = torch.aten.t %967 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%970 = torch.aten.view %968, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%971 = torch.aten.addmm %966, %970, %969, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%972 = torch.aten.view %971, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%973 = torch.aten._to_copy %972, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %973, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%974 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%975 = torch.aten._to_copy %result0_48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%976 = torch.aten.t %974 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%977 = torch.aten._reshape_alias %975, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%978 = torch.aten.mm %977, %976 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%979 = torch.aten._unsafe_view %978, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%980 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%981 = torch.aten._to_copy %result0_48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%982 = torch.aten.t %980 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%983 = torch.aten._reshape_alias %981, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%984 = torch.aten.mm %983, %982 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%985 = torch.aten._unsafe_view %984, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%986 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%987 = torch.aten._to_copy %result0_48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%988 = torch.aten.t %986 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%989 = torch.aten._reshape_alias %987, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%990 = torch.aten.mm %989, %988 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%991 = torch.aten._unsafe_view %990, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%992 = torch.aten._reshape_alias %979, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%993 = torch.aten.permute %992, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%994 = torch.aten.clone %993, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%995 = torch.aten._unsafe_view %994, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%996 = torch.aten._reshape_alias %985, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%997 = torch.aten.permute %996, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%998 = torch.aten.clone %997, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%999 = torch.aten._unsafe_view %998, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1000 = torch.aten._reshape_alias %991, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%1001 = torch.aten.permute %1000, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%1002 = torch.aten.clone %1001, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%1003 = torch.aten._unsafe_view %1002, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1004 = torch.aten.unsqueeze %995, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%1005 = torch.aten.permute %1004, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%1006 = torch.aten.unsqueeze %999, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%1007 = torch.aten.permute %1006, %206 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,2304,64],f16>
%1008 = torch.aten.permute %1005, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%1009 = torch.aten._reshape_alias %1008, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1010 = torch.aten.permute %1007, %211 : !torch.vtensor<[20,1,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,2304,1],f16>
%1011 = torch.aten._reshape_alias %1010, %747, %748 : !torch.vtensor<[20,64,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,2304],f16>
%1012 = torch.aten.bmm %1009, %1011 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,2304],f16> -> !torch.vtensor<[20,2304,2304],f16>
%1013 = torch.aten.view %1012, %751 : !torch.vtensor<[20,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%1014 = torch.aten.permute %1013, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%1015 = torch.aten.view %1014, %754 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%1016 = torch.aten.mul.Tensor %1015, %0 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,2304],f16>
%1017 = torch.aten._softmax %1016, %int-1, %true : !torch.vtensor<[20,2304,2304],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,2304],f32>
%1018 = torch.aten._to_copy %1017, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,2304],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,2304],f16>
%1019 = torch.aten.unsqueeze %1018, %int3 : !torch.vtensor<[20,2304,2304],f16>, !torch.int -> !torch.vtensor<[20,2304,2304,1],f16>
%1020 = torch.aten.permute %1019, %203 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%1021 = torch.aten.unsqueeze %1003, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%1022 = torch.aten.permute %1021, %211 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,2304],f16>
%1023 = torch.aten.permute %1020, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%1024 = torch.aten._reshape_alias %1023, %754, %764 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%1025 = torch.aten.permute %1022, %211 : !torch.vtensor<[20,1,64,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%1026 = torch.aten._reshape_alias %1025, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1027 = torch.aten.bmm %1024, %1026 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[20,2304,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%1028 = torch.aten.view %1027, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%1029 = torch.aten.permute %1028, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%1030 = torch.aten.view %1029, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1031 = torch.aten._reshape_alias %1030, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%1032 = torch.aten.permute %1031, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%1033 = torch.aten.clone %1032, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%1034 = torch.aten._unsafe_view %1033, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1035 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%1036 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%1037 = torch.aten.t %1036 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%1038 = torch.aten.view %1034, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%1039 = torch.aten.addmm %1035, %1038, %1037, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%1040 = torch.aten.view %1039, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1041 = torch.aten.add.Tensor %1040, %972, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%1042 = torch.aten._to_copy %1041, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %1042, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%1043 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%1044 = torch.aten._to_copy %result0_51, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%1045 = torch.aten.t %1043 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%1046 = torch.aten._reshape_alias %1044, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%1047 = torch.aten.mm %1046, %1045 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%1048 = torch.aten._unsafe_view %1047, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1049 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%1050 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1051 = torch.aten.t %1049 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%1052 = torch.aten._reshape_alias %1050, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1053 = torch.aten.mm %1052, %1051 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%1054 = torch.aten._unsafe_view %1053, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%1055 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%1056 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1057 = torch.aten.t %1055 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%1058 = torch.aten._reshape_alias %1056, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1059 = torch.aten.mm %1058, %1057 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%1060 = torch.aten._unsafe_view %1059, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%1061 = torch.aten._reshape_alias %1048, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%1062 = torch.aten.permute %1061, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%1063 = torch.aten.clone %1062, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%1064 = torch.aten._unsafe_view %1063, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1065 = torch.aten._reshape_alias %1054, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%1066 = torch.aten.permute %1065, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%1067 = torch.aten.clone %1066, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%1068 = torch.aten._unsafe_view %1067, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%1069 = torch.aten._reshape_alias %1060, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%1070 = torch.aten.permute %1069, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%1071 = torch.aten.clone %1070, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%1072 = torch.aten._unsafe_view %1071, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%1073 = torch.aten.unsqueeze %1064, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%1074 = torch.aten.permute %1073, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%1075 = torch.aten.unsqueeze %1068, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%1076 = torch.aten.permute %1075, %206 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,77,64],f16>
%1077 = torch.aten.permute %1074, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%1078 = torch.aten._reshape_alias %1077, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1079 = torch.aten.permute %1076, %211 : !torch.vtensor<[20,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,77,1],f16>
%1080 = torch.aten._reshape_alias %1079, %828, %297 : !torch.vtensor<[20,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,77],f16>
%1081 = torch.aten.bmm %1078, %1080 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,77],f16> -> !torch.vtensor<[20,2304,77],f16>
%1082 = torch.aten.view %1081, %831 : !torch.vtensor<[20,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%1083 = torch.aten.permute %1082, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%1084 = torch.aten.view %1083, %834 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%1085 = torch.aten.mul.Tensor %1084, %0 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,77],f16>
%1086 = torch.aten._softmax %1085, %int-1, %true : !torch.vtensor<[20,2304,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,77],f32>
%1087 = torch.aten._to_copy %1086, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,77],f16>
%1088 = torch.aten.unsqueeze %1087, %int3 : !torch.vtensor<[20,2304,77],f16>, !torch.int -> !torch.vtensor<[20,2304,77,1],f16>
%1089 = torch.aten.permute %1088, %203 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%1090 = torch.aten.unsqueeze %1072, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%1091 = torch.aten.permute %1090, %211 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,77],f16>
%1092 = torch.aten.permute %1089, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%1093 = torch.aten._reshape_alias %1092, %834, %844 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%1094 = torch.aten.permute %1091, %211 : !torch.vtensor<[20,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64,1],f16>
%1095 = torch.aten._reshape_alias %1094, %815, %316 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%1096 = torch.aten.bmm %1093, %1095 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%1097 = torch.aten.view %1096, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%1098 = torch.aten.permute %1097, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%1099 = torch.aten.view %1098, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%1100 = torch.aten._reshape_alias %1099, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%1101 = torch.aten.permute %1100, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%1102 = torch.aten.clone %1101, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%1103 = torch.aten._unsafe_view %1102, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1104 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%1105 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%1106 = torch.aten.t %1105 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%1107 = torch.aten.view %1103, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%1108 = torch.aten.addmm %1104, %1107, %1106, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%1109 = torch.aten.view %1108, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1110 = torch.aten.add.Tensor %1109, %1041, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%1111 = torch.aten._to_copy %1110, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_54, %result1_55, %result2_56 = torch.aten.native_layer_norm %1111, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%1112 = torch.aten._to_copy %31, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120],f16>
%1113 = torch.aten._to_copy %32, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120,640],f16>
%1114 = torch.aten._to_copy %result0_54, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%1115 = torch.aten.t %1113 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%1116 = torch.aten.view %1114, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%1117 = torch.aten.addmm %1112, %1116, %1115, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,5120],f16>
%1118 = torch.aten.view %1117, %870 : !torch.vtensor<[4608,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,5120],f16>
%1119 = torch.aten.slice.Tensor %1118, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%1120 = torch.aten.slice.Tensor %1118, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%1121 = torch.aten.gelu %1120, %str : !torch.vtensor<[2,2304,2560],f16>, !torch.str -> !torch.vtensor<[2,2304,2560],f16>
%1122 = torch.aten.mul.Tensor %1119, %1121 : !torch.vtensor<[2,2304,2560],f16>, !torch.vtensor<[2,2304,2560],f16> -> !torch.vtensor<[2,2304,2560],f16>
%1123 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%1124 = torch.aten._to_copy %33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,2560],f16>
%1125 = torch.aten.t %1124 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%1126 = torch.aten.view %1122, %879 : !torch.vtensor<[2,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4608,2560],f16>
%1127 = torch.aten.addmm %1123, %1126, %1125, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%1128 = torch.aten.view %1127, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1129 = torch.aten.add.Tensor %1128, %1110, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%1130 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%1131 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%1132 = torch.aten.t %1131 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%1133 = torch.aten.view %1129, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%1134 = torch.aten.addmm %1130, %1133, %1132, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%1135 = torch.aten.view %1134, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%1136 = torch.aten._reshape_alias %1135, %890, %891 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f16>
%1137 = torch.aten.permute %1136, %206 : !torch.vtensor<[2,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%1138 = torch.aten._reshape_alias %1137, %654, %894 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%1139 = torch.aten.clone %1138, %int0 : !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%1140 = torch.aten.add.Tensor %1139, %946, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%1141 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%1142 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%1143 = torch.aten._convolution %1140, %1142, %1141, %613, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,24,24],f16>
%1144 = torch.aten._to_copy %1143, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,24,24],f32>
%1145 = torch.prim.ListConstruct %int2, %int32, %int20, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1146 = torch.prim.ListConstruct %int368640, %int11520, %int576, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1147 = torch.aten._reshape_alias %1144, %1145, %1146 : !torch.vtensor<[2,640,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,576],f32>
%result0_57, %result1_58 = torch.aten.var_mean.correction %1147, %85, %int0, %true : !torch.vtensor<[2,32,20,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1148 = torch.aten.add.Tensor %result0_57, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1149 = torch.aten.rsqrt %1148 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1150 = torch.aten.sub.Tensor %1147, %result1_58, %int1 : !torch.vtensor<[2,32,20,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,576],f32>
%1151 = torch.aten.mul.Tensor %1150, %1149 : !torch.vtensor<[2,32,20,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,576],f32>
%1152 = torch.prim.ListConstruct %int2, %int640, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1153 = torch.aten.view %1151, %1152 : !torch.vtensor<[2,32,20,576],f32>, !torch.list<int> -> !torch.vtensor<[2,640,24,24],f32>
%1154 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%1155 = torch.aten.unsqueeze %1154, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%1156 = torch.aten.unsqueeze %1155, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%1157 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%1158 = torch.aten.unsqueeze %1157, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%1159 = torch.aten.unsqueeze %1158, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%1160 = torch.aten.mul.Tensor %1153, %1159 : !torch.vtensor<[2,640,24,24],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,24,24],f32>
%1161 = torch.aten.add.Tensor %1160, %1156, %int1 : !torch.vtensor<[2,640,24,24],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,24,24],f32>
%1162 = torch.aten._to_copy %1161, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,24,24],f16>
%1163 = torch.aten.silu %1162 : !torch.vtensor<[2,640,24,24],f16> -> !torch.vtensor<[2,640,24,24],f16>
%1164 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1165 = torch.aten._to_copy %9, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,640,3,3],f16>
%1166 = torch.aten._convolution %1163, %1165, %1164, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,24,24],f16>, !torch.vtensor<[1280,640,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%1167 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1168 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1169 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1170 = torch.aten.t %1169 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1171 = torch.aten.addmm %1168, %1167, %1170, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1172 = torch.aten.unsqueeze %1171, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1173 = torch.aten.unsqueeze %1172, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1174 = torch.aten.add.Tensor %1166, %1173, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1175 = torch.aten._to_copy %1174, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%1176 = torch.prim.ListConstruct %int2, %int32, %int40, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1177 = torch.prim.ListConstruct %int737280, %int23040, %int576, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1178 = torch.aten._reshape_alias %1175, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_59, %result1_60 = torch.aten.var_mean.correction %1178, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1179 = torch.aten.add.Tensor %result0_59, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1180 = torch.aten.rsqrt %1179 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1181 = torch.aten.sub.Tensor %1178, %result1_60, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%1182 = torch.aten.mul.Tensor %1181, %1180 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%1183 = torch.prim.ListConstruct %int2, %int1280, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1184 = torch.aten.view %1182, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1185 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1186 = torch.aten.unsqueeze %1185, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1187 = torch.aten.unsqueeze %1186, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1188 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1189 = torch.aten.unsqueeze %1188, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1190 = torch.aten.unsqueeze %1189, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1191 = torch.aten.mul.Tensor %1184, %1190 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%1192 = torch.aten.add.Tensor %1191, %1187, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%1193 = torch.aten._to_copy %1192, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%1194 = torch.aten.silu %1193 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%1195 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1196 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1197 = torch.aten._convolution %1194, %1196, %1195, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%1198 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1199 = torch.aten._to_copy %10, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,640,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,640,1,1],f16>
%1200 = torch.aten._convolution %1143, %1199, %1198, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,24,24],f16>, !torch.vtensor<[1280,640,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%1201 = torch.aten.add.Tensor %1200, %1197, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1202 = torch.aten._to_copy %1201, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%1203 = torch.aten._reshape_alias %1202, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_61, %result1_62 = torch.aten.var_mean.correction %1203, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1204 = torch.aten.add.Tensor %result0_61, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1205 = torch.aten.rsqrt %1204 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1206 = torch.aten.sub.Tensor %1203, %result1_62, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%1207 = torch.aten.mul.Tensor %1206, %1205 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%1208 = torch.aten.view %1207, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1209 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1210 = torch.aten.unsqueeze %1209, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1211 = torch.aten.unsqueeze %1210, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1212 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1213 = torch.aten.unsqueeze %1212, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1214 = torch.aten.unsqueeze %1213, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1215 = torch.aten.mul.Tensor %1208, %1214 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%1216 = torch.aten.add.Tensor %1215, %1211, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%1217 = torch.prim.ListConstruct %int737280, %int576, %int24, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1218 = torch.aten._reshape_alias %1216, %1183, %1217 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1219 = torch.aten.permute %1218, %151 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f32>
%1220 = torch.prim.ListConstruct %int2, %int576, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1221 = torch.prim.ListConstruct %int737280, %int1, %int576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1222 = torch.aten._reshape_alias %1219, %1220, %1221 : !torch.vtensor<[2,24,24,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f32>
%1223 = torch.aten.clone %1222, %int0 : !torch.vtensor<[2,576,1280],f32>, !torch.int -> !torch.vtensor<[2,576,1280],f32>
%1224 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1225 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1226 = torch.aten._to_copy %1223, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1227 = torch.aten.t %1225 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1228 = torch.prim.ListConstruct %int1152, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%1229 = torch.aten.view %1226, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1230 = torch.aten.addmm %1224, %1229, %1227, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1231 = torch.aten.view %1230, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1232 = torch.aten._to_copy %1231, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%1233 = torch.prim.ListConstruct %int1280 : (!torch.int) -> !torch.list<int>
%result0_63, %result1_64, %result2_65 = torch.aten.native_layer_norm %1232, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1234 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1235 = torch.aten._to_copy %result0_63, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1236 = torch.aten.t %1234 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1237 = torch.prim.ListConstruct %int1280, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
%1238 = torch.aten._reshape_alias %1235, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1239 = torch.aten.mm %1238, %1236 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1240 = torch.aten._unsafe_view %1239, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1241 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1242 = torch.aten._to_copy %result0_63, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1243 = torch.aten.t %1241 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1244 = torch.aten._reshape_alias %1242, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1245 = torch.aten.mm %1244, %1243 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1246 = torch.aten._unsafe_view %1245, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1247 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1248 = torch.aten._to_copy %result0_63, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1249 = torch.aten.t %1247 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1250 = torch.aten._reshape_alias %1248, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1251 = torch.aten.mm %1250, %1249 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1252 = torch.aten._unsafe_view %1251, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1253 = torch.prim.ListConstruct %int2, %int576, %int20, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1254 = torch.prim.ListConstruct %int737280, %int1280, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1255 = torch.aten._reshape_alias %1240, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1256 = torch.aten.permute %1255, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1257 = torch.aten.clone %1256, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1258 = torch.prim.ListConstruct %int40, %int576, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1259 = torch.aten._unsafe_view %1257, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1260 = torch.aten._reshape_alias %1246, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1261 = torch.aten.permute %1260, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1262 = torch.aten.clone %1261, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1263 = torch.aten._unsafe_view %1262, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1264 = torch.aten._reshape_alias %1252, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1265 = torch.aten.permute %1264, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1266 = torch.aten.clone %1265, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1267 = torch.aten._unsafe_view %1266, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1268 = torch.aten.unsqueeze %1259, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1269 = torch.aten.permute %1268, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1270 = torch.aten.unsqueeze %1263, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1271 = torch.aten.permute %1270, %206 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,576,64],f16>
%1272 = torch.aten.permute %1269, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1273 = torch.prim.ListConstruct %int36864, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1274 = torch.aten._reshape_alias %1272, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1275 = torch.aten.permute %1271, %211 : !torch.vtensor<[40,1,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,576,1],f16>
%1276 = torch.prim.ListConstruct %int40, %int64, %int576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1277 = torch.prim.ListConstruct %int36864, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1278 = torch.aten._reshape_alias %1275, %1276, %1277 : !torch.vtensor<[40,64,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,576],f16>
%1279 = torch.aten.bmm %1274, %1278 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,576],f16> -> !torch.vtensor<[40,576,576],f16>
%1280 = torch.prim.ListConstruct %int40, %int576, %int1, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1281 = torch.aten.view %1279, %1280 : !torch.vtensor<[40,576,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%1282 = torch.aten.permute %1281, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%1283 = torch.prim.ListConstruct %int40, %int576, %int576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1284 = torch.aten.view %1282, %1283 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%1285 = torch.aten.mul.Tensor %1284, %0 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,576],f16>
%1286 = torch.aten._softmax %1285, %int-1, %true : !torch.vtensor<[40,576,576],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,576],f32>
%1287 = torch.aten._to_copy %1286, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,576],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,576],f16>
%1288 = torch.aten.unsqueeze %1287, %int3 : !torch.vtensor<[40,576,576],f16>, !torch.int -> !torch.vtensor<[40,576,576,1],f16>
%1289 = torch.aten.permute %1288, %203 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%1290 = torch.aten.unsqueeze %1267, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1291 = torch.aten.permute %1290, %211 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,576],f16>
%1292 = torch.aten.permute %1289, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%1293 = torch.prim.ListConstruct %int331776, %int576, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1294 = torch.aten._reshape_alias %1292, %1283, %1293 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%1295 = torch.aten.permute %1291, %211 : !torch.vtensor<[40,1,64,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1296 = torch.aten._reshape_alias %1295, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1297 = torch.aten.bmm %1294, %1296 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[40,576,64],f16> -> !torch.vtensor<[40,576,64],f16>
%1298 = torch.prim.ListConstruct %int40, %int576, %int1, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1299 = torch.aten.view %1297, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1300 = torch.aten.permute %1299, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1301 = torch.aten.view %1300, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1302 = torch.prim.ListConstruct %int2, %int20, %int576, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1303 = torch.prim.ListConstruct %int737280, %int36864, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1304 = torch.aten._reshape_alias %1301, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1305 = torch.aten.permute %1304, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1306 = torch.aten.clone %1305, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%1307 = torch.aten._unsafe_view %1306, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1308 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1309 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1310 = torch.aten.t %1309 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1311 = torch.aten.view %1307, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1312 = torch.aten.addmm %1308, %1311, %1310, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1313 = torch.aten.view %1312, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1314 = torch.aten.add.Tensor %1313, %1231, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1315 = torch.aten._to_copy %1314, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_66, %result1_67, %result2_68 = torch.aten.native_layer_norm %1315, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1316 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1317 = torch.aten._to_copy %result0_66, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1318 = torch.aten.t %1316 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1319 = torch.aten._reshape_alias %1317, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1320 = torch.aten.mm %1319, %1318 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1321 = torch.aten._unsafe_view %1320, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1322 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1323 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1324 = torch.aten.t %1322 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1325 = torch.aten._reshape_alias %1323, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1326 = torch.aten.mm %1325, %1324 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1327 = torch.prim.ListConstruct %int2, %int77, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1328 = torch.aten._unsafe_view %1326, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1329 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1330 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1331 = torch.aten.t %1329 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1332 = torch.aten._reshape_alias %1330, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1333 = torch.aten.mm %1332, %1331 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1334 = torch.aten._unsafe_view %1333, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1335 = torch.aten._reshape_alias %1321, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1336 = torch.aten.permute %1335, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1337 = torch.aten.clone %1336, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1338 = torch.aten._unsafe_view %1337, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1339 = torch.prim.ListConstruct %int2, %int77, %int20, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1340 = torch.prim.ListConstruct %int98560, %int1280, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1341 = torch.aten._reshape_alias %1328, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1342 = torch.aten.permute %1341, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1343 = torch.aten.clone %1342, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1344 = torch.prim.ListConstruct %int40, %int77, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1345 = torch.aten._unsafe_view %1343, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1346 = torch.aten._reshape_alias %1334, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1347 = torch.aten.permute %1346, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1348 = torch.aten.clone %1347, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1349 = torch.aten._unsafe_view %1348, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1350 = torch.aten.unsqueeze %1338, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1351 = torch.aten.permute %1350, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1352 = torch.aten.unsqueeze %1345, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1353 = torch.aten.permute %1352, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%1354 = torch.aten.permute %1351, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1355 = torch.aten._reshape_alias %1354, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1356 = torch.aten.permute %1353, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%1357 = torch.prim.ListConstruct %int40, %int64, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1358 = torch.aten._reshape_alias %1356, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%1359 = torch.aten.bmm %1355, %1358 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,576,77],f16>
%1360 = torch.prim.ListConstruct %int40, %int576, %int1, %int77 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1361 = torch.aten.view %1359, %1360 : !torch.vtensor<[40,576,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%1362 = torch.aten.permute %1361, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%1363 = torch.prim.ListConstruct %int40, %int576, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1364 = torch.aten.view %1362, %1363 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%1365 = torch.aten.mul.Tensor %1364, %0 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,77],f16>
%1366 = torch.aten._softmax %1365, %int-1, %true : !torch.vtensor<[40,576,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,77],f32>
%1367 = torch.aten._to_copy %1366, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,77],f16>
%1368 = torch.aten.unsqueeze %1367, %int3 : !torch.vtensor<[40,576,77],f16>, !torch.int -> !torch.vtensor<[40,576,77,1],f16>
%1369 = torch.aten.permute %1368, %203 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%1370 = torch.aten.unsqueeze %1349, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1371 = torch.aten.permute %1370, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%1372 = torch.aten.permute %1369, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%1373 = torch.prim.ListConstruct %int44352, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1374 = torch.aten._reshape_alias %1372, %1363, %1373 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%1375 = torch.aten.permute %1371, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%1376 = torch.aten._reshape_alias %1375, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1377 = torch.aten.bmm %1374, %1376 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,576,64],f16>
%1378 = torch.aten.view %1377, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1379 = torch.aten.permute %1378, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1380 = torch.aten.view %1379, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1381 = torch.aten._reshape_alias %1380, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1382 = torch.aten.permute %1381, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1383 = torch.aten.clone %1382, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%1384 = torch.aten._unsafe_view %1383, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1385 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1386 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1387 = torch.aten.t %1386 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1388 = torch.aten.view %1384, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1389 = torch.aten.addmm %1385, %1388, %1387, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1390 = torch.aten.view %1389, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1391 = torch.aten.add.Tensor %1390, %1314, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1392 = torch.aten._to_copy %1391, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_69, %result1_70, %result2_71 = torch.aten.native_layer_norm %1392, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1393 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%1394 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%1395 = torch.aten._to_copy %result0_69, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1396 = torch.aten.t %1394 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%1397 = torch.aten.view %1395, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1398 = torch.aten.addmm %1393, %1397, %1396, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,10240],f16>
%1399 = torch.prim.ListConstruct %int2, %int576, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1400 = torch.aten.view %1398, %1399 : !torch.vtensor<[1152,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,576,10240],f16>
%1401 = torch.aten.slice.Tensor %1400, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%1402 = torch.aten.slice.Tensor %1400, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%1403 = torch.aten.gelu %1402, %str : !torch.vtensor<[2,576,5120],f16>, !torch.str -> !torch.vtensor<[2,576,5120],f16>
%1404 = torch.aten.mul.Tensor %1401, %1403 : !torch.vtensor<[2,576,5120],f16>, !torch.vtensor<[2,576,5120],f16> -> !torch.vtensor<[2,576,5120],f16>
%1405 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1406 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%1407 = torch.aten.t %1406 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%1408 = torch.prim.ListConstruct %int1152, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%1409 = torch.aten.view %1404, %1408 : !torch.vtensor<[2,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[1152,5120],f16>
%1410 = torch.aten.addmm %1405, %1409, %1407, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1411 = torch.aten.view %1410, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1412 = torch.aten.add.Tensor %1411, %1391, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1413 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1414 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1415 = torch.aten.t %1414 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1416 = torch.aten.view %1412, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1417 = torch.aten.addmm %1413, %1416, %1415, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1418 = torch.aten.view %1417, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1419 = torch.prim.ListConstruct %int2, %int24, %int24, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1420 = torch.prim.ListConstruct %int737280, %int30720, %int1280, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1421 = torch.aten._reshape_alias %1418, %1419, %1420 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f16>
%1422 = torch.aten.permute %1421, %206 : !torch.vtensor<[2,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%1423 = torch.prim.ListConstruct %int737280, %int1, %int30720, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1424 = torch.aten._reshape_alias %1422, %1183, %1423 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%1425 = torch.aten.clone %1424, %int0 : !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1426 = torch.aten.add.Tensor %1425, %1201, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1427 = torch.aten._to_copy %1426, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%1428 = torch.aten._reshape_alias %1427, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_72, %result1_73 = torch.aten.var_mean.correction %1428, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1429 = torch.aten.add.Tensor %result0_72, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1430 = torch.aten.rsqrt %1429 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1431 = torch.aten.sub.Tensor %1428, %result1_73, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%1432 = torch.aten.mul.Tensor %1431, %1430 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%1433 = torch.aten.view %1432, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1434 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1435 = torch.aten.unsqueeze %1434, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1436 = torch.aten.unsqueeze %1435, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1437 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1438 = torch.aten.unsqueeze %1437, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1439 = torch.aten.unsqueeze %1438, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1440 = torch.aten.mul.Tensor %1433, %1439 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%1441 = torch.aten.add.Tensor %1440, %1436, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%1442 = torch.aten._to_copy %1441, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%1443 = torch.aten.silu %1442 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%1444 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1445 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1446 = torch.aten._convolution %1443, %1445, %1444, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%1447 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1448 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1449 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1450 = torch.aten.t %1449 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1451 = torch.aten.addmm %1448, %1447, %1450, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1452 = torch.aten.unsqueeze %1451, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1453 = torch.aten.unsqueeze %1452, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1454 = torch.aten.add.Tensor %1446, %1453, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1455 = torch.aten._to_copy %1454, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%1456 = torch.aten._reshape_alias %1455, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_74, %result1_75 = torch.aten.var_mean.correction %1456, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1457 = torch.aten.add.Tensor %result0_74, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1458 = torch.aten.rsqrt %1457 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1459 = torch.aten.sub.Tensor %1456, %result1_75, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%1460 = torch.aten.mul.Tensor %1459, %1458 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%1461 = torch.aten.view %1460, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1462 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1463 = torch.aten.unsqueeze %1462, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1464 = torch.aten.unsqueeze %1463, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1465 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1466 = torch.aten.unsqueeze %1465, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1467 = torch.aten.unsqueeze %1466, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1468 = torch.aten.mul.Tensor %1461, %1467 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%1469 = torch.aten.add.Tensor %1468, %1464, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%1470 = torch.aten._to_copy %1469, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%1471 = torch.aten.silu %1470 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%1472 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1473 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1474 = torch.aten._convolution %1471, %1473, %1472, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%1475 = torch.aten.add.Tensor %1426, %1474, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1476 = torch.aten._to_copy %1475, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%1477 = torch.aten._reshape_alias %1476, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_76, %result1_77 = torch.aten.var_mean.correction %1477, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1478 = torch.aten.add.Tensor %result0_76, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1479 = torch.aten.rsqrt %1478 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1480 = torch.aten.sub.Tensor %1477, %result1_77, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%1481 = torch.aten.mul.Tensor %1480, %1479 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%1482 = torch.aten.view %1481, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1483 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1484 = torch.aten.unsqueeze %1483, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1485 = torch.aten.unsqueeze %1484, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1486 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1487 = torch.aten.unsqueeze %1486, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1488 = torch.aten.unsqueeze %1487, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1489 = torch.aten.mul.Tensor %1482, %1488 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%1490 = torch.aten.add.Tensor %1489, %1485, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%1491 = torch.aten._reshape_alias %1490, %1183, %1217 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%1492 = torch.aten.permute %1491, %151 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f32>
%1493 = torch.aten._reshape_alias %1492, %1220, %1221 : !torch.vtensor<[2,24,24,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f32>
%1494 = torch.aten.clone %1493, %int0 : !torch.vtensor<[2,576,1280],f32>, !torch.int -> !torch.vtensor<[2,576,1280],f32>
%1495 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1496 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1497 = torch.aten._to_copy %1494, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1498 = torch.aten.t %1496 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1499 = torch.aten.view %1497, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1500 = torch.aten.addmm %1495, %1499, %1498, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1501 = torch.aten.view %1500, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1502 = torch.aten._to_copy %1501, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_78, %result1_79, %result2_80 = torch.aten.native_layer_norm %1502, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1503 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1504 = torch.aten._to_copy %result0_78, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1505 = torch.aten.t %1503 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1506 = torch.aten._reshape_alias %1504, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1507 = torch.aten.mm %1506, %1505 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1508 = torch.aten._unsafe_view %1507, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1509 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1510 = torch.aten._to_copy %result0_78, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1511 = torch.aten.t %1509 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1512 = torch.aten._reshape_alias %1510, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1513 = torch.aten.mm %1512, %1511 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1514 = torch.aten._unsafe_view %1513, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1515 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1516 = torch.aten._to_copy %result0_78, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1517 = torch.aten.t %1515 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1518 = torch.aten._reshape_alias %1516, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1519 = torch.aten.mm %1518, %1517 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1520 = torch.aten._unsafe_view %1519, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1521 = torch.aten._reshape_alias %1508, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1522 = torch.aten.permute %1521, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1523 = torch.aten.clone %1522, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1524 = torch.aten._unsafe_view %1523, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1525 = torch.aten._reshape_alias %1514, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1526 = torch.aten.permute %1525, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1527 = torch.aten.clone %1526, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1528 = torch.aten._unsafe_view %1527, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1529 = torch.aten._reshape_alias %1520, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1530 = torch.aten.permute %1529, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1531 = torch.aten.clone %1530, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1532 = torch.aten._unsafe_view %1531, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1533 = torch.aten.unsqueeze %1524, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1534 = torch.aten.permute %1533, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1535 = torch.aten.unsqueeze %1528, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1536 = torch.aten.permute %1535, %206 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,576,64],f16>
%1537 = torch.aten.permute %1534, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1538 = torch.aten._reshape_alias %1537, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1539 = torch.aten.permute %1536, %211 : !torch.vtensor<[40,1,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,576,1],f16>
%1540 = torch.aten._reshape_alias %1539, %1276, %1277 : !torch.vtensor<[40,64,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,576],f16>
%1541 = torch.aten.bmm %1538, %1540 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,576],f16> -> !torch.vtensor<[40,576,576],f16>
%1542 = torch.aten.view %1541, %1280 : !torch.vtensor<[40,576,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%1543 = torch.aten.permute %1542, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%1544 = torch.aten.view %1543, %1283 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%1545 = torch.aten.mul.Tensor %1544, %0 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,576],f16>
%1546 = torch.aten._softmax %1545, %int-1, %true : !torch.vtensor<[40,576,576],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,576],f32>
%1547 = torch.aten._to_copy %1546, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,576],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,576],f16>
%1548 = torch.aten.unsqueeze %1547, %int3 : !torch.vtensor<[40,576,576],f16>, !torch.int -> !torch.vtensor<[40,576,576,1],f16>
%1549 = torch.aten.permute %1548, %203 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%1550 = torch.aten.unsqueeze %1532, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1551 = torch.aten.permute %1550, %211 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,576],f16>
%1552 = torch.aten.permute %1549, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%1553 = torch.aten._reshape_alias %1552, %1283, %1293 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%1554 = torch.aten.permute %1551, %211 : !torch.vtensor<[40,1,64,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1555 = torch.aten._reshape_alias %1554, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1556 = torch.aten.bmm %1553, %1555 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[40,576,64],f16> -> !torch.vtensor<[40,576,64],f16>
%1557 = torch.aten.view %1556, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1558 = torch.aten.permute %1557, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1559 = torch.aten.view %1558, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1560 = torch.aten._reshape_alias %1559, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1561 = torch.aten.permute %1560, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1562 = torch.aten.clone %1561, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%1563 = torch.aten._unsafe_view %1562, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1564 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1565 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1566 = torch.aten.t %1565 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1567 = torch.aten.view %1563, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1568 = torch.aten.addmm %1564, %1567, %1566, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1569 = torch.aten.view %1568, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1570 = torch.aten.add.Tensor %1569, %1501, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1571 = torch.aten._to_copy %1570, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_81, %result1_82, %result2_83 = torch.aten.native_layer_norm %1571, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1572 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1573 = torch.aten._to_copy %result0_81, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1574 = torch.aten.t %1572 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1575 = torch.aten._reshape_alias %1573, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1576 = torch.aten.mm %1575, %1574 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%1577 = torch.aten._unsafe_view %1576, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1578 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1579 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1580 = torch.aten.t %1578 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1581 = torch.aten._reshape_alias %1579, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1582 = torch.aten.mm %1581, %1580 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1583 = torch.aten._unsafe_view %1582, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1584 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1585 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1586 = torch.aten.t %1584 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1587 = torch.aten._reshape_alias %1585, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1588 = torch.aten.mm %1587, %1586 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1589 = torch.aten._unsafe_view %1588, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1590 = torch.aten._reshape_alias %1577, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1591 = torch.aten.permute %1590, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1592 = torch.aten.clone %1591, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%1593 = torch.aten._unsafe_view %1592, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1594 = torch.aten._reshape_alias %1583, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1595 = torch.aten.permute %1594, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1596 = torch.aten.clone %1595, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1597 = torch.aten._unsafe_view %1596, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1598 = torch.aten._reshape_alias %1589, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1599 = torch.aten.permute %1598, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1600 = torch.aten.clone %1599, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1601 = torch.aten._unsafe_view %1600, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1602 = torch.aten.unsqueeze %1593, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%1603 = torch.aten.permute %1602, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1604 = torch.aten.unsqueeze %1597, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1605 = torch.aten.permute %1604, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%1606 = torch.aten.permute %1603, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1607 = torch.aten._reshape_alias %1606, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1608 = torch.aten.permute %1605, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%1609 = torch.aten._reshape_alias %1608, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%1610 = torch.aten.bmm %1607, %1609 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,576,77],f16>
%1611 = torch.aten.view %1610, %1360 : !torch.vtensor<[40,576,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%1612 = torch.aten.permute %1611, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%1613 = torch.aten.view %1612, %1363 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%1614 = torch.aten.mul.Tensor %1613, %0 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,77],f16>
%1615 = torch.aten._softmax %1614, %int-1, %true : !torch.vtensor<[40,576,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,77],f32>
%1616 = torch.aten._to_copy %1615, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,77],f16>
%1617 = torch.aten.unsqueeze %1616, %int3 : !torch.vtensor<[40,576,77],f16>, !torch.int -> !torch.vtensor<[40,576,77,1],f16>
%1618 = torch.aten.permute %1617, %203 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%1619 = torch.aten.unsqueeze %1601, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1620 = torch.aten.permute %1619, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%1621 = torch.aten.permute %1618, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%1622 = torch.aten._reshape_alias %1621, %1363, %1373 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%1623 = torch.aten.permute %1620, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%1624 = torch.aten._reshape_alias %1623, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1625 = torch.aten.bmm %1622, %1624 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,576,64],f16>
%1626 = torch.aten.view %1625, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%1627 = torch.aten.permute %1626, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%1628 = torch.aten.view %1627, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%1629 = torch.aten._reshape_alias %1628, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%1630 = torch.aten.permute %1629, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%1631 = torch.aten.clone %1630, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%1632 = torch.aten._unsafe_view %1631, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1633 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1634 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1635 = torch.aten.t %1634 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1636 = torch.aten.view %1632, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1637 = torch.aten.addmm %1633, %1636, %1635, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1638 = torch.aten.view %1637, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1639 = torch.aten.add.Tensor %1638, %1570, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1640 = torch.aten._to_copy %1639, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_84, %result1_85, %result2_86 = torch.aten.native_layer_norm %1640, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%1641 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%1642 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%1643 = torch.aten._to_copy %result0_84, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%1644 = torch.aten.t %1642 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%1645 = torch.aten.view %1643, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1646 = torch.aten.addmm %1641, %1645, %1644, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,10240],f16>
%1647 = torch.aten.view %1646, %1399 : !torch.vtensor<[1152,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,576,10240],f16>
%1648 = torch.aten.slice.Tensor %1647, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%1649 = torch.aten.slice.Tensor %1647, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%1650 = torch.aten.gelu %1649, %str : !torch.vtensor<[2,576,5120],f16>, !torch.str -> !torch.vtensor<[2,576,5120],f16>
%1651 = torch.aten.mul.Tensor %1648, %1650 : !torch.vtensor<[2,576,5120],f16>, !torch.vtensor<[2,576,5120],f16> -> !torch.vtensor<[2,576,5120],f16>
%1652 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1653 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%1654 = torch.aten.t %1653 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%1655 = torch.aten.view %1651, %1408 : !torch.vtensor<[2,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[1152,5120],f16>
%1656 = torch.aten.addmm %1652, %1655, %1654, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1657 = torch.aten.view %1656, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1658 = torch.aten.add.Tensor %1657, %1639, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%1659 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1660 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1661 = torch.aten.t %1660 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1662 = torch.aten.view %1658, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%1663 = torch.aten.addmm %1659, %1662, %1661, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%1664 = torch.aten.view %1663, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%1665 = torch.aten._reshape_alias %1664, %1419, %1420 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f16>
%1666 = torch.aten.permute %1665, %206 : !torch.vtensor<[2,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%1667 = torch.aten._reshape_alias %1666, %1183, %1423 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%1668 = torch.aten.clone %1667, %int0 : !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1669 = torch.aten.add.Tensor %1668, %1475, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%1670 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1671 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1672 = torch.aten._convolution %1669, %1671, %1670, %613, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1673 = torch.aten._to_copy %1672, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1674 = torch.prim.ListConstruct %int2, %int32, %int40, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1675 = torch.prim.ListConstruct %int184320, %int5760, %int144, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1676 = torch.aten._reshape_alias %1673, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_87, %result1_88 = torch.aten.var_mean.correction %1676, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1677 = torch.aten.add.Tensor %result0_87, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1678 = torch.aten.rsqrt %1677 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1679 = torch.aten.sub.Tensor %1676, %result1_88, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1680 = torch.aten.mul.Tensor %1679, %1678 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1681 = torch.prim.ListConstruct %int2, %int1280, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1682 = torch.aten.view %1680, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1683 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1684 = torch.aten.unsqueeze %1683, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1685 = torch.aten.unsqueeze %1684, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1686 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1687 = torch.aten.unsqueeze %1686, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1688 = torch.aten.unsqueeze %1687, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1689 = torch.aten.mul.Tensor %1682, %1688 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1690 = torch.aten.add.Tensor %1689, %1685, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1691 = torch.aten._to_copy %1690, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1692 = torch.aten.silu %1691 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1693 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1694 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1695 = torch.aten._convolution %1692, %1694, %1693, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1696 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1697 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1698 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1699 = torch.aten.t %1698 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1700 = torch.aten.addmm %1697, %1696, %1699, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1701 = torch.aten.unsqueeze %1700, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1702 = torch.aten.unsqueeze %1701, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1703 = torch.aten.add.Tensor %1695, %1702, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1704 = torch.aten._to_copy %1703, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1705 = torch.aten._reshape_alias %1704, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_89, %result1_90 = torch.aten.var_mean.correction %1705, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1706 = torch.aten.add.Tensor %result0_89, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1707 = torch.aten.rsqrt %1706 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1708 = torch.aten.sub.Tensor %1705, %result1_90, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1709 = torch.aten.mul.Tensor %1708, %1707 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1710 = torch.aten.view %1709, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1711 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1712 = torch.aten.unsqueeze %1711, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1713 = torch.aten.unsqueeze %1712, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1714 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1715 = torch.aten.unsqueeze %1714, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1716 = torch.aten.unsqueeze %1715, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1717 = torch.aten.mul.Tensor %1710, %1716 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1718 = torch.aten.add.Tensor %1717, %1713, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1719 = torch.aten._to_copy %1718, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1720 = torch.aten.silu %1719 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1721 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1722 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1723 = torch.aten._convolution %1720, %1722, %1721, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1724 = torch.aten.add.Tensor %1672, %1723, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1725 = torch.aten._to_copy %1724, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1726 = torch.aten._reshape_alias %1725, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_91, %result1_92 = torch.aten.var_mean.correction %1726, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1727 = torch.aten.add.Tensor %result0_91, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1728 = torch.aten.rsqrt %1727 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1729 = torch.aten.sub.Tensor %1726, %result1_92, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1730 = torch.aten.mul.Tensor %1729, %1728 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1731 = torch.aten.view %1730, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1732 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1733 = torch.aten.unsqueeze %1732, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1734 = torch.aten.unsqueeze %1733, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1735 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1736 = torch.aten.unsqueeze %1735, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1737 = torch.aten.unsqueeze %1736, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1738 = torch.aten.mul.Tensor %1731, %1737 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1739 = torch.aten.add.Tensor %1738, %1734, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1740 = torch.aten._to_copy %1739, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1741 = torch.aten.silu %1740 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1742 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1743 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1744 = torch.aten._convolution %1741, %1743, %1742, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1745 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1746 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1747 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1748 = torch.aten.t %1747 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1749 = torch.aten.addmm %1746, %1745, %1748, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1750 = torch.aten.unsqueeze %1749, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1751 = torch.aten.unsqueeze %1750, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1752 = torch.aten.add.Tensor %1744, %1751, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1753 = torch.aten._to_copy %1752, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1754 = torch.aten._reshape_alias %1753, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_93, %result1_94 = torch.aten.var_mean.correction %1754, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1755 = torch.aten.add.Tensor %result0_93, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1756 = torch.aten.rsqrt %1755 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1757 = torch.aten.sub.Tensor %1754, %result1_94, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1758 = torch.aten.mul.Tensor %1757, %1756 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1759 = torch.aten.view %1758, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1760 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1761 = torch.aten.unsqueeze %1760, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1762 = torch.aten.unsqueeze %1761, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1763 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1764 = torch.aten.unsqueeze %1763, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1765 = torch.aten.unsqueeze %1764, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1766 = torch.aten.mul.Tensor %1759, %1765 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1767 = torch.aten.add.Tensor %1766, %1762, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1768 = torch.aten._to_copy %1767, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1769 = torch.aten.silu %1768 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1770 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1771 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1772 = torch.aten._convolution %1769, %1771, %1770, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1773 = torch.aten.add.Tensor %1724, %1772, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1774 = torch.aten._to_copy %1773, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1775 = torch.aten._reshape_alias %1774, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_95, %result1_96 = torch.aten.var_mean.correction %1775, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1776 = torch.aten.add.Tensor %result0_95, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1777 = torch.aten.rsqrt %1776 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1778 = torch.aten.sub.Tensor %1775, %result1_96, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1779 = torch.aten.mul.Tensor %1778, %1777 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1780 = torch.aten.view %1779, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1781 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1782 = torch.aten.unsqueeze %1781, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1783 = torch.aten.unsqueeze %1782, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1784 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1785 = torch.aten.unsqueeze %1784, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1786 = torch.aten.unsqueeze %1785, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1787 = torch.aten.mul.Tensor %1780, %1786 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1788 = torch.aten.add.Tensor %1787, %1783, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1789 = torch.aten._to_copy %1788, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1790 = torch.aten.silu %1789 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1791 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1792 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1793 = torch.aten._convolution %1790, %1792, %1791, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1794 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%1795 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1796 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1797 = torch.aten.t %1796 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1798 = torch.aten.addmm %1795, %1794, %1797, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%1799 = torch.aten.unsqueeze %1798, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%1800 = torch.aten.unsqueeze %1799, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%1801 = torch.aten.add.Tensor %1793, %1800, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1802 = torch.aten._to_copy %1801, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1803 = torch.aten._reshape_alias %1802, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_97, %result1_98 = torch.aten.var_mean.correction %1803, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1804 = torch.aten.add.Tensor %result0_97, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1805 = torch.aten.rsqrt %1804 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1806 = torch.aten.sub.Tensor %1803, %result1_98, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1807 = torch.aten.mul.Tensor %1806, %1805 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1808 = torch.aten.view %1807, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1809 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1810 = torch.aten.unsqueeze %1809, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1811 = torch.aten.unsqueeze %1810, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1812 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1813 = torch.aten.unsqueeze %1812, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1814 = torch.aten.unsqueeze %1813, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1815 = torch.aten.mul.Tensor %1808, %1814 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1816 = torch.aten.add.Tensor %1815, %1811, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1817 = torch.aten._to_copy %1816, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%1818 = torch.aten.silu %1817 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%1819 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1820 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%1821 = torch.aten._convolution %1818, %1820, %1819, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%1822 = torch.aten.add.Tensor %1773, %1821, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%1823 = torch.aten._to_copy %1822, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%1824 = torch.aten._reshape_alias %1823, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_99, %result1_100 = torch.aten.var_mean.correction %1824, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%1825 = torch.aten.add.Tensor %result0_99, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%1826 = torch.aten.rsqrt %1825 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%1827 = torch.aten.sub.Tensor %1824, %result1_100, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%1828 = torch.aten.mul.Tensor %1827, %1826 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%1829 = torch.aten.view %1828, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1830 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1831 = torch.aten.unsqueeze %1830, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1832 = torch.aten.unsqueeze %1831, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1833 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%1834 = torch.aten.unsqueeze %1833, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%1835 = torch.aten.unsqueeze %1834, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%1836 = torch.aten.mul.Tensor %1829, %1835 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%1837 = torch.aten.add.Tensor %1836, %1832, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%1838 = torch.prim.ListConstruct %int184320, %int144, %int12, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1839 = torch.aten._reshape_alias %1837, %1681, %1838 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%1840 = torch.aten.permute %1839, %151 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int> -> !torch.vtensor<[2,12,12,1280],f32>
%1841 = torch.prim.ListConstruct %int2, %int144, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1842 = torch.prim.ListConstruct %int184320, %int1, %int144 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1843 = torch.aten._reshape_alias %1840, %1841, %1842 : !torch.vtensor<[2,12,12,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f32>
%1844 = torch.aten.clone %1843, %int0 : !torch.vtensor<[2,144,1280],f32>, !torch.int -> !torch.vtensor<[2,144,1280],f32>
%1845 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1846 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1847 = torch.aten._to_copy %1844, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%1848 = torch.aten.t %1846 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1849 = torch.prim.ListConstruct %int288, %int1280 : (!torch.int, !torch.int) -> !torch.list<int>
%1850 = torch.aten.view %1847, %1849 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1851 = torch.aten.addmm %1845, %1850, %1848, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,1280],f16>
%1852 = torch.aten.view %1851, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1853 = torch.aten._to_copy %1852, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f32>
%result0_101, %result1_102, %result2_103 = torch.aten.native_layer_norm %1853, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,144,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,144,1280],f32>, !torch.vtensor<[2,144,1],f32>, !torch.vtensor<[2,144,1],f32>
%1854 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1855 = torch.aten._to_copy %result0_101, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%1856 = torch.aten.t %1854 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1857 = torch.aten._reshape_alias %1855, %1849, %1237 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1858 = torch.aten.mm %1857, %1856 : !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[288,1280],f16>
%1859 = torch.aten._unsafe_view %1858, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1860 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1861 = torch.aten._to_copy %result0_101, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%1862 = torch.aten.t %1860 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1863 = torch.aten._reshape_alias %1861, %1849, %1237 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1864 = torch.aten.mm %1863, %1862 : !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[288,1280],f16>
%1865 = torch.aten._unsafe_view %1864, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1866 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1867 = torch.aten._to_copy %result0_101, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%1868 = torch.aten.t %1866 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1869 = torch.aten._reshape_alias %1867, %1849, %1237 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1870 = torch.aten.mm %1869, %1868 : !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[288,1280],f16>
%1871 = torch.aten._unsafe_view %1870, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1872 = torch.prim.ListConstruct %int2, %int144, %int20, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1873 = torch.prim.ListConstruct %int184320, %int1280, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1874 = torch.aten._reshape_alias %1859, %1872, %1873 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1875 = torch.aten.permute %1874, %189 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1876 = torch.aten.clone %1875, %int0 : !torch.vtensor<[2,20,144,64],f16>, !torch.int -> !torch.vtensor<[2,20,144,64],f16>
%1877 = torch.prim.ListConstruct %int40, %int144, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1878 = torch.aten._unsafe_view %1876, %1877 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1879 = torch.aten._reshape_alias %1865, %1872, %1873 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1880 = torch.aten.permute %1879, %189 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1881 = torch.aten.clone %1880, %int0 : !torch.vtensor<[2,20,144,64],f16>, !torch.int -> !torch.vtensor<[2,20,144,64],f16>
%1882 = torch.aten._unsafe_view %1881, %1877 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1883 = torch.aten._reshape_alias %1871, %1872, %1873 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1884 = torch.aten.permute %1883, %189 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1885 = torch.aten.clone %1884, %int0 : !torch.vtensor<[2,20,144,64],f16>, !torch.int -> !torch.vtensor<[2,20,144,64],f16>
%1886 = torch.aten._unsafe_view %1885, %1877 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1887 = torch.aten.unsqueeze %1878, %int3 : !torch.vtensor<[40,144,64],f16>, !torch.int -> !torch.vtensor<[40,144,64,1],f16>
%1888 = torch.aten.permute %1887, %203 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,64],f16>
%1889 = torch.aten.unsqueeze %1882, %int3 : !torch.vtensor<[40,144,64],f16>, !torch.int -> !torch.vtensor<[40,144,64,1],f16>
%1890 = torch.aten.permute %1889, %206 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,144,64],f16>
%1891 = torch.aten.permute %1888, %203 : !torch.vtensor<[40,144,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64,1],f16>
%1892 = torch.prim.ListConstruct %int9216, %int64, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1893 = torch.aten._reshape_alias %1891, %1877, %1892 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1894 = torch.aten.permute %1890, %211 : !torch.vtensor<[40,1,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,144,1],f16>
%1895 = torch.prim.ListConstruct %int40, %int64, %int144 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1896 = torch.prim.ListConstruct %int9216, %int1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1897 = torch.aten._reshape_alias %1894, %1895, %1896 : !torch.vtensor<[40,64,144,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,144],f16>
%1898 = torch.aten.bmm %1893, %1897 : !torch.vtensor<[40,144,64],f16>, !torch.vtensor<[40,64,144],f16> -> !torch.vtensor<[40,144,144],f16>
%1899 = torch.prim.ListConstruct %int40, %int144, %int1, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1900 = torch.aten.view %1898, %1899 : !torch.vtensor<[40,144,144],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,144],f16>
%1901 = torch.aten.permute %1900, %203 : !torch.vtensor<[40,144,1,144],f16>, !torch.list<int> -> !torch.vtensor<[40,144,144,1],f16>
%1902 = torch.prim.ListConstruct %int40, %int144, %int144 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1903 = torch.aten.view %1901, %1902 : !torch.vtensor<[40,144,144,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,144],f16>
%1904 = torch.aten.mul.Tensor %1903, %0 : !torch.vtensor<[40,144,144],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,144,144],f16>
%1905 = torch.aten._softmax %1904, %int-1, %true : !torch.vtensor<[40,144,144],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,144,144],f32>
%1906 = torch.aten._to_copy %1905, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,144,144],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,144,144],f16>
%1907 = torch.aten.unsqueeze %1906, %int3 : !torch.vtensor<[40,144,144],f16>, !torch.int -> !torch.vtensor<[40,144,144,1],f16>
%1908 = torch.aten.permute %1907, %203 : !torch.vtensor<[40,144,144,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,144],f16>
%1909 = torch.aten.unsqueeze %1886, %int3 : !torch.vtensor<[40,144,64],f16>, !torch.int -> !torch.vtensor<[40,144,64,1],f16>
%1910 = torch.aten.permute %1909, %211 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,144],f16>
%1911 = torch.aten.permute %1908, %203 : !torch.vtensor<[40,144,1,144],f16>, !torch.list<int> -> !torch.vtensor<[40,144,144,1],f16>
%1912 = torch.prim.ListConstruct %int20736, %int144, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1913 = torch.aten._reshape_alias %1911, %1902, %1912 : !torch.vtensor<[40,144,144,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,144,144],f16>
%1914 = torch.aten.permute %1910, %211 : !torch.vtensor<[40,1,64,144],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64,1],f16>
%1915 = torch.aten._reshape_alias %1914, %1877, %1892 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1916 = torch.aten.bmm %1913, %1915 : !torch.vtensor<[40,144,144],f16>, !torch.vtensor<[40,144,64],f16> -> !torch.vtensor<[40,144,64],f16>
%1917 = torch.prim.ListConstruct %int40, %int144, %int1, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1918 = torch.aten.view %1916, %1917 : !torch.vtensor<[40,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,64],f16>
%1919 = torch.aten.permute %1918, %203 : !torch.vtensor<[40,144,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64,1],f16>
%1920 = torch.aten.view %1919, %1877 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1921 = torch.prim.ListConstruct %int2, %int20, %int144, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1922 = torch.prim.ListConstruct %int184320, %int9216, %int64, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1923 = torch.aten._reshape_alias %1920, %1921, %1922 : !torch.vtensor<[40,144,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1924 = torch.aten.permute %1923, %189 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1925 = torch.aten.clone %1924, %int0 : !torch.vtensor<[2,144,20,64],f16>, !torch.int -> !torch.vtensor<[2,144,20,64],f16>
%1926 = torch.aten._unsafe_view %1925, %1841 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1927 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%1928 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1929 = torch.aten.t %1928 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1930 = torch.aten.view %1926, %1849 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1931 = torch.aten.addmm %1927, %1930, %1929, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,1280],f16>
%1932 = torch.aten.view %1931, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1933 = torch.aten.add.Tensor %1932, %1852, %int1 : !torch.vtensor<[2,144,1280],f16>, !torch.vtensor<[2,144,1280],f16>, !torch.int -> !torch.vtensor<[2,144,1280],f16>
%1934 = torch.aten._to_copy %1933, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f32>
%result0_104, %result1_105, %result2_106 = torch.aten.native_layer_norm %1934, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,144,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,144,1280],f32>, !torch.vtensor<[2,144,1],f32>, !torch.vtensor<[2,144,1],f32>
%1935 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%1936 = torch.aten._to_copy %result0_104, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%1937 = torch.aten.t %1935 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%1938 = torch.aten._reshape_alias %1936, %1849, %1237 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%1939 = torch.aten.mm %1938, %1937 : !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[288,1280],f16>
%1940 = torch.aten._unsafe_view %1939, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1941 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1942 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1943 = torch.aten.t %1941 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1944 = torch.aten._reshape_alias %1942, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1945 = torch.aten.mm %1944, %1943 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1946 = torch.aten._unsafe_view %1945, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1947 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%1948 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%1949 = torch.aten.t %1947 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%1950 = torch.aten._reshape_alias %1948, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%1951 = torch.aten.mm %1950, %1949 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%1952 = torch.aten._unsafe_view %1951, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%1953 = torch.aten._reshape_alias %1940, %1872, %1873 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1954 = torch.aten.permute %1953, %189 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1955 = torch.aten.clone %1954, %int0 : !torch.vtensor<[2,20,144,64],f16>, !torch.int -> !torch.vtensor<[2,20,144,64],f16>
%1956 = torch.aten._unsafe_view %1955, %1877 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1957 = torch.aten._reshape_alias %1946, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1958 = torch.aten.permute %1957, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1959 = torch.aten.clone %1958, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1960 = torch.aten._unsafe_view %1959, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1961 = torch.aten._reshape_alias %1952, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%1962 = torch.aten.permute %1961, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%1963 = torch.aten.clone %1962, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%1964 = torch.aten._unsafe_view %1963, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1965 = torch.aten.unsqueeze %1956, %int3 : !torch.vtensor<[40,144,64],f16>, !torch.int -> !torch.vtensor<[40,144,64,1],f16>
%1966 = torch.aten.permute %1965, %203 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,64],f16>
%1967 = torch.aten.unsqueeze %1960, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1968 = torch.aten.permute %1967, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%1969 = torch.aten.permute %1966, %203 : !torch.vtensor<[40,144,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64,1],f16>
%1970 = torch.aten._reshape_alias %1969, %1877, %1892 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1971 = torch.aten.permute %1968, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%1972 = torch.aten._reshape_alias %1971, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%1973 = torch.aten.bmm %1970, %1972 : !torch.vtensor<[40,144,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,144,77],f16>
%1974 = torch.prim.ListConstruct %int40, %int144, %int1, %int77 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1975 = torch.aten.view %1973, %1974 : !torch.vtensor<[40,144,77],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,77],f16>
%1976 = torch.aten.permute %1975, %203 : !torch.vtensor<[40,144,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,144,77,1],f16>
%1977 = torch.prim.ListConstruct %int40, %int144, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1978 = torch.aten.view %1976, %1977 : !torch.vtensor<[40,144,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,77],f16>
%1979 = torch.aten.mul.Tensor %1978, %0 : !torch.vtensor<[40,144,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,144,77],f16>
%1980 = torch.aten._softmax %1979, %int-1, %true : !torch.vtensor<[40,144,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,144,77],f32>
%1981 = torch.aten._to_copy %1980, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,144,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,144,77],f16>
%1982 = torch.aten.unsqueeze %1981, %int3 : !torch.vtensor<[40,144,77],f16>, !torch.int -> !torch.vtensor<[40,144,77,1],f16>
%1983 = torch.aten.permute %1982, %203 : !torch.vtensor<[40,144,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,77],f16>
%1984 = torch.aten.unsqueeze %1964, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%1985 = torch.aten.permute %1984, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%1986 = torch.aten.permute %1983, %203 : !torch.vtensor<[40,144,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,144,77,1],f16>
%1987 = torch.prim.ListConstruct %int11088, %int77, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1988 = torch.aten._reshape_alias %1986, %1977, %1987 : !torch.vtensor<[40,144,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,144,77],f16>
%1989 = torch.aten.permute %1985, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%1990 = torch.aten._reshape_alias %1989, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%1991 = torch.aten.bmm %1988, %1990 : !torch.vtensor<[40,144,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,144,64],f16>
%1992 = torch.aten.view %1991, %1917 : !torch.vtensor<[40,144,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,1,64],f16>
%1993 = torch.aten.permute %1992, %203 : !torch.vtensor<[40,144,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64,1],f16>
%1994 = torch.aten.view %1993, %1877 : !torch.vtensor<[40,144,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,144,64],f16>
%1995 = torch.aten._reshape_alias %1994, %1921, %1922 : !torch.vtensor<[40,144,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,144,64],f16>
%1996 = torch.aten.permute %1995, %189 : !torch.vtensor<[2,20,144,64],f16>, !torch.list<int> -> !torch.vtensor<[2,144,20,64],f16>
%1997 = torch.aten.clone %1996, %int0 : !torch.vtensor<[2,144,20,64],f16>, !torch.int -> !torch.vtensor<[2,144,20,64],f16>
%1998 = torch.aten._unsafe_view %1997, %1841 : !torch.vtensor<[2,144,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%1999 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2000 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2001 = torch.aten.t %2000 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2002 = torch.aten.view %1998, %1849 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%2003 = torch.aten.addmm %1999, %2002, %2001, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,1280],f16>
%2004 = torch.aten.view %2003, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%2005 = torch.aten.add.Tensor %2004, %1933, %int1 : !torch.vtensor<[2,144,1280],f16>, !torch.vtensor<[2,144,1280],f16>, !torch.int -> !torch.vtensor<[2,144,1280],f16>
%2006 = torch.aten._to_copy %2005, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f32>
%result0_107, %result1_108, %result2_109 = torch.aten.native_layer_norm %2006, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,144,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,144,1280],f32>, !torch.vtensor<[2,144,1],f32>, !torch.vtensor<[2,144,1],f32>
%2007 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%2008 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%2009 = torch.aten._to_copy %result0_107, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,144,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,144,1280],f16>
%2010 = torch.aten.t %2008 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%2011 = torch.aten.view %2009, %1849 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%2012 = torch.aten.addmm %2007, %2011, %2010, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,10240],f16>
%2013 = torch.prim.ListConstruct %int2, %int144, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2014 = torch.aten.view %2012, %2013 : !torch.vtensor<[288,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,144,10240],f16>
%2015 = torch.aten.slice.Tensor %2014, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,144,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,144,5120],f16>
%2016 = torch.aten.slice.Tensor %2014, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,144,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,144,5120],f16>
%2017 = torch.aten.gelu %2016, %str : !torch.vtensor<[2,144,5120],f16>, !torch.str -> !torch.vtensor<[2,144,5120],f16>
%2018 = torch.aten.mul.Tensor %2015, %2017 : !torch.vtensor<[2,144,5120],f16>, !torch.vtensor<[2,144,5120],f16> -> !torch.vtensor<[2,144,5120],f16>
%2019 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2020 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%2021 = torch.aten.t %2020 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%2022 = torch.prim.ListConstruct %int288, %int5120 : (!torch.int, !torch.int) -> !torch.list<int>
%2023 = torch.aten.view %2018, %2022 : !torch.vtensor<[2,144,5120],f16>, !torch.list<int> -> !torch.vtensor<[288,5120],f16>
%2024 = torch.aten.addmm %2019, %2023, %2021, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[288,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,1280],f16>
%2025 = torch.aten.view %2024, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%2026 = torch.aten.add.Tensor %2025, %2005, %int1 : !torch.vtensor<[2,144,1280],f16>, !torch.vtensor<[2,144,1280],f16>, !torch.int -> !torch.vtensor<[2,144,1280],f16>
%2027 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2028 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2029 = torch.aten.t %2028 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2030 = torch.aten.view %2026, %1849 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int> -> !torch.vtensor<[288,1280],f16>
%2031 = torch.aten.addmm %2027, %2030, %2029, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[288,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[288,1280],f16>
%2032 = torch.aten.view %2031, %1841 : !torch.vtensor<[288,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,144,1280],f16>
%2033 = torch.prim.ListConstruct %int2, %int12, %int12, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2034 = torch.prim.ListConstruct %int184320, %int15360, %int1280, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2035 = torch.aten._reshape_alias %2032, %2033, %2034 : !torch.vtensor<[2,144,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,12,12,1280],f16>
%2036 = torch.aten.permute %2035, %206 : !torch.vtensor<[2,12,12,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f16>
%2037 = torch.prim.ListConstruct %int184320, %int1, %int15360, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2038 = torch.aten._reshape_alias %2036, %1681, %2037 : !torch.vtensor<[2,1280,12,12],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f16>
%2039 = torch.aten.clone %2038, %int0 : !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2040 = torch.aten.add.Tensor %2039, %1822, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2041 = torch.aten._to_copy %2040, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%2042 = torch.aten._reshape_alias %2041, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_110, %result1_111 = torch.aten.var_mean.correction %2042, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2043 = torch.aten.add.Tensor %result0_110, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2044 = torch.aten.rsqrt %2043 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2045 = torch.aten.sub.Tensor %2042, %result1_111, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%2046 = torch.aten.mul.Tensor %2045, %2044 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%2047 = torch.aten.view %2046, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%2048 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2049 = torch.aten.unsqueeze %2048, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2050 = torch.aten.unsqueeze %2049, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2051 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2052 = torch.aten.unsqueeze %2051, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2053 = torch.aten.unsqueeze %2052, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2054 = torch.aten.mul.Tensor %2047, %2053 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%2055 = torch.aten.add.Tensor %2054, %2050, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%2056 = torch.aten._to_copy %2055, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%2057 = torch.aten.silu %2056 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%2058 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2059 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2060 = torch.aten._convolution %2057, %2059, %2058, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2061 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2062 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2063 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2064 = torch.aten.t %2063 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2065 = torch.aten.addmm %2062, %2061, %2064, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2066 = torch.aten.unsqueeze %2065, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2067 = torch.aten.unsqueeze %2066, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2068 = torch.aten.add.Tensor %2060, %2067, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2069 = torch.aten._to_copy %2068, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%2070 = torch.aten._reshape_alias %2069, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_112, %result1_113 = torch.aten.var_mean.correction %2070, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2071 = torch.aten.add.Tensor %result0_112, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2072 = torch.aten.rsqrt %2071 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2073 = torch.aten.sub.Tensor %2070, %result1_113, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%2074 = torch.aten.mul.Tensor %2073, %2072 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%2075 = torch.aten.view %2074, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%2076 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2077 = torch.aten.unsqueeze %2076, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2078 = torch.aten.unsqueeze %2077, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2079 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2080 = torch.aten.unsqueeze %2079, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2081 = torch.aten.unsqueeze %2080, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2082 = torch.aten.mul.Tensor %2075, %2081 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%2083 = torch.aten.add.Tensor %2082, %2078, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%2084 = torch.aten._to_copy %2083, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%2085 = torch.aten.silu %2084 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%2086 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2087 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2088 = torch.aten._convolution %2085, %2087, %2086, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2089 = torch.aten.add.Tensor %2040, %2088, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2090 = torch.prim.ListConstruct %2089, %1773 : (!torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>) -> !torch.list<vtensor>
%2091 = torch.aten.cat %2090, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,12,12],f16>
%2092 = torch.aten._to_copy %2091, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f32>
%2093 = torch.prim.ListConstruct %int2, %int32, %int80, %int144 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2094 = torch.prim.ListConstruct %int368640, %int11520, %int144, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2095 = torch.aten._reshape_alias %2092, %2093, %2094 : !torch.vtensor<[2,2560,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,80,144],f32>
%result0_114, %result1_115 = torch.aten.var_mean.correction %2095, %85, %int0, %true : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2096 = torch.aten.add.Tensor %result0_114, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2097 = torch.aten.rsqrt %2096 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2098 = torch.aten.sub.Tensor %2095, %result1_115, %int1 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,144],f32>
%2099 = torch.aten.mul.Tensor %2098, %2097 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,144],f32>
%2100 = torch.prim.ListConstruct %int2, %int2560, %int12, %int12 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2101 = torch.aten.view %2099, %2100 : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,12,12],f32>
%2102 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2103 = torch.aten.unsqueeze %2102, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2104 = torch.aten.unsqueeze %2103, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2105 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2106 = torch.aten.unsqueeze %2105, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2107 = torch.aten.unsqueeze %2106, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2108 = torch.aten.mul.Tensor %2101, %2107 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32> -> !torch.vtensor<[2,2560,12,12],f32>
%2109 = torch.aten.add.Tensor %2108, %2104, %int1 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32>, !torch.int -> !torch.vtensor<[2,2560,12,12],f32>
%2110 = torch.aten._to_copy %2109, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f16>
%2111 = torch.aten.silu %2110 : !torch.vtensor<[2,2560,12,12],f16> -> !torch.vtensor<[2,2560,12,12],f16>
%2112 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2113 = torch.aten._to_copy %11, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,3,3],f16>
%2114 = torch.aten._convolution %2111, %2113, %2112, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2115 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2116 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2117 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2118 = torch.aten.t %2117 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2119 = torch.aten.addmm %2116, %2115, %2118, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2120 = torch.aten.unsqueeze %2119, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2121 = torch.aten.unsqueeze %2120, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2122 = torch.aten.add.Tensor %2114, %2121, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2123 = torch.aten._to_copy %2122, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%2124 = torch.aten._reshape_alias %2123, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_116, %result1_117 = torch.aten.var_mean.correction %2124, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2125 = torch.aten.add.Tensor %result0_116, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2126 = torch.aten.rsqrt %2125 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2127 = torch.aten.sub.Tensor %2124, %result1_117, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%2128 = torch.aten.mul.Tensor %2127, %2126 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%2129 = torch.aten.view %2128, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%2130 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2131 = torch.aten.unsqueeze %2130, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2132 = torch.aten.unsqueeze %2131, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2133 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2134 = torch.aten.unsqueeze %2133, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2135 = torch.aten.unsqueeze %2134, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2136 = torch.aten.mul.Tensor %2129, %2135 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%2137 = torch.aten.add.Tensor %2136, %2132, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%2138 = torch.aten._to_copy %2137, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%2139 = torch.aten.silu %2138 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%2140 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2141 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2142 = torch.aten._convolution %2139, %2141, %2140, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2143 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2144 = torch.aten._to_copy %12, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,1,1],f16>
%2145 = torch.aten._convolution %2091, %2144, %2143, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2146 = torch.aten.add.Tensor %2145, %2142, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2147 = torch.prim.ListConstruct %2146, %1724 : (!torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>) -> !torch.list<vtensor>
%2148 = torch.aten.cat %2147, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,12,12],f16>
%2149 = torch.aten._to_copy %2148, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f32>
%2150 = torch.aten._reshape_alias %2149, %2093, %2094 : !torch.vtensor<[2,2560,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,80,144],f32>
%result0_118, %result1_119 = torch.aten.var_mean.correction %2150, %85, %int0, %true : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2151 = torch.aten.add.Tensor %result0_118, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2152 = torch.aten.rsqrt %2151 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2153 = torch.aten.sub.Tensor %2150, %result1_119, %int1 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,144],f32>
%2154 = torch.aten.mul.Tensor %2153, %2152 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,144],f32>
%2155 = torch.aten.view %2154, %2100 : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,12,12],f32>
%2156 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2157 = torch.aten.unsqueeze %2156, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2158 = torch.aten.unsqueeze %2157, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2159 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2160 = torch.aten.unsqueeze %2159, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2161 = torch.aten.unsqueeze %2160, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2162 = torch.aten.mul.Tensor %2155, %2161 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32> -> !torch.vtensor<[2,2560,12,12],f32>
%2163 = torch.aten.add.Tensor %2162, %2158, %int1 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32>, !torch.int -> !torch.vtensor<[2,2560,12,12],f32>
%2164 = torch.aten._to_copy %2163, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f16>
%2165 = torch.aten.silu %2164 : !torch.vtensor<[2,2560,12,12],f16> -> !torch.vtensor<[2,2560,12,12],f16>
%2166 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2167 = torch.aten._to_copy %11, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,3,3],f16>
%2168 = torch.aten._convolution %2165, %2167, %2166, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2169 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2170 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2171 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2172 = torch.aten.t %2171 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2173 = torch.aten.addmm %2170, %2169, %2172, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2174 = torch.aten.unsqueeze %2173, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2175 = torch.aten.unsqueeze %2174, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2176 = torch.aten.add.Tensor %2168, %2175, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2177 = torch.aten._to_copy %2176, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%2178 = torch.aten._reshape_alias %2177, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_120, %result1_121 = torch.aten.var_mean.correction %2178, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2179 = torch.aten.add.Tensor %result0_120, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2180 = torch.aten.rsqrt %2179 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2181 = torch.aten.sub.Tensor %2178, %result1_121, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%2182 = torch.aten.mul.Tensor %2181, %2180 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%2183 = torch.aten.view %2182, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%2184 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2185 = torch.aten.unsqueeze %2184, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2186 = torch.aten.unsqueeze %2185, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2187 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2188 = torch.aten.unsqueeze %2187, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2189 = torch.aten.unsqueeze %2188, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2190 = torch.aten.mul.Tensor %2183, %2189 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%2191 = torch.aten.add.Tensor %2190, %2186, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%2192 = torch.aten._to_copy %2191, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%2193 = torch.aten.silu %2192 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%2194 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2195 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2196 = torch.aten._convolution %2193, %2195, %2194, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2197 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2198 = torch.aten._to_copy %12, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,1,1],f16>
%2199 = torch.aten._convolution %2148, %2198, %2197, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2200 = torch.aten.add.Tensor %2199, %2196, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2201 = torch.prim.ListConstruct %2200, %1672 : (!torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>) -> !torch.list<vtensor>
%2202 = torch.aten.cat %2201, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,12,12],f16>
%2203 = torch.aten._to_copy %2202, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f32>
%2204 = torch.aten._reshape_alias %2203, %2093, %2094 : !torch.vtensor<[2,2560,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,80,144],f32>
%result0_122, %result1_123 = torch.aten.var_mean.correction %2204, %85, %int0, %true : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2205 = torch.aten.add.Tensor %result0_122, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2206 = torch.aten.rsqrt %2205 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2207 = torch.aten.sub.Tensor %2204, %result1_123, %int1 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,144],f32>
%2208 = torch.aten.mul.Tensor %2207, %2206 : !torch.vtensor<[2,32,80,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,144],f32>
%2209 = torch.aten.view %2208, %2100 : !torch.vtensor<[2,32,80,144],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,12,12],f32>
%2210 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2211 = torch.aten.unsqueeze %2210, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2212 = torch.aten.unsqueeze %2211, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2213 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2214 = torch.aten.unsqueeze %2213, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2215 = torch.aten.unsqueeze %2214, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2216 = torch.aten.mul.Tensor %2209, %2215 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32> -> !torch.vtensor<[2,2560,12,12],f32>
%2217 = torch.aten.add.Tensor %2216, %2212, %int1 : !torch.vtensor<[2,2560,12,12],f32>, !torch.vtensor<[1,2560,1,1],f32>, !torch.int -> !torch.vtensor<[2,2560,12,12],f32>
%2218 = torch.aten._to_copy %2217, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,2560,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,12,12],f16>
%2219 = torch.aten.silu %2218 : !torch.vtensor<[2,2560,12,12],f16> -> !torch.vtensor<[2,2560,12,12],f16>
%2220 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2221 = torch.aten._to_copy %11, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,3,3],f16>
%2222 = torch.aten._convolution %2219, %2221, %2220, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2223 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2224 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2225 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2226 = torch.aten.t %2225 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2227 = torch.aten.addmm %2224, %2223, %2226, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2228 = torch.aten.unsqueeze %2227, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2229 = torch.aten.unsqueeze %2228, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2230 = torch.aten.add.Tensor %2222, %2229, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2231 = torch.aten._to_copy %2230, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f32>
%2232 = torch.aten._reshape_alias %2231, %1674, %1675 : !torch.vtensor<[2,1280,12,12],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,144],f32>
%result0_124, %result1_125 = torch.aten.var_mean.correction %2232, %85, %int0, %true : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2233 = torch.aten.add.Tensor %result0_124, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2234 = torch.aten.rsqrt %2233 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2235 = torch.aten.sub.Tensor %2232, %result1_125, %int1 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,144],f32>
%2236 = torch.aten.mul.Tensor %2235, %2234 : !torch.vtensor<[2,32,40,144],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,144],f32>
%2237 = torch.aten.view %2236, %1681 : !torch.vtensor<[2,32,40,144],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,12,12],f32>
%2238 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2239 = torch.aten.unsqueeze %2238, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2240 = torch.aten.unsqueeze %2239, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2241 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2242 = torch.aten.unsqueeze %2241, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2243 = torch.aten.unsqueeze %2242, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2244 = torch.aten.mul.Tensor %2237, %2243 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,12,12],f32>
%2245 = torch.aten.add.Tensor %2244, %2240, %int1 : !torch.vtensor<[2,1280,12,12],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,12,12],f32>
%2246 = torch.aten._to_copy %2245, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,12,12],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,12,12],f16>
%2247 = torch.aten.silu %2246 : !torch.vtensor<[2,1280,12,12],f16> -> !torch.vtensor<[2,1280,12,12],f16>
%2248 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2249 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2250 = torch.aten._convolution %2247, %2249, %2248, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2251 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2252 = torch.aten._to_copy %12, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,1,1],f16>
%2253 = torch.aten._convolution %2202, %2252, %2251, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,12,12],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,12,12],f16>
%2254 = torch.aten.add.Tensor %2253, %2250, %int1 : !torch.vtensor<[2,1280,12,12],f16>, !torch.vtensor<[2,1280,12,12],f16>, !torch.int -> !torch.vtensor<[2,1280,12,12],f16>
%2255 = torch.prim.ListConstruct %int24, %int24 : (!torch.int, !torch.int) -> !torch.list<int>
%2256 = torch.aten.upsample_nearest2d %2254, %2255, %float2.000000e00, %float2.000000e00 : !torch.vtensor<[2,1280,12,12],f16>, !torch.list<int>, !torch.float, !torch.float -> !torch.vtensor<[2,1280,24,24],f16>
%2257 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2258 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2259 = torch.aten._convolution %2256, %2258, %2257, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2260 = torch.prim.ListConstruct %2259, %1669 : (!torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>) -> !torch.list<vtensor>
%2261 = torch.aten.cat %2260, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,24,24],f16>
%2262 = torch.aten._to_copy %2261, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,24,24],f32>
%2263 = torch.prim.ListConstruct %int2, %int32, %int80, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2264 = torch.prim.ListConstruct %int1474560, %int46080, %int576, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2265 = torch.aten._reshape_alias %2262, %2263, %2264 : !torch.vtensor<[2,2560,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,80,576],f32>
%result0_126, %result1_127 = torch.aten.var_mean.correction %2265, %85, %int0, %true : !torch.vtensor<[2,32,80,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2266 = torch.aten.add.Tensor %result0_126, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2267 = torch.aten.rsqrt %2266 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2268 = torch.aten.sub.Tensor %2265, %result1_127, %int1 : !torch.vtensor<[2,32,80,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,576],f32>
%2269 = torch.aten.mul.Tensor %2268, %2267 : !torch.vtensor<[2,32,80,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,576],f32>
%2270 = torch.prim.ListConstruct %int2, %int2560, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2271 = torch.aten.view %2269, %2270 : !torch.vtensor<[2,32,80,576],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,24,24],f32>
%2272 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2273 = torch.aten.unsqueeze %2272, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2274 = torch.aten.unsqueeze %2273, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2275 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2276 = torch.aten.unsqueeze %2275, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2277 = torch.aten.unsqueeze %2276, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2278 = torch.aten.mul.Tensor %2271, %2277 : !torch.vtensor<[2,2560,24,24],f32>, !torch.vtensor<[1,2560,1,1],f32> -> !torch.vtensor<[2,2560,24,24],f32>
%2279 = torch.aten.add.Tensor %2278, %2274, %int1 : !torch.vtensor<[2,2560,24,24],f32>, !torch.vtensor<[1,2560,1,1],f32>, !torch.int -> !torch.vtensor<[2,2560,24,24],f32>
%2280 = torch.aten._to_copy %2279, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,2560,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,24,24],f16>
%2281 = torch.aten.silu %2280 : !torch.vtensor<[2,2560,24,24],f16> -> !torch.vtensor<[2,2560,24,24],f16>
%2282 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2283 = torch.aten._to_copy %11, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,3,3],f16>
%2284 = torch.aten._convolution %2281, %2283, %2282, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,24,24],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2285 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2286 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2287 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2288 = torch.aten.t %2287 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2289 = torch.aten.addmm %2286, %2285, %2288, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2290 = torch.aten.unsqueeze %2289, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2291 = torch.aten.unsqueeze %2290, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2292 = torch.aten.add.Tensor %2284, %2291, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2293 = torch.aten._to_copy %2292, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2294 = torch.aten._reshape_alias %2293, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_128, %result1_129 = torch.aten.var_mean.correction %2294, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2295 = torch.aten.add.Tensor %result0_128, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2296 = torch.aten.rsqrt %2295 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2297 = torch.aten.sub.Tensor %2294, %result1_129, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2298 = torch.aten.mul.Tensor %2297, %2296 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2299 = torch.aten.view %2298, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2300 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2301 = torch.aten.unsqueeze %2300, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2302 = torch.aten.unsqueeze %2301, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2303 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2304 = torch.aten.unsqueeze %2303, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2305 = torch.aten.unsqueeze %2304, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2306 = torch.aten.mul.Tensor %2299, %2305 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2307 = torch.aten.add.Tensor %2306, %2302, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2308 = torch.aten._to_copy %2307, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%2309 = torch.aten.silu %2308 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%2310 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2311 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2312 = torch.aten._convolution %2309, %2311, %2310, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2313 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2314 = torch.aten._to_copy %12, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,1,1],f16>
%2315 = torch.aten._convolution %2261, %2314, %2313, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,24,24],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2316 = torch.aten.add.Tensor %2315, %2312, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2317 = torch.aten._to_copy %2316, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2318 = torch.aten._reshape_alias %2317, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_130, %result1_131 = torch.aten.var_mean.correction %2318, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2319 = torch.aten.add.Tensor %result0_130, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2320 = torch.aten.rsqrt %2319 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2321 = torch.aten.sub.Tensor %2318, %result1_131, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2322 = torch.aten.mul.Tensor %2321, %2320 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2323 = torch.aten.view %2322, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2324 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2325 = torch.aten.unsqueeze %2324, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2326 = torch.aten.unsqueeze %2325, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2327 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2328 = torch.aten.unsqueeze %2327, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2329 = torch.aten.unsqueeze %2328, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2330 = torch.aten.mul.Tensor %2323, %2329 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2331 = torch.aten.add.Tensor %2330, %2326, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2332 = torch.aten._reshape_alias %2331, %1183, %1217 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2333 = torch.aten.permute %2332, %151 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f32>
%2334 = torch.aten._reshape_alias %2333, %1220, %1221 : !torch.vtensor<[2,24,24,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f32>
%2335 = torch.aten.clone %2334, %int0 : !torch.vtensor<[2,576,1280],f32>, !torch.int -> !torch.vtensor<[2,576,1280],f32>
%2336 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2337 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2338 = torch.aten._to_copy %2335, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2339 = torch.aten.t %2337 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2340 = torch.aten.view %2338, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2341 = torch.aten.addmm %2336, %2340, %2339, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2342 = torch.aten.view %2341, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2343 = torch.aten._to_copy %2342, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_132, %result1_133, %result2_134 = torch.aten.native_layer_norm %2343, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2344 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2345 = torch.aten._to_copy %result0_132, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2346 = torch.aten.t %2344 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2347 = torch.aten._reshape_alias %2345, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2348 = torch.aten.mm %2347, %2346 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2349 = torch.aten._unsafe_view %2348, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2350 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2351 = torch.aten._to_copy %result0_132, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2352 = torch.aten.t %2350 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2353 = torch.aten._reshape_alias %2351, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2354 = torch.aten.mm %2353, %2352 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2355 = torch.aten._unsafe_view %2354, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2356 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2357 = torch.aten._to_copy %result0_132, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2358 = torch.aten.t %2356 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2359 = torch.aten._reshape_alias %2357, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2360 = torch.aten.mm %2359, %2358 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2361 = torch.aten._unsafe_view %2360, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2362 = torch.aten._reshape_alias %2349, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2363 = torch.aten.permute %2362, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2364 = torch.aten.clone %2363, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2365 = torch.aten._unsafe_view %2364, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2366 = torch.aten._reshape_alias %2355, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2367 = torch.aten.permute %2366, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2368 = torch.aten.clone %2367, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2369 = torch.aten._unsafe_view %2368, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2370 = torch.aten._reshape_alias %2361, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2371 = torch.aten.permute %2370, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2372 = torch.aten.clone %2371, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2373 = torch.aten._unsafe_view %2372, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2374 = torch.aten.unsqueeze %2365, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2375 = torch.aten.permute %2374, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2376 = torch.aten.unsqueeze %2369, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2377 = torch.aten.permute %2376, %206 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,576,64],f16>
%2378 = torch.aten.permute %2375, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2379 = torch.aten._reshape_alias %2378, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2380 = torch.aten.permute %2377, %211 : !torch.vtensor<[40,1,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,576,1],f16>
%2381 = torch.aten._reshape_alias %2380, %1276, %1277 : !torch.vtensor<[40,64,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,576],f16>
%2382 = torch.aten.bmm %2379, %2381 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,576],f16> -> !torch.vtensor<[40,576,576],f16>
%2383 = torch.aten.view %2382, %1280 : !torch.vtensor<[40,576,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2384 = torch.aten.permute %2383, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2385 = torch.aten.view %2384, %1283 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2386 = torch.aten.mul.Tensor %2385, %0 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,576],f16>
%2387 = torch.aten._softmax %2386, %int-1, %true : !torch.vtensor<[40,576,576],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,576],f32>
%2388 = torch.aten._to_copy %2387, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,576],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,576],f16>
%2389 = torch.aten.unsqueeze %2388, %int3 : !torch.vtensor<[40,576,576],f16>, !torch.int -> !torch.vtensor<[40,576,576,1],f16>
%2390 = torch.aten.permute %2389, %203 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2391 = torch.aten.unsqueeze %2373, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2392 = torch.aten.permute %2391, %211 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,576],f16>
%2393 = torch.aten.permute %2390, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2394 = torch.aten._reshape_alias %2393, %1283, %1293 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2395 = torch.aten.permute %2392, %211 : !torch.vtensor<[40,1,64,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2396 = torch.aten._reshape_alias %2395, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2397 = torch.aten.bmm %2394, %2396 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[40,576,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2398 = torch.aten.view %2397, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2399 = torch.aten.permute %2398, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2400 = torch.aten.view %2399, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2401 = torch.aten._reshape_alias %2400, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2402 = torch.aten.permute %2401, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2403 = torch.aten.clone %2402, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2404 = torch.aten._unsafe_view %2403, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2405 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2406 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2407 = torch.aten.t %2406 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2408 = torch.aten.view %2404, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2409 = torch.aten.addmm %2405, %2408, %2407, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2410 = torch.aten.view %2409, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2411 = torch.aten.add.Tensor %2410, %2342, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2412 = torch.aten._to_copy %2411, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_135, %result1_136, %result2_137 = torch.aten.native_layer_norm %2412, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2413 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2414 = torch.aten._to_copy %result0_135, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2415 = torch.aten.t %2413 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2416 = torch.aten._reshape_alias %2414, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2417 = torch.aten.mm %2416, %2415 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2418 = torch.aten._unsafe_view %2417, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2419 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2420 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2421 = torch.aten.t %2419 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2422 = torch.aten._reshape_alias %2420, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2423 = torch.aten.mm %2422, %2421 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2424 = torch.aten._unsafe_view %2423, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2425 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2426 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2427 = torch.aten.t %2425 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2428 = torch.aten._reshape_alias %2426, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2429 = torch.aten.mm %2428, %2427 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2430 = torch.aten._unsafe_view %2429, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2431 = torch.aten._reshape_alias %2418, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2432 = torch.aten.permute %2431, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2433 = torch.aten.clone %2432, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2434 = torch.aten._unsafe_view %2433, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2435 = torch.aten._reshape_alias %2424, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2436 = torch.aten.permute %2435, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2437 = torch.aten.clone %2436, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2438 = torch.aten._unsafe_view %2437, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2439 = torch.aten._reshape_alias %2430, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2440 = torch.aten.permute %2439, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2441 = torch.aten.clone %2440, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2442 = torch.aten._unsafe_view %2441, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2443 = torch.aten.unsqueeze %2434, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2444 = torch.aten.permute %2443, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2445 = torch.aten.unsqueeze %2438, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2446 = torch.aten.permute %2445, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%2447 = torch.aten.permute %2444, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2448 = torch.aten._reshape_alias %2447, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2449 = torch.aten.permute %2446, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%2450 = torch.aten._reshape_alias %2449, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%2451 = torch.aten.bmm %2448, %2450 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,576,77],f16>
%2452 = torch.aten.view %2451, %1360 : !torch.vtensor<[40,576,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2453 = torch.aten.permute %2452, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2454 = torch.aten.view %2453, %1363 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2455 = torch.aten.mul.Tensor %2454, %0 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,77],f16>
%2456 = torch.aten._softmax %2455, %int-1, %true : !torch.vtensor<[40,576,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,77],f32>
%2457 = torch.aten._to_copy %2456, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,77],f16>
%2458 = torch.aten.unsqueeze %2457, %int3 : !torch.vtensor<[40,576,77],f16>, !torch.int -> !torch.vtensor<[40,576,77,1],f16>
%2459 = torch.aten.permute %2458, %203 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2460 = torch.aten.unsqueeze %2442, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2461 = torch.aten.permute %2460, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%2462 = torch.aten.permute %2459, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2463 = torch.aten._reshape_alias %2462, %1363, %1373 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2464 = torch.aten.permute %2461, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%2465 = torch.aten._reshape_alias %2464, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2466 = torch.aten.bmm %2463, %2465 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2467 = torch.aten.view %2466, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2468 = torch.aten.permute %2467, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2469 = torch.aten.view %2468, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2470 = torch.aten._reshape_alias %2469, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2471 = torch.aten.permute %2470, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2472 = torch.aten.clone %2471, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2473 = torch.aten._unsafe_view %2472, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2474 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2475 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2476 = torch.aten.t %2475 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2477 = torch.aten.view %2473, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2478 = torch.aten.addmm %2474, %2477, %2476, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2479 = torch.aten.view %2478, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2480 = torch.aten.add.Tensor %2479, %2411, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2481 = torch.aten._to_copy %2480, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_138, %result1_139, %result2_140 = torch.aten.native_layer_norm %2481, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2482 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%2483 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%2484 = torch.aten._to_copy %result0_138, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2485 = torch.aten.t %2483 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%2486 = torch.aten.view %2484, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2487 = torch.aten.addmm %2482, %2486, %2485, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,10240],f16>
%2488 = torch.aten.view %2487, %1399 : !torch.vtensor<[1152,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,576,10240],f16>
%2489 = torch.aten.slice.Tensor %2488, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2490 = torch.aten.slice.Tensor %2488, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2491 = torch.aten.gelu %2490, %str : !torch.vtensor<[2,576,5120],f16>, !torch.str -> !torch.vtensor<[2,576,5120],f16>
%2492 = torch.aten.mul.Tensor %2489, %2491 : !torch.vtensor<[2,576,5120],f16>, !torch.vtensor<[2,576,5120],f16> -> !torch.vtensor<[2,576,5120],f16>
%2493 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2494 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%2495 = torch.aten.t %2494 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%2496 = torch.aten.view %2492, %1408 : !torch.vtensor<[2,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[1152,5120],f16>
%2497 = torch.aten.addmm %2493, %2496, %2495, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2498 = torch.aten.view %2497, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2499 = torch.aten.add.Tensor %2498, %2480, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2500 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2501 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2502 = torch.aten.t %2501 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2503 = torch.aten.view %2499, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2504 = torch.aten.addmm %2500, %2503, %2502, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2505 = torch.aten.view %2504, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2506 = torch.aten._reshape_alias %2505, %1419, %1420 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f16>
%2507 = torch.aten.permute %2506, %206 : !torch.vtensor<[2,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%2508 = torch.aten._reshape_alias %2507, %1183, %1423 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%2509 = torch.aten.clone %2508, %int0 : !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2510 = torch.aten.add.Tensor %2509, %2316, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2511 = torch.prim.ListConstruct %2510, %1426 : (!torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>) -> !torch.list<vtensor>
%2512 = torch.aten.cat %2511, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,2560,24,24],f16>
%2513 = torch.aten._to_copy %2512, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2560,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,24,24],f32>
%2514 = torch.aten._reshape_alias %2513, %2263, %2264 : !torch.vtensor<[2,2560,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,80,576],f32>
%result0_141, %result1_142 = torch.aten.var_mean.correction %2514, %85, %int0, %true : !torch.vtensor<[2,32,80,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2515 = torch.aten.add.Tensor %result0_141, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2516 = torch.aten.rsqrt %2515 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2517 = torch.aten.sub.Tensor %2514, %result1_142, %int1 : !torch.vtensor<[2,32,80,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,80,576],f32>
%2518 = torch.aten.mul.Tensor %2517, %2516 : !torch.vtensor<[2,32,80,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,80,576],f32>
%2519 = torch.aten.view %2518, %2270 : !torch.vtensor<[2,32,80,576],f32>, !torch.list<int> -> !torch.vtensor<[2,2560,24,24],f32>
%2520 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2521 = torch.aten.unsqueeze %2520, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2522 = torch.aten.unsqueeze %2521, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2523 = torch.aten.unsqueeze %44, %int0 : !torch.vtensor<[2560],f32>, !torch.int -> !torch.vtensor<[1,2560],f32>
%2524 = torch.aten.unsqueeze %2523, %int2 : !torch.vtensor<[1,2560],f32>, !torch.int -> !torch.vtensor<[1,2560,1],f32>
%2525 = torch.aten.unsqueeze %2524, %int3 : !torch.vtensor<[1,2560,1],f32>, !torch.int -> !torch.vtensor<[1,2560,1,1],f32>
%2526 = torch.aten.mul.Tensor %2519, %2525 : !torch.vtensor<[2,2560,24,24],f32>, !torch.vtensor<[1,2560,1,1],f32> -> !torch.vtensor<[2,2560,24,24],f32>
%2527 = torch.aten.add.Tensor %2526, %2522, %int1 : !torch.vtensor<[2,2560,24,24],f32>, !torch.vtensor<[1,2560,1,1],f32>, !torch.int -> !torch.vtensor<[2,2560,24,24],f32>
%2528 = torch.aten._to_copy %2527, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,2560,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2560,24,24],f16>
%2529 = torch.aten.silu %2528 : !torch.vtensor<[2,2560,24,24],f16> -> !torch.vtensor<[2,2560,24,24],f16>
%2530 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2531 = torch.aten._to_copy %11, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,3,3],f16>
%2532 = torch.aten._convolution %2529, %2531, %2530, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,24,24],f16>, !torch.vtensor<[1280,2560,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2533 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2534 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2535 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2536 = torch.aten.t %2535 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2537 = torch.aten.addmm %2534, %2533, %2536, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2538 = torch.aten.unsqueeze %2537, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2539 = torch.aten.unsqueeze %2538, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2540 = torch.aten.add.Tensor %2532, %2539, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2541 = torch.aten._to_copy %2540, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2542 = torch.aten._reshape_alias %2541, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_143, %result1_144 = torch.aten.var_mean.correction %2542, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2543 = torch.aten.add.Tensor %result0_143, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2544 = torch.aten.rsqrt %2543 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2545 = torch.aten.sub.Tensor %2542, %result1_144, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2546 = torch.aten.mul.Tensor %2545, %2544 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2547 = torch.aten.view %2546, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2548 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2549 = torch.aten.unsqueeze %2548, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2550 = torch.aten.unsqueeze %2549, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2551 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2552 = torch.aten.unsqueeze %2551, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2553 = torch.aten.unsqueeze %2552, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2554 = torch.aten.mul.Tensor %2547, %2553 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2555 = torch.aten.add.Tensor %2554, %2550, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2556 = torch.aten._to_copy %2555, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%2557 = torch.aten.silu %2556 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%2558 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2559 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2560 = torch.aten._convolution %2557, %2559, %2558, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2561 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2562 = torch.aten._to_copy %12, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,2560,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,2560,1,1],f16>
%2563 = torch.aten._convolution %2512, %2562, %2561, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,2560,24,24],f16>, !torch.vtensor<[1280,2560,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2564 = torch.aten.add.Tensor %2563, %2560, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2565 = torch.aten._to_copy %2564, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2566 = torch.aten._reshape_alias %2565, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_145, %result1_146 = torch.aten.var_mean.correction %2566, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2567 = torch.aten.add.Tensor %result0_145, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2568 = torch.aten.rsqrt %2567 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2569 = torch.aten.sub.Tensor %2566, %result1_146, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2570 = torch.aten.mul.Tensor %2569, %2568 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2571 = torch.aten.view %2570, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2572 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2573 = torch.aten.unsqueeze %2572, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2574 = torch.aten.unsqueeze %2573, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2575 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2576 = torch.aten.unsqueeze %2575, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2577 = torch.aten.unsqueeze %2576, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2578 = torch.aten.mul.Tensor %2571, %2577 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2579 = torch.aten.add.Tensor %2578, %2574, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2580 = torch.aten._reshape_alias %2579, %1183, %1217 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2581 = torch.aten.permute %2580, %151 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f32>
%2582 = torch.aten._reshape_alias %2581, %1220, %1221 : !torch.vtensor<[2,24,24,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f32>
%2583 = torch.aten.clone %2582, %int0 : !torch.vtensor<[2,576,1280],f32>, !torch.int -> !torch.vtensor<[2,576,1280],f32>
%2584 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2585 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2586 = torch.aten._to_copy %2583, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2587 = torch.aten.t %2585 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2588 = torch.aten.view %2586, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2589 = torch.aten.addmm %2584, %2588, %2587, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2590 = torch.aten.view %2589, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2591 = torch.aten._to_copy %2590, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_147, %result1_148, %result2_149 = torch.aten.native_layer_norm %2591, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2592 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2593 = torch.aten._to_copy %result0_147, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2594 = torch.aten.t %2592 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2595 = torch.aten._reshape_alias %2593, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2596 = torch.aten.mm %2595, %2594 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2597 = torch.aten._unsafe_view %2596, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2598 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2599 = torch.aten._to_copy %result0_147, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2600 = torch.aten.t %2598 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2601 = torch.aten._reshape_alias %2599, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2602 = torch.aten.mm %2601, %2600 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2603 = torch.aten._unsafe_view %2602, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2604 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2605 = torch.aten._to_copy %result0_147, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2606 = torch.aten.t %2604 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2607 = torch.aten._reshape_alias %2605, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2608 = torch.aten.mm %2607, %2606 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2609 = torch.aten._unsafe_view %2608, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2610 = torch.aten._reshape_alias %2597, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2611 = torch.aten.permute %2610, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2612 = torch.aten.clone %2611, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2613 = torch.aten._unsafe_view %2612, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2614 = torch.aten._reshape_alias %2603, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2615 = torch.aten.permute %2614, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2616 = torch.aten.clone %2615, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2617 = torch.aten._unsafe_view %2616, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2618 = torch.aten._reshape_alias %2609, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2619 = torch.aten.permute %2618, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2620 = torch.aten.clone %2619, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2621 = torch.aten._unsafe_view %2620, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2622 = torch.aten.unsqueeze %2613, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2623 = torch.aten.permute %2622, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2624 = torch.aten.unsqueeze %2617, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2625 = torch.aten.permute %2624, %206 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,576,64],f16>
%2626 = torch.aten.permute %2623, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2627 = torch.aten._reshape_alias %2626, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2628 = torch.aten.permute %2625, %211 : !torch.vtensor<[40,1,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,576,1],f16>
%2629 = torch.aten._reshape_alias %2628, %1276, %1277 : !torch.vtensor<[40,64,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,576],f16>
%2630 = torch.aten.bmm %2627, %2629 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,576],f16> -> !torch.vtensor<[40,576,576],f16>
%2631 = torch.aten.view %2630, %1280 : !torch.vtensor<[40,576,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2632 = torch.aten.permute %2631, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2633 = torch.aten.view %2632, %1283 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2634 = torch.aten.mul.Tensor %2633, %0 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,576],f16>
%2635 = torch.aten._softmax %2634, %int-1, %true : !torch.vtensor<[40,576,576],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,576],f32>
%2636 = torch.aten._to_copy %2635, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,576],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,576],f16>
%2637 = torch.aten.unsqueeze %2636, %int3 : !torch.vtensor<[40,576,576],f16>, !torch.int -> !torch.vtensor<[40,576,576,1],f16>
%2638 = torch.aten.permute %2637, %203 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2639 = torch.aten.unsqueeze %2621, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2640 = torch.aten.permute %2639, %211 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,576],f16>
%2641 = torch.aten.permute %2638, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2642 = torch.aten._reshape_alias %2641, %1283, %1293 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2643 = torch.aten.permute %2640, %211 : !torch.vtensor<[40,1,64,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2644 = torch.aten._reshape_alias %2643, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2645 = torch.aten.bmm %2642, %2644 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[40,576,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2646 = torch.aten.view %2645, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2647 = torch.aten.permute %2646, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2648 = torch.aten.view %2647, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2649 = torch.aten._reshape_alias %2648, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2650 = torch.aten.permute %2649, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2651 = torch.aten.clone %2650, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2652 = torch.aten._unsafe_view %2651, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2653 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2654 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2655 = torch.aten.t %2654 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2656 = torch.aten.view %2652, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2657 = torch.aten.addmm %2653, %2656, %2655, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2658 = torch.aten.view %2657, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2659 = torch.aten.add.Tensor %2658, %2590, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2660 = torch.aten._to_copy %2659, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_150, %result1_151, %result2_152 = torch.aten.native_layer_norm %2660, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2661 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2662 = torch.aten._to_copy %result0_150, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2663 = torch.aten.t %2661 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2664 = torch.aten._reshape_alias %2662, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2665 = torch.aten.mm %2664, %2663 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2666 = torch.aten._unsafe_view %2665, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2667 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2668 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2669 = torch.aten.t %2667 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2670 = torch.aten._reshape_alias %2668, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2671 = torch.aten.mm %2670, %2669 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2672 = torch.aten._unsafe_view %2671, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2673 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2674 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2675 = torch.aten.t %2673 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2676 = torch.aten._reshape_alias %2674, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2677 = torch.aten.mm %2676, %2675 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2678 = torch.aten._unsafe_view %2677, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2679 = torch.aten._reshape_alias %2666, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2680 = torch.aten.permute %2679, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2681 = torch.aten.clone %2680, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2682 = torch.aten._unsafe_view %2681, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2683 = torch.aten._reshape_alias %2672, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2684 = torch.aten.permute %2683, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2685 = torch.aten.clone %2684, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2686 = torch.aten._unsafe_view %2685, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2687 = torch.aten._reshape_alias %2678, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2688 = torch.aten.permute %2687, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2689 = torch.aten.clone %2688, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2690 = torch.aten._unsafe_view %2689, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2691 = torch.aten.unsqueeze %2682, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2692 = torch.aten.permute %2691, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2693 = torch.aten.unsqueeze %2686, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2694 = torch.aten.permute %2693, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%2695 = torch.aten.permute %2692, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2696 = torch.aten._reshape_alias %2695, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2697 = torch.aten.permute %2694, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%2698 = torch.aten._reshape_alias %2697, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%2699 = torch.aten.bmm %2696, %2698 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,576,77],f16>
%2700 = torch.aten.view %2699, %1360 : !torch.vtensor<[40,576,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2701 = torch.aten.permute %2700, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2702 = torch.aten.view %2701, %1363 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2703 = torch.aten.mul.Tensor %2702, %0 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,77],f16>
%2704 = torch.aten._softmax %2703, %int-1, %true : !torch.vtensor<[40,576,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,77],f32>
%2705 = torch.aten._to_copy %2704, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,77],f16>
%2706 = torch.aten.unsqueeze %2705, %int3 : !torch.vtensor<[40,576,77],f16>, !torch.int -> !torch.vtensor<[40,576,77,1],f16>
%2707 = torch.aten.permute %2706, %203 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2708 = torch.aten.unsqueeze %2690, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2709 = torch.aten.permute %2708, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%2710 = torch.aten.permute %2707, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2711 = torch.aten._reshape_alias %2710, %1363, %1373 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2712 = torch.aten.permute %2709, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%2713 = torch.aten._reshape_alias %2712, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2714 = torch.aten.bmm %2711, %2713 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2715 = torch.aten.view %2714, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2716 = torch.aten.permute %2715, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2717 = torch.aten.view %2716, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2718 = torch.aten._reshape_alias %2717, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2719 = torch.aten.permute %2718, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2720 = torch.aten.clone %2719, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2721 = torch.aten._unsafe_view %2720, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2722 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2723 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2724 = torch.aten.t %2723 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2725 = torch.aten.view %2721, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2726 = torch.aten.addmm %2722, %2725, %2724, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2727 = torch.aten.view %2726, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2728 = torch.aten.add.Tensor %2727, %2659, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2729 = torch.aten._to_copy %2728, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_153, %result1_154, %result2_155 = torch.aten.native_layer_norm %2729, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2730 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%2731 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%2732 = torch.aten._to_copy %result0_153, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2733 = torch.aten.t %2731 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%2734 = torch.aten.view %2732, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2735 = torch.aten.addmm %2730, %2734, %2733, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,10240],f16>
%2736 = torch.aten.view %2735, %1399 : !torch.vtensor<[1152,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,576,10240],f16>
%2737 = torch.aten.slice.Tensor %2736, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2738 = torch.aten.slice.Tensor %2736, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2739 = torch.aten.gelu %2738, %str : !torch.vtensor<[2,576,5120],f16>, !torch.str -> !torch.vtensor<[2,576,5120],f16>
%2740 = torch.aten.mul.Tensor %2737, %2739 : !torch.vtensor<[2,576,5120],f16>, !torch.vtensor<[2,576,5120],f16> -> !torch.vtensor<[2,576,5120],f16>
%2741 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2742 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%2743 = torch.aten.t %2742 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%2744 = torch.aten.view %2740, %1408 : !torch.vtensor<[2,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[1152,5120],f16>
%2745 = torch.aten.addmm %2741, %2744, %2743, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2746 = torch.aten.view %2745, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2747 = torch.aten.add.Tensor %2746, %2728, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2748 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2749 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2750 = torch.aten.t %2749 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2751 = torch.aten.view %2747, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2752 = torch.aten.addmm %2748, %2751, %2750, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2753 = torch.aten.view %2752, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2754 = torch.aten._reshape_alias %2753, %1419, %1420 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f16>
%2755 = torch.aten.permute %2754, %206 : !torch.vtensor<[2,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%2756 = torch.aten._reshape_alias %2755, %1183, %1423 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%2757 = torch.aten.clone %2756, %int0 : !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2758 = torch.aten.add.Tensor %2757, %2564, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2759 = torch.prim.ListConstruct %2758, %1143 : (!torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,640,24,24],f16>) -> !torch.list<vtensor>
%2760 = torch.aten.cat %2759, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,24,24],f16>
%2761 = torch.aten._to_copy %2760, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1920,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,24,24],f32>
%2762 = torch.prim.ListConstruct %int2, %int32, %int60, %int576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2763 = torch.prim.ListConstruct %int1105920, %int34560, %int576, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2764 = torch.aten._reshape_alias %2761, %2762, %2763 : !torch.vtensor<[2,1920,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,60,576],f32>
%result0_156, %result1_157 = torch.aten.var_mean.correction %2764, %85, %int0, %true : !torch.vtensor<[2,32,60,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2765 = torch.aten.add.Tensor %result0_156, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2766 = torch.aten.rsqrt %2765 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2767 = torch.aten.sub.Tensor %2764, %result1_157, %int1 : !torch.vtensor<[2,32,60,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,576],f32>
%2768 = torch.aten.mul.Tensor %2767, %2766 : !torch.vtensor<[2,32,60,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,576],f32>
%2769 = torch.prim.ListConstruct %int2, %int1920, %int24, %int24 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2770 = torch.aten.view %2768, %2769 : !torch.vtensor<[2,32,60,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,24,24],f32>
%2771 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[1920],f32>, !torch.int -> !torch.vtensor<[1,1920],f32>
%2772 = torch.aten.unsqueeze %2771, %int2 : !torch.vtensor<[1,1920],f32>, !torch.int -> !torch.vtensor<[1,1920,1],f32>
%2773 = torch.aten.unsqueeze %2772, %int3 : !torch.vtensor<[1,1920,1],f32>, !torch.int -> !torch.vtensor<[1,1920,1,1],f32>
%2774 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[1920],f32>, !torch.int -> !torch.vtensor<[1,1920],f32>
%2775 = torch.aten.unsqueeze %2774, %int2 : !torch.vtensor<[1,1920],f32>, !torch.int -> !torch.vtensor<[1,1920,1],f32>
%2776 = torch.aten.unsqueeze %2775, %int3 : !torch.vtensor<[1,1920,1],f32>, !torch.int -> !torch.vtensor<[1,1920,1,1],f32>
%2777 = torch.aten.mul.Tensor %2770, %2776 : !torch.vtensor<[2,1920,24,24],f32>, !torch.vtensor<[1,1920,1,1],f32> -> !torch.vtensor<[2,1920,24,24],f32>
%2778 = torch.aten.add.Tensor %2777, %2773, %int1 : !torch.vtensor<[2,1920,24,24],f32>, !torch.vtensor<[1,1920,1,1],f32>, !torch.int -> !torch.vtensor<[2,1920,24,24],f32>
%2779 = torch.aten._to_copy %2778, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1920,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,24,24],f16>
%2780 = torch.aten.silu %2779 : !torch.vtensor<[2,1920,24,24],f16> -> !torch.vtensor<[2,1920,24,24],f16>
%2781 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2782 = torch.aten._to_copy %13, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1920,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1920,3,3],f16>
%2783 = torch.aten._convolution %2780, %2782, %2781, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,24,24],f16>, !torch.vtensor<[1280,1920,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2784 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%2785 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2786 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2787 = torch.aten.t %2786 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2788 = torch.aten.addmm %2785, %2784, %2787, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,1280],f16>
%2789 = torch.aten.unsqueeze %2788, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16>
%2790 = torch.aten.unsqueeze %2789, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16>
%2791 = torch.aten.add.Tensor %2783, %2790, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2792 = torch.aten._to_copy %2791, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2793 = torch.aten._reshape_alias %2792, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_158, %result1_159 = torch.aten.var_mean.correction %2793, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2794 = torch.aten.add.Tensor %result0_158, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2795 = torch.aten.rsqrt %2794 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2796 = torch.aten.sub.Tensor %2793, %result1_159, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2797 = torch.aten.mul.Tensor %2796, %2795 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2798 = torch.aten.view %2797, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2799 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2800 = torch.aten.unsqueeze %2799, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2801 = torch.aten.unsqueeze %2800, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2802 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2803 = torch.aten.unsqueeze %2802, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2804 = torch.aten.unsqueeze %2803, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2805 = torch.aten.mul.Tensor %2798, %2804 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2806 = torch.aten.add.Tensor %2805, %2801, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2807 = torch.aten._to_copy %2806, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f16>
%2808 = torch.aten.silu %2807 : !torch.vtensor<[2,1280,24,24],f16> -> !torch.vtensor<[2,1280,24,24],f16>
%2809 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2810 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%2811 = torch.aten._convolution %2808, %2810, %2809, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2812 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2813 = torch.aten._to_copy %14, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1920,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1920,1,1],f16>
%2814 = torch.aten._convolution %2760, %2813, %2812, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,24,24],f16>, !torch.vtensor<[1280,1920,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,24,24],f16>
%2815 = torch.aten.add.Tensor %2814, %2811, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%2816 = torch.aten._to_copy %2815, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,24,24],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,24,24],f32>
%2817 = torch.aten._reshape_alias %2816, %1176, %1177 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,576],f32>
%result0_160, %result1_161 = torch.aten.var_mean.correction %2817, %85, %int0, %true : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%2818 = torch.aten.add.Tensor %result0_160, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%2819 = torch.aten.rsqrt %2818 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%2820 = torch.aten.sub.Tensor %2817, %result1_161, %int1 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,576],f32>
%2821 = torch.aten.mul.Tensor %2820, %2819 : !torch.vtensor<[2,32,40,576],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,576],f32>
%2822 = torch.aten.view %2821, %1183 : !torch.vtensor<[2,32,40,576],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2823 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2824 = torch.aten.unsqueeze %2823, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2825 = torch.aten.unsqueeze %2824, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2826 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%2827 = torch.aten.unsqueeze %2826, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%2828 = torch.aten.unsqueeze %2827, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%2829 = torch.aten.mul.Tensor %2822, %2828 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,24,24],f32>
%2830 = torch.aten.add.Tensor %2829, %2825, %int1 : !torch.vtensor<[2,1280,24,24],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,24,24],f32>
%2831 = torch.aten._reshape_alias %2830, %1183, %1217 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f32>
%2832 = torch.aten.permute %2831, %151 : !torch.vtensor<[2,1280,24,24],f32>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f32>
%2833 = torch.aten._reshape_alias %2832, %1220, %1221 : !torch.vtensor<[2,24,24,1280],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f32>
%2834 = torch.aten.clone %2833, %int0 : !torch.vtensor<[2,576,1280],f32>, !torch.int -> !torch.vtensor<[2,576,1280],f32>
%2835 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2836 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2837 = torch.aten._to_copy %2834, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2838 = torch.aten.t %2836 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2839 = torch.aten.view %2837, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2840 = torch.aten.addmm %2835, %2839, %2838, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2841 = torch.aten.view %2840, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2842 = torch.aten._to_copy %2841, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_162, %result1_163, %result2_164 = torch.aten.native_layer_norm %2842, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2843 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2844 = torch.aten._to_copy %result0_162, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2845 = torch.aten.t %2843 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2846 = torch.aten._reshape_alias %2844, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2847 = torch.aten.mm %2846, %2845 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2848 = torch.aten._unsafe_view %2847, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2849 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2850 = torch.aten._to_copy %result0_162, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2851 = torch.aten.t %2849 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2852 = torch.aten._reshape_alias %2850, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2853 = torch.aten.mm %2852, %2851 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2854 = torch.aten._unsafe_view %2853, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2855 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2856 = torch.aten._to_copy %result0_162, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2857 = torch.aten.t %2855 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2858 = torch.aten._reshape_alias %2856, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2859 = torch.aten.mm %2858, %2857 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2860 = torch.aten._unsafe_view %2859, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2861 = torch.aten._reshape_alias %2848, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2862 = torch.aten.permute %2861, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2863 = torch.aten.clone %2862, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2864 = torch.aten._unsafe_view %2863, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2865 = torch.aten._reshape_alias %2854, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2866 = torch.aten.permute %2865, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2867 = torch.aten.clone %2866, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2868 = torch.aten._unsafe_view %2867, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2869 = torch.aten._reshape_alias %2860, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2870 = torch.aten.permute %2869, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2871 = torch.aten.clone %2870, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2872 = torch.aten._unsafe_view %2871, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2873 = torch.aten.unsqueeze %2864, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2874 = torch.aten.permute %2873, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2875 = torch.aten.unsqueeze %2868, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2876 = torch.aten.permute %2875, %206 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,576,64],f16>
%2877 = torch.aten.permute %2874, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2878 = torch.aten._reshape_alias %2877, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2879 = torch.aten.permute %2876, %211 : !torch.vtensor<[40,1,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,576,1],f16>
%2880 = torch.aten._reshape_alias %2879, %1276, %1277 : !torch.vtensor<[40,64,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,576],f16>
%2881 = torch.aten.bmm %2878, %2880 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,576],f16> -> !torch.vtensor<[40,576,576],f16>
%2882 = torch.aten.view %2881, %1280 : !torch.vtensor<[40,576,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2883 = torch.aten.permute %2882, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2884 = torch.aten.view %2883, %1283 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2885 = torch.aten.mul.Tensor %2884, %0 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,576],f16>
%2886 = torch.aten._softmax %2885, %int-1, %true : !torch.vtensor<[40,576,576],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,576],f32>
%2887 = torch.aten._to_copy %2886, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,576],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,576],f16>
%2888 = torch.aten.unsqueeze %2887, %int3 : !torch.vtensor<[40,576,576],f16>, !torch.int -> !torch.vtensor<[40,576,576,1],f16>
%2889 = torch.aten.permute %2888, %203 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,576],f16>
%2890 = torch.aten.unsqueeze %2872, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2891 = torch.aten.permute %2890, %211 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,576],f16>
%2892 = torch.aten.permute %2889, %203 : !torch.vtensor<[40,576,1,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,576,1],f16>
%2893 = torch.aten._reshape_alias %2892, %1283, %1293 : !torch.vtensor<[40,576,576,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,576],f16>
%2894 = torch.aten.permute %2891, %211 : !torch.vtensor<[40,1,64,576],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2895 = torch.aten._reshape_alias %2894, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2896 = torch.aten.bmm %2893, %2895 : !torch.vtensor<[40,576,576],f16>, !torch.vtensor<[40,576,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2897 = torch.aten.view %2896, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2898 = torch.aten.permute %2897, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2899 = torch.aten.view %2898, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2900 = torch.aten._reshape_alias %2899, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2901 = torch.aten.permute %2900, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2902 = torch.aten.clone %2901, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2903 = torch.aten._unsafe_view %2902, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2904 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2905 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2906 = torch.aten.t %2905 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2907 = torch.aten.view %2903, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2908 = torch.aten.addmm %2904, %2907, %2906, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2909 = torch.aten.view %2908, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2910 = torch.aten.add.Tensor %2909, %2841, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2911 = torch.aten._to_copy %2910, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_165, %result1_166, %result2_167 = torch.aten.native_layer_norm %2911, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2912 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2913 = torch.aten._to_copy %result0_165, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2914 = torch.aten.t %2912 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2915 = torch.aten._reshape_alias %2913, %1228, %1237 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2916 = torch.aten.mm %2915, %2914 : !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1152,1280],f16>
%2917 = torch.aten._unsafe_view %2916, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2918 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2919 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2920 = torch.aten.t %2918 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2921 = torch.aten._reshape_alias %2919, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2922 = torch.aten.mm %2921, %2920 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2923 = torch.aten._unsafe_view %2922, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2924 = torch.aten._to_copy %15, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1024],f16>
%2925 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%2926 = torch.aten.t %2924 : !torch.vtensor<[1280,1024],f16> -> !torch.vtensor<[1024,1280],f16>
%2927 = torch.aten._reshape_alias %2925, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%2928 = torch.aten.mm %2927, %2926 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[154,1280],f16>
%2929 = torch.aten._unsafe_view %2928, %1327 : !torch.vtensor<[154,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,77,1280],f16>
%2930 = torch.aten._reshape_alias %2917, %1253, %1254 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2931 = torch.aten.permute %2930, %189 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2932 = torch.aten.clone %2931, %int0 : !torch.vtensor<[2,20,576,64],f16>, !torch.int -> !torch.vtensor<[2,20,576,64],f16>
%2933 = torch.aten._unsafe_view %2932, %1258 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2934 = torch.aten._reshape_alias %2923, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2935 = torch.aten.permute %2934, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2936 = torch.aten.clone %2935, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2937 = torch.aten._unsafe_view %2936, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2938 = torch.aten._reshape_alias %2929, %1339, %1340 : !torch.vtensor<[2,77,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,20,64],f16>
%2939 = torch.aten.permute %2938, %189 : !torch.vtensor<[2,77,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,77,64],f16>
%2940 = torch.aten.clone %2939, %int0 : !torch.vtensor<[2,20,77,64],f16>, !torch.int -> !torch.vtensor<[2,20,77,64],f16>
%2941 = torch.aten._unsafe_view %2940, %1344 : !torch.vtensor<[2,20,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2942 = torch.aten.unsqueeze %2933, %int3 : !torch.vtensor<[40,576,64],f16>, !torch.int -> !torch.vtensor<[40,576,64,1],f16>
%2943 = torch.aten.permute %2942, %203 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2944 = torch.aten.unsqueeze %2937, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2945 = torch.aten.permute %2944, %206 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,77,64],f16>
%2946 = torch.aten.permute %2943, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2947 = torch.aten._reshape_alias %2946, %1258, %1273 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2948 = torch.aten.permute %2945, %211 : !torch.vtensor<[40,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,77,1],f16>
%2949 = torch.aten._reshape_alias %2948, %1357, %297 : !torch.vtensor<[40,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,64,77],f16>
%2950 = torch.aten.bmm %2947, %2949 : !torch.vtensor<[40,576,64],f16>, !torch.vtensor<[40,64,77],f16> -> !torch.vtensor<[40,576,77],f16>
%2951 = torch.aten.view %2950, %1360 : !torch.vtensor<[40,576,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2952 = torch.aten.permute %2951, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2953 = torch.aten.view %2952, %1363 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2954 = torch.aten.mul.Tensor %2953, %0 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[40,576,77],f16>
%2955 = torch.aten._softmax %2954, %int-1, %true : !torch.vtensor<[40,576,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,576,77],f32>
%2956 = torch.aten._to_copy %2955, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[40,576,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[40,576,77],f16>
%2957 = torch.aten.unsqueeze %2956, %int3 : !torch.vtensor<[40,576,77],f16>, !torch.int -> !torch.vtensor<[40,576,77,1],f16>
%2958 = torch.aten.permute %2957, %203 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,77],f16>
%2959 = torch.aten.unsqueeze %2941, %int3 : !torch.vtensor<[40,77,64],f16>, !torch.int -> !torch.vtensor<[40,77,64,1],f16>
%2960 = torch.aten.permute %2959, %211 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,1,64,77],f16>
%2961 = torch.aten.permute %2958, %203 : !torch.vtensor<[40,576,1,77],f16>, !torch.list<int> -> !torch.vtensor<[40,576,77,1],f16>
%2962 = torch.aten._reshape_alias %2961, %1363, %1373 : !torch.vtensor<[40,576,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,576,77],f16>
%2963 = torch.aten.permute %2960, %211 : !torch.vtensor<[40,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[40,77,64,1],f16>
%2964 = torch.aten._reshape_alias %2963, %1344, %316 : !torch.vtensor<[40,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[40,77,64],f16>
%2965 = torch.aten.bmm %2962, %2964 : !torch.vtensor<[40,576,77],f16>, !torch.vtensor<[40,77,64],f16> -> !torch.vtensor<[40,576,64],f16>
%2966 = torch.aten.view %2965, %1298 : !torch.vtensor<[40,576,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,1,64],f16>
%2967 = torch.aten.permute %2966, %203 : !torch.vtensor<[40,576,1,64],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64,1],f16>
%2968 = torch.aten.view %2967, %1258 : !torch.vtensor<[40,576,64,1],f16>, !torch.list<int> -> !torch.vtensor<[40,576,64],f16>
%2969 = torch.aten._reshape_alias %2968, %1302, %1303 : !torch.vtensor<[40,576,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,20,576,64],f16>
%2970 = torch.aten.permute %2969, %189 : !torch.vtensor<[2,20,576,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,20,64],f16>
%2971 = torch.aten.clone %2970, %int0 : !torch.vtensor<[2,576,20,64],f16>, !torch.int -> !torch.vtensor<[2,576,20,64],f16>
%2972 = torch.aten._unsafe_view %2971, %1220 : !torch.vtensor<[2,576,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2973 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2974 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%2975 = torch.aten.t %2974 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%2976 = torch.aten.view %2972, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2977 = torch.aten.addmm %2973, %2976, %2975, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2978 = torch.aten.view %2977, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2979 = torch.aten.add.Tensor %2978, %2910, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2980 = torch.aten._to_copy %2979, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f32>
%result0_168, %result1_169, %result2_170 = torch.aten.native_layer_norm %2980, %1233, %24, %24, %float1.000000e-05 : !torch.vtensor<[2,576,1280],f32>, !torch.list<int>, !torch.vtensor<[1280],f32>, !torch.vtensor<[1280],f32>, !torch.float -> !torch.vtensor<[2,576,1280],f32>, !torch.vtensor<[2,576,1],f32>, !torch.vtensor<[2,576,1],f32>
%2981 = torch.aten._to_copy %16, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240],f16>
%2982 = torch.aten._to_copy %17, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10240,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10240,1280],f16>
%2983 = torch.aten._to_copy %result0_168, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,576,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,576,1280],f16>
%2984 = torch.aten.t %2982 : !torch.vtensor<[10240,1280],f16> -> !torch.vtensor<[1280,10240],f16>
%2985 = torch.aten.view %2983, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%2986 = torch.aten.addmm %2981, %2985, %2984, %int1, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,10240],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,10240],f16>
%2987 = torch.aten.view %2986, %1399 : !torch.vtensor<[1152,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,576,10240],f16>
%2988 = torch.aten.slice.Tensor %2987, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2989 = torch.aten.slice.Tensor %2987, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,576,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,576,5120],f16>
%2990 = torch.aten.gelu %2989, %str : !torch.vtensor<[2,576,5120],f16>, !torch.str -> !torch.vtensor<[2,576,5120],f16>
%2991 = torch.aten.mul.Tensor %2988, %2990 : !torch.vtensor<[2,576,5120],f16>, !torch.vtensor<[2,576,5120],f16> -> !torch.vtensor<[2,576,5120],f16>
%2992 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%2993 = torch.aten._to_copy %18, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,5120],f16>
%2994 = torch.aten.t %2993 : !torch.vtensor<[1280,5120],f16> -> !torch.vtensor<[5120,1280],f16>
%2995 = torch.aten.view %2991, %1408 : !torch.vtensor<[2,576,5120],f16>, !torch.list<int> -> !torch.vtensor<[1152,5120],f16>
%2996 = torch.aten.addmm %2992, %2995, %2994, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,5120],f16>, !torch.vtensor<[5120,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%2997 = torch.aten.view %2996, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%2998 = torch.aten.add.Tensor %2997, %2979, %int1 : !torch.vtensor<[2,576,1280],f16>, !torch.vtensor<[2,576,1280],f16>, !torch.int -> !torch.vtensor<[2,576,1280],f16>
%2999 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%3000 = torch.aten._to_copy %19, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280],f16>
%3001 = torch.aten.t %3000 : !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[1280,1280],f16>
%3002 = torch.aten.view %2998, %1228 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int> -> !torch.vtensor<[1152,1280],f16>
%3003 = torch.aten.addmm %2999, %3002, %3001, %int1, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[1152,1280],f16>, !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1152,1280],f16>
%3004 = torch.aten.view %3003, %1220 : !torch.vtensor<[1152,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,576,1280],f16>
%3005 = torch.aten._reshape_alias %3004, %1419, %1420 : !torch.vtensor<[2,576,1280],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,24,24,1280],f16>
%3006 = torch.aten.permute %3005, %206 : !torch.vtensor<[2,24,24,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%3007 = torch.aten._reshape_alias %3006, %1183, %1423 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,1280,24,24],f16>
%3008 = torch.aten.clone %3007, %int0 : !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%3009 = torch.aten.add.Tensor %3008, %2815, %int1 : !torch.vtensor<[2,1280,24,24],f16>, !torch.vtensor<[2,1280,24,24],f16>, !torch.int -> !torch.vtensor<[2,1280,24,24],f16>
%3010 = torch.prim.ListConstruct %int48, %int48 : (!torch.int, !torch.int) -> !torch.list<int>
%3011 = torch.aten.upsample_nearest2d %3009, %3010, %float2.000000e00, %float2.000000e00 : !torch.vtensor<[2,1280,24,24],f16>, !torch.list<int>, !torch.float, !torch.float -> !torch.vtensor<[2,1280,48,48],f16>
%3012 = torch.aten._to_copy %24, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280],f16>
%3013 = torch.aten._to_copy %20, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[1280,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1280,1280,3,3],f16>
%3014 = torch.aten._convolution %3011, %3013, %3012, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,48,48],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,1280,48,48],f16>
%3015 = torch.prim.ListConstruct %3014, %1140 : (!torch.vtensor<[2,1280,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>) -> !torch.list<vtensor>
%3016 = torch.aten.cat %3015, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1920,48,48],f16>
%3017 = torch.aten._to_copy %3016, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1920,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,48,48],f32>
%3018 = torch.prim.ListConstruct %int2, %int32, %int60, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3019 = torch.prim.ListConstruct %int4423680, %int138240, %int2304, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3020 = torch.aten._reshape_alias %3017, %3018, %3019 : !torch.vtensor<[2,1920,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,60,2304],f32>
%result0_171, %result1_172 = torch.aten.var_mean.correction %3020, %85, %int0, %true : !torch.vtensor<[2,32,60,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3021 = torch.aten.add.Tensor %result0_171, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3022 = torch.aten.rsqrt %3021 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3023 = torch.aten.sub.Tensor %3020, %result1_172, %int1 : !torch.vtensor<[2,32,60,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,60,2304],f32>
%3024 = torch.aten.mul.Tensor %3023, %3022 : !torch.vtensor<[2,32,60,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,60,2304],f32>
%3025 = torch.prim.ListConstruct %int2, %int1920, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3026 = torch.aten.view %3024, %3025 : !torch.vtensor<[2,32,60,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,1920,48,48],f32>
%3027 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[1920],f32>, !torch.int -> !torch.vtensor<[1,1920],f32>
%3028 = torch.aten.unsqueeze %3027, %int2 : !torch.vtensor<[1,1920],f32>, !torch.int -> !torch.vtensor<[1,1920,1],f32>
%3029 = torch.aten.unsqueeze %3028, %int3 : !torch.vtensor<[1,1920,1],f32>, !torch.int -> !torch.vtensor<[1,1920,1,1],f32>
%3030 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[1920],f32>, !torch.int -> !torch.vtensor<[1,1920],f32>
%3031 = torch.aten.unsqueeze %3030, %int2 : !torch.vtensor<[1,1920],f32>, !torch.int -> !torch.vtensor<[1,1920,1],f32>
%3032 = torch.aten.unsqueeze %3031, %int3 : !torch.vtensor<[1,1920,1],f32>, !torch.int -> !torch.vtensor<[1,1920,1,1],f32>
%3033 = torch.aten.mul.Tensor %3026, %3032 : !torch.vtensor<[2,1920,48,48],f32>, !torch.vtensor<[1,1920,1,1],f32> -> !torch.vtensor<[2,1920,48,48],f32>
%3034 = torch.aten.add.Tensor %3033, %3029, %int1 : !torch.vtensor<[2,1920,48,48],f32>, !torch.vtensor<[1,1920,1,1],f32>, !torch.int -> !torch.vtensor<[2,1920,48,48],f32>
%3035 = torch.aten._to_copy %3034, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1920,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1920,48,48],f16>
%3036 = torch.aten.silu %3035 : !torch.vtensor<[2,1920,48,48],f16> -> !torch.vtensor<[2,1920,48,48],f16>
%3037 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3038 = torch.aten._to_copy %22, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1920,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1920,3,3],f16>
%3039 = torch.aten._convolution %3036, %3038, %3037, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,48,48],f16>, !torch.vtensor<[640,1920,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3040 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3041 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3042 = torch.aten._to_copy %28, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280],f16>
%3043 = torch.aten.t %3042 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%3044 = torch.aten.addmm %3041, %3040, %3043, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%3045 = torch.aten.unsqueeze %3044, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%3046 = torch.aten.unsqueeze %3045, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%3047 = torch.aten.add.Tensor %3039, %3046, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3048 = torch.aten._to_copy %3047, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3049 = torch.aten._reshape_alias %3048, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_173, %result1_174 = torch.aten.var_mean.correction %3049, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3050 = torch.aten.add.Tensor %result0_173, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3051 = torch.aten.rsqrt %3050 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3052 = torch.aten.sub.Tensor %3049, %result1_174, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3053 = torch.aten.mul.Tensor %3052, %3051 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3054 = torch.aten.view %3053, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3055 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3056 = torch.aten.unsqueeze %3055, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3057 = torch.aten.unsqueeze %3056, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3058 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3059 = torch.aten.unsqueeze %3058, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3060 = torch.aten.unsqueeze %3059, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3061 = torch.aten.mul.Tensor %3054, %3060 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3062 = torch.aten.add.Tensor %3061, %3057, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3063 = torch.aten._to_copy %3062, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%3064 = torch.aten.silu %3063 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
%3065 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3066 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%3067 = torch.aten._convolution %3064, %3066, %3065, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3068 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3069 = torch.aten._to_copy %23, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1920,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1920,1,1],f16>
%3070 = torch.aten._convolution %3016, %3069, %3068, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1920,48,48],f16>, !torch.vtensor<[640,1920,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3071 = torch.aten.add.Tensor %3070, %3067, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3072 = torch.aten._to_copy %3071, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3073 = torch.aten._reshape_alias %3072, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_175, %result1_176 = torch.aten.var_mean.correction %3073, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3074 = torch.aten.add.Tensor %result0_175, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3075 = torch.aten.rsqrt %3074 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3076 = torch.aten.sub.Tensor %3073, %result1_176, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3077 = torch.aten.mul.Tensor %3076, %3075 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3078 = torch.aten.view %3077, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3079 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3080 = torch.aten.unsqueeze %3079, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3081 = torch.aten.unsqueeze %3080, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3082 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3083 = torch.aten.unsqueeze %3082, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3084 = torch.aten.unsqueeze %3083, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3085 = torch.aten.mul.Tensor %3078, %3084 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3086 = torch.aten.add.Tensor %3085, %3081, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3087 = torch.aten._reshape_alias %3086, %654, %688 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3088 = torch.aten.permute %3087, %151 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f32>
%3089 = torch.aten._reshape_alias %3088, %691, %692 : !torch.vtensor<[2,48,48,640],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f32>
%3090 = torch.aten.clone %3089, %int0 : !torch.vtensor<[2,2304,640],f32>, !torch.int -> !torch.vtensor<[2,2304,640],f32>
%3091 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3092 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3093 = torch.aten._to_copy %3090, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3094 = torch.aten.t %3092 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3095 = torch.aten.view %3093, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3096 = torch.aten.addmm %3091, %3095, %3094, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3097 = torch.aten.view %3096, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3098 = torch.aten._to_copy %3097, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_177, %result1_178, %result2_179 = torch.aten.native_layer_norm %3098, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3099 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3100 = torch.aten._to_copy %result0_177, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3101 = torch.aten.t %3099 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3102 = torch.aten._reshape_alias %3100, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3103 = torch.aten.mm %3102, %3101 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3104 = torch.aten._unsafe_view %3103, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3105 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3106 = torch.aten._to_copy %result0_177, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3107 = torch.aten.t %3105 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3108 = torch.aten._reshape_alias %3106, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3109 = torch.aten.mm %3108, %3107 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3110 = torch.aten._unsafe_view %3109, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3111 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3112 = torch.aten._to_copy %result0_177, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3113 = torch.aten.t %3111 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3114 = torch.aten._reshape_alias %3112, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3115 = torch.aten.mm %3114, %3113 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3116 = torch.aten._unsafe_view %3115, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3117 = torch.aten._reshape_alias %3104, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3118 = torch.aten.permute %3117, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3119 = torch.aten.clone %3118, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3120 = torch.aten._unsafe_view %3119, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3121 = torch.aten._reshape_alias %3110, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3122 = torch.aten.permute %3121, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3123 = torch.aten.clone %3122, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3124 = torch.aten._unsafe_view %3123, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3125 = torch.aten._reshape_alias %3116, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3126 = torch.aten.permute %3125, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3127 = torch.aten.clone %3126, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3128 = torch.aten._unsafe_view %3127, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3129 = torch.aten.unsqueeze %3120, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3130 = torch.aten.permute %3129, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3131 = torch.aten.unsqueeze %3124, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3132 = torch.aten.permute %3131, %206 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,2304,64],f16>
%3133 = torch.aten.permute %3130, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3134 = torch.aten._reshape_alias %3133, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3135 = torch.aten.permute %3132, %211 : !torch.vtensor<[20,1,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,2304,1],f16>
%3136 = torch.aten._reshape_alias %3135, %747, %748 : !torch.vtensor<[20,64,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,2304],f16>
%3137 = torch.aten.bmm %3134, %3136 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,2304],f16> -> !torch.vtensor<[20,2304,2304],f16>
%3138 = torch.aten.view %3137, %751 : !torch.vtensor<[20,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3139 = torch.aten.permute %3138, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3140 = torch.aten.view %3139, %754 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%3141 = torch.aten.mul.Tensor %3140, %0 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,2304],f16>
%3142 = torch.aten._softmax %3141, %int-1, %true : !torch.vtensor<[20,2304,2304],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,2304],f32>
%3143 = torch.aten._to_copy %3142, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,2304],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,2304],f16>
%3144 = torch.aten.unsqueeze %3143, %int3 : !torch.vtensor<[20,2304,2304],f16>, !torch.int -> !torch.vtensor<[20,2304,2304,1],f16>
%3145 = torch.aten.permute %3144, %203 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3146 = torch.aten.unsqueeze %3128, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3147 = torch.aten.permute %3146, %211 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,2304],f16>
%3148 = torch.aten.permute %3145, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3149 = torch.aten._reshape_alias %3148, %754, %764 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%3150 = torch.aten.permute %3147, %211 : !torch.vtensor<[20,1,64,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3151 = torch.aten._reshape_alias %3150, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3152 = torch.aten.bmm %3149, %3151 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[20,2304,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3153 = torch.aten.view %3152, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3154 = torch.aten.permute %3153, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3155 = torch.aten.view %3154, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3156 = torch.aten._reshape_alias %3155, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3157 = torch.aten.permute %3156, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3158 = torch.aten.clone %3157, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3159 = torch.aten._unsafe_view %3158, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3160 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3161 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3162 = torch.aten.t %3161 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3163 = torch.aten.view %3159, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3164 = torch.aten.addmm %3160, %3163, %3162, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3165 = torch.aten.view %3164, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3166 = torch.aten.add.Tensor %3165, %3097, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3167 = torch.aten._to_copy %3166, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_180, %result1_181, %result2_182 = torch.aten.native_layer_norm %3167, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3168 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3169 = torch.aten._to_copy %result0_180, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3170 = torch.aten.t %3168 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3171 = torch.aten._reshape_alias %3169, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3172 = torch.aten.mm %3171, %3170 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3173 = torch.aten._unsafe_view %3172, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3174 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3175 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3176 = torch.aten.t %3174 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3177 = torch.aten._reshape_alias %3175, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3178 = torch.aten.mm %3177, %3176 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3179 = torch.aten._unsafe_view %3178, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%3180 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3181 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3182 = torch.aten.t %3180 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3183 = torch.aten._reshape_alias %3181, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3184 = torch.aten.mm %3183, %3182 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3185 = torch.aten._unsafe_view %3184, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%3186 = torch.aten._reshape_alias %3173, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3187 = torch.aten.permute %3186, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3188 = torch.aten.clone %3187, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3189 = torch.aten._unsafe_view %3188, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3190 = torch.aten._reshape_alias %3179, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3191 = torch.aten.permute %3190, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3192 = torch.aten.clone %3191, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3193 = torch.aten._unsafe_view %3192, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3194 = torch.aten._reshape_alias %3185, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3195 = torch.aten.permute %3194, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3196 = torch.aten.clone %3195, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3197 = torch.aten._unsafe_view %3196, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3198 = torch.aten.unsqueeze %3189, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3199 = torch.aten.permute %3198, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3200 = torch.aten.unsqueeze %3193, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3201 = torch.aten.permute %3200, %206 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,77,64],f16>
%3202 = torch.aten.permute %3199, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3203 = torch.aten._reshape_alias %3202, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3204 = torch.aten.permute %3201, %211 : !torch.vtensor<[20,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,77,1],f16>
%3205 = torch.aten._reshape_alias %3204, %828, %297 : !torch.vtensor<[20,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,77],f16>
%3206 = torch.aten.bmm %3203, %3205 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,77],f16> -> !torch.vtensor<[20,2304,77],f16>
%3207 = torch.aten.view %3206, %831 : !torch.vtensor<[20,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3208 = torch.aten.permute %3207, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3209 = torch.aten.view %3208, %834 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%3210 = torch.aten.mul.Tensor %3209, %0 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,77],f16>
%3211 = torch.aten._softmax %3210, %int-1, %true : !torch.vtensor<[20,2304,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,77],f32>
%3212 = torch.aten._to_copy %3211, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,77],f16>
%3213 = torch.aten.unsqueeze %3212, %int3 : !torch.vtensor<[20,2304,77],f16>, !torch.int -> !torch.vtensor<[20,2304,77,1],f16>
%3214 = torch.aten.permute %3213, %203 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3215 = torch.aten.unsqueeze %3197, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3216 = torch.aten.permute %3215, %211 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,77],f16>
%3217 = torch.aten.permute %3214, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3218 = torch.aten._reshape_alias %3217, %834, %844 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%3219 = torch.aten.permute %3216, %211 : !torch.vtensor<[20,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64,1],f16>
%3220 = torch.aten._reshape_alias %3219, %815, %316 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3221 = torch.aten.bmm %3218, %3220 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3222 = torch.aten.view %3221, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3223 = torch.aten.permute %3222, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3224 = torch.aten.view %3223, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3225 = torch.aten._reshape_alias %3224, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3226 = torch.aten.permute %3225, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3227 = torch.aten.clone %3226, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3228 = torch.aten._unsafe_view %3227, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3229 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3230 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3231 = torch.aten.t %3230 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3232 = torch.aten.view %3228, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3233 = torch.aten.addmm %3229, %3232, %3231, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3234 = torch.aten.view %3233, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3235 = torch.aten.add.Tensor %3234, %3166, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3236 = torch.aten._to_copy %3235, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_183, %result1_184, %result2_185 = torch.aten.native_layer_norm %3236, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3237 = torch.aten._to_copy %31, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120],f16>
%3238 = torch.aten._to_copy %32, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120,640],f16>
%3239 = torch.aten._to_copy %result0_183, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3240 = torch.aten.t %3238 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%3241 = torch.aten.view %3239, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3242 = torch.aten.addmm %3237, %3241, %3240, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,5120],f16>
%3243 = torch.aten.view %3242, %870 : !torch.vtensor<[4608,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,5120],f16>
%3244 = torch.aten.slice.Tensor %3243, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3245 = torch.aten.slice.Tensor %3243, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3246 = torch.aten.gelu %3245, %str : !torch.vtensor<[2,2304,2560],f16>, !torch.str -> !torch.vtensor<[2,2304,2560],f16>
%3247 = torch.aten.mul.Tensor %3244, %3246 : !torch.vtensor<[2,2304,2560],f16>, !torch.vtensor<[2,2304,2560],f16> -> !torch.vtensor<[2,2304,2560],f16>
%3248 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3249 = torch.aten._to_copy %33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,2560],f16>
%3250 = torch.aten.t %3249 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%3251 = torch.aten.view %3247, %879 : !torch.vtensor<[2,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4608,2560],f16>
%3252 = torch.aten.addmm %3248, %3251, %3250, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3253 = torch.aten.view %3252, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3254 = torch.aten.add.Tensor %3253, %3235, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3255 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3256 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3257 = torch.aten.t %3256 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3258 = torch.aten.view %3254, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3259 = torch.aten.addmm %3255, %3258, %3257, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3260 = torch.aten.view %3259, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3261 = torch.aten._reshape_alias %3260, %890, %891 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f16>
%3262 = torch.aten.permute %3261, %206 : !torch.vtensor<[2,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3263 = torch.aten._reshape_alias %3262, %654, %894 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3264 = torch.aten.clone %3263, %int0 : !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3265 = torch.aten.add.Tensor %3264, %3071, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3266 = torch.prim.ListConstruct %3265, %897 : (!torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>) -> !torch.list<vtensor>
%3267 = torch.aten.cat %3266, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,1280,48,48],f16>
%3268 = torch.aten._to_copy %3267, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,1280,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,48,48],f32>
%3269 = torch.prim.ListConstruct %int2, %int32, %int40, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3270 = torch.prim.ListConstruct %int2949120, %int92160, %int2304, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3271 = torch.aten._reshape_alias %3268, %3269, %3270 : !torch.vtensor<[2,1280,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,40,2304],f32>
%result0_186, %result1_187 = torch.aten.var_mean.correction %3271, %85, %int0, %true : !torch.vtensor<[2,32,40,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3272 = torch.aten.add.Tensor %result0_186, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3273 = torch.aten.rsqrt %3272 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3274 = torch.aten.sub.Tensor %3271, %result1_187, %int1 : !torch.vtensor<[2,32,40,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,40,2304],f32>
%3275 = torch.aten.mul.Tensor %3274, %3273 : !torch.vtensor<[2,32,40,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,40,2304],f32>
%3276 = torch.prim.ListConstruct %int2, %int1280, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3277 = torch.aten.view %3275, %3276 : !torch.vtensor<[2,32,40,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,1280,48,48],f32>
%3278 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%3279 = torch.aten.unsqueeze %3278, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%3280 = torch.aten.unsqueeze %3279, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%3281 = torch.aten.unsqueeze %24, %int0 : !torch.vtensor<[1280],f32>, !torch.int -> !torch.vtensor<[1,1280],f32>
%3282 = torch.aten.unsqueeze %3281, %int2 : !torch.vtensor<[1,1280],f32>, !torch.int -> !torch.vtensor<[1,1280,1],f32>
%3283 = torch.aten.unsqueeze %3282, %int3 : !torch.vtensor<[1,1280,1],f32>, !torch.int -> !torch.vtensor<[1,1280,1,1],f32>
%3284 = torch.aten.mul.Tensor %3277, %3283 : !torch.vtensor<[2,1280,48,48],f32>, !torch.vtensor<[1,1280,1,1],f32> -> !torch.vtensor<[2,1280,48,48],f32>
%3285 = torch.aten.add.Tensor %3284, %3280, %int1 : !torch.vtensor<[2,1280,48,48],f32>, !torch.vtensor<[1,1280,1,1],f32>, !torch.int -> !torch.vtensor<[2,1280,48,48],f32>
%3286 = torch.aten._to_copy %3285, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,1280,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,1280,48,48],f16>
%3287 = torch.aten.silu %3286 : !torch.vtensor<[2,1280,48,48],f16> -> !torch.vtensor<[2,1280,48,48],f16>
%3288 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3289 = torch.aten._to_copy %25, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280,3,3],f16>
%3290 = torch.aten._convolution %3287, %3289, %3288, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,48,48],f16>, !torch.vtensor<[640,1280,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3291 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3292 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3293 = torch.aten._to_copy %28, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280],f16>
%3294 = torch.aten.t %3293 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%3295 = torch.aten.addmm %3292, %3291, %3294, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%3296 = torch.aten.unsqueeze %3295, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%3297 = torch.aten.unsqueeze %3296, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%3298 = torch.aten.add.Tensor %3290, %3297, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3299 = torch.aten._to_copy %3298, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3300 = torch.aten._reshape_alias %3299, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_188, %result1_189 = torch.aten.var_mean.correction %3300, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3301 = torch.aten.add.Tensor %result0_188, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3302 = torch.aten.rsqrt %3301 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3303 = torch.aten.sub.Tensor %3300, %result1_189, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3304 = torch.aten.mul.Tensor %3303, %3302 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3305 = torch.aten.view %3304, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3306 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3307 = torch.aten.unsqueeze %3306, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3308 = torch.aten.unsqueeze %3307, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3309 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3310 = torch.aten.unsqueeze %3309, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3311 = torch.aten.unsqueeze %3310, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3312 = torch.aten.mul.Tensor %3305, %3311 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3313 = torch.aten.add.Tensor %3312, %3308, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3314 = torch.aten._to_copy %3313, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%3315 = torch.aten.silu %3314 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
%3316 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3317 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%3318 = torch.aten._convolution %3315, %3317, %3316, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3319 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3320 = torch.aten._to_copy %26, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280,1,1],f16>
%3321 = torch.aten._convolution %3267, %3320, %3319, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,1280,48,48],f16>, !torch.vtensor<[640,1280,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3322 = torch.aten.add.Tensor %3321, %3318, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3323 = torch.aten._to_copy %3322, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3324 = torch.aten._reshape_alias %3323, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_190, %result1_191 = torch.aten.var_mean.correction %3324, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3325 = torch.aten.add.Tensor %result0_190, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3326 = torch.aten.rsqrt %3325 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3327 = torch.aten.sub.Tensor %3324, %result1_191, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3328 = torch.aten.mul.Tensor %3327, %3326 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3329 = torch.aten.view %3328, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3330 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3331 = torch.aten.unsqueeze %3330, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3332 = torch.aten.unsqueeze %3331, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3333 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3334 = torch.aten.unsqueeze %3333, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3335 = torch.aten.unsqueeze %3334, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3336 = torch.aten.mul.Tensor %3329, %3335 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3337 = torch.aten.add.Tensor %3336, %3332, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3338 = torch.aten._reshape_alias %3337, %654, %688 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3339 = torch.aten.permute %3338, %151 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f32>
%3340 = torch.aten._reshape_alias %3339, %691, %692 : !torch.vtensor<[2,48,48,640],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f32>
%3341 = torch.aten.clone %3340, %int0 : !torch.vtensor<[2,2304,640],f32>, !torch.int -> !torch.vtensor<[2,2304,640],f32>
%3342 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3343 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3344 = torch.aten._to_copy %3341, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3345 = torch.aten.t %3343 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3346 = torch.aten.view %3344, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3347 = torch.aten.addmm %3342, %3346, %3345, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3348 = torch.aten.view %3347, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3349 = torch.aten._to_copy %3348, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_192, %result1_193, %result2_194 = torch.aten.native_layer_norm %3349, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3350 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3351 = torch.aten._to_copy %result0_192, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3352 = torch.aten.t %3350 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3353 = torch.aten._reshape_alias %3351, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3354 = torch.aten.mm %3353, %3352 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3355 = torch.aten._unsafe_view %3354, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3356 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3357 = torch.aten._to_copy %result0_192, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3358 = torch.aten.t %3356 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3359 = torch.aten._reshape_alias %3357, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3360 = torch.aten.mm %3359, %3358 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3361 = torch.aten._unsafe_view %3360, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3362 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3363 = torch.aten._to_copy %result0_192, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
// --- Self-attention (attn1) input projections ---
// 640-dim attention over 2304 (= 48x48) spatial tokens, batch 2, split into
// 10 heads of 64 below. Value projection computed as x @ W^T, lowered to
// transpose + flatten-to-2D + mm + reshape-back.
%3364 = torch.aten.t %3362 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3365 = torch.aten._reshape_alias %3363, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3366 = torch.aten.mm %3365, %3364 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3367 = torch.aten._unsafe_view %3366, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// Head split for query (%3355), key (%3361) and value (%3367):
// [2,2304,640] -> [2,2304,10,64] -> permute -> [2,10,2304,64] -> [20,2304,64].
// clone + _unsafe_view materializes the permuted layout contiguously.
%3368 = torch.aten._reshape_alias %3355, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3369 = torch.aten.permute %3368, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3370 = torch.aten.clone %3369, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3371 = torch.aten._unsafe_view %3370, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3372 = torch.aten._reshape_alias %3361, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3373 = torch.aten.permute %3372, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3374 = torch.aten.clone %3373, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3375 = torch.aten._unsafe_view %3374, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3376 = torch.aten._reshape_alias %3367, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3377 = torch.aten.permute %3376, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3378 = torch.aten.clone %3377, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3379 = torch.aten._unsafe_view %3378, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
// --- Self-attention core: softmax(q @ k^T * scale) @ v ---
// The unsqueeze/permute/reshape pairs below are the trace-level lowering of
// batched matmul broadcasting; they only shuffle layout, not data.
%3380 = torch.aten.unsqueeze %3371, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3381 = torch.aten.permute %3380, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3382 = torch.aten.unsqueeze %3375, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3383 = torch.aten.permute %3382, %206 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,2304,64],f16>
%3384 = torch.aten.permute %3381, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3385 = torch.aten._reshape_alias %3384, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3386 = torch.aten.permute %3383, %211 : !torch.vtensor<[20,1,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,2304,1],f16>
%3387 = torch.aten._reshape_alias %3386, %747, %748 : !torch.vtensor<[20,64,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,2304],f16>
// Attention scores: [20,2304,64] @ [20,64,2304] -> [20,2304,2304].
%3388 = torch.aten.bmm %3385, %3387 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,2304],f16> -> !torch.vtensor<[20,2304,2304],f16>
%3389 = torch.aten.view %3388, %751 : !torch.vtensor<[20,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3390 = torch.aten.permute %3389, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3391 = torch.aten.view %3390, %754 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
// Scale by %0 = 0.125 (= 1/sqrt(64), the per-head dimension).
%3392 = torch.aten.mul.Tensor %3391, %0 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,2304],f16>
// Softmax with half_to_float=%true: computed in f32 for stability, then
// cast back to f16.
%3393 = torch.aten._softmax %3392, %int-1, %true : !torch.vtensor<[20,2304,2304],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,2304],f32>
%3394 = torch.aten._to_copy %3393, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,2304],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,2304],f16>
// Weighted sum over values: [20,2304,2304] @ [20,2304,64] -> [20,2304,64].
%3395 = torch.aten.unsqueeze %3394, %int3 : !torch.vtensor<[20,2304,2304],f16>, !torch.int -> !torch.vtensor<[20,2304,2304,1],f16>
%3396 = torch.aten.permute %3395, %203 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3397 = torch.aten.unsqueeze %3379, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3398 = torch.aten.permute %3397, %211 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,2304],f16>
%3399 = torch.aten.permute %3396, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3400 = torch.aten._reshape_alias %3399, %754, %764 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%3401 = torch.aten.permute %3398, %211 : !torch.vtensor<[20,1,64,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3402 = torch.aten._reshape_alias %3401, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3403 = torch.aten.bmm %3400, %3402 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[20,2304,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3404 = torch.aten.view %3403, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3405 = torch.aten.permute %3404, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3406 = torch.aten.view %3405, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
// Merge heads back: [20,2304,64] -> [2,10,2304,64] -> [2,2304,10,64] -> [2,2304,640].
%3407 = torch.aten._reshape_alias %3406, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3408 = torch.aten.permute %3407, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3409 = torch.aten.clone %3408, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3410 = torch.aten._unsafe_view %3409, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// --- attn1 output projection, residual, layer norm ---
// Cast the f32 master weights (%39 bias, %34 640x640 weight) to f16, apply
// the linear as addmm over flattened tokens, add the pre-attention residual
// (%3348), then layer-norm in f32 (eps 1e-5).
// NOTE(review): %39 appears as both layer-norm weight and bias (and as the
// linear bias) — presumably distinct elided dense_resource constants were
// deduplicated in this dump; verify against the original checkpoint.
%3411 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3412 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3413 = torch.aten.t %3412 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3414 = torch.aten.view %3410, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3415 = torch.aten.addmm %3411, %3414, %3413, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3416 = torch.aten.view %3415, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3417 = torch.aten.add.Tensor %3416, %3348, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3418 = torch.aten._to_copy %3417, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_195, %result1_196, %result2_197 = torch.aten.native_layer_norm %3418, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
// --- Cross-attention (attn2) projections ---
// Query from the layer-normed hidden states; key and value from %arg2, the
// [2,77,1024] encoder hidden states (presumably CLIP text embeddings, 77
// tokens — TODO confirm against the caller).
%3419 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3420 = torch.aten._to_copy %result0_195, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3421 = torch.aten.t %3419 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3422 = torch.aten._reshape_alias %3420, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3423 = torch.aten.mm %3422, %3421 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3424 = torch.aten._unsafe_view %3423, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// Key projection: 1024 -> 640.
// NOTE(review): the same weight %30 feeds both the key and value projections
// below — likely an artifact of elided/deduplicated constants in this dump.
%3425 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3426 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3427 = torch.aten.t %3425 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3428 = torch.aten._reshape_alias %3426, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3429 = torch.aten.mm %3428, %3427 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3430 = torch.aten._unsafe_view %3429, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
// Value projection: same shape pipeline as the key projection.
%3431 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3432 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3433 = torch.aten.t %3431 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3434 = torch.aten._reshape_alias %3432, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3435 = torch.aten.mm %3434, %3433 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3436 = torch.aten._unsafe_view %3435, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
// Head split: q -> [20,2304,64]; k, v -> [20,77,64] (10 heads of 64).
%3437 = torch.aten._reshape_alias %3424, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3438 = torch.aten.permute %3437, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3439 = torch.aten.clone %3438, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3440 = torch.aten._unsafe_view %3439, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3441 = torch.aten._reshape_alias %3430, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3442 = torch.aten.permute %3441, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3443 = torch.aten.clone %3442, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3444 = torch.aten._unsafe_view %3443, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3445 = torch.aten._reshape_alias %3436, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3446 = torch.aten.permute %3445, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3447 = torch.aten.clone %3446, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3448 = torch.aten._unsafe_view %3447, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
// --- Cross-attention core: softmax(q @ k^T * scale) @ v over 77 context tokens ---
// Layout shuffles below mirror the self-attention lowering; only the key/value
// sequence length (77) differs.
%3449 = torch.aten.unsqueeze %3440, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3450 = torch.aten.permute %3449, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3451 = torch.aten.unsqueeze %3444, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3452 = torch.aten.permute %3451, %206 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,77,64],f16>
%3453 = torch.aten.permute %3450, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3454 = torch.aten._reshape_alias %3453, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3455 = torch.aten.permute %3452, %211 : !torch.vtensor<[20,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,77,1],f16>
%3456 = torch.aten._reshape_alias %3455, %828, %297 : !torch.vtensor<[20,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,77],f16>
// Scores: [20,2304,64] @ [20,64,77] -> [20,2304,77].
%3457 = torch.aten.bmm %3454, %3456 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,77],f16> -> !torch.vtensor<[20,2304,77],f16>
%3458 = torch.aten.view %3457, %831 : !torch.vtensor<[20,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3459 = torch.aten.permute %3458, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3460 = torch.aten.view %3459, %834 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
// Scale by %0 = 0.125 (= 1/sqrt(64)), then f32 softmax, cast back to f16.
%3461 = torch.aten.mul.Tensor %3460, %0 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,77],f16>
%3462 = torch.aten._softmax %3461, %int-1, %true : !torch.vtensor<[20,2304,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,77],f32>
%3463 = torch.aten._to_copy %3462, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,77],f16>
// Weighted sum: [20,2304,77] @ [20,77,64] -> [20,2304,64], then merge heads
// back to [2,2304,640].
%3464 = torch.aten.unsqueeze %3463, %int3 : !torch.vtensor<[20,2304,77],f16>, !torch.int -> !torch.vtensor<[20,2304,77,1],f16>
%3465 = torch.aten.permute %3464, %203 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3466 = torch.aten.unsqueeze %3448, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3467 = torch.aten.permute %3466, %211 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,77],f16>
%3468 = torch.aten.permute %3465, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3469 = torch.aten._reshape_alias %3468, %834, %844 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%3470 = torch.aten.permute %3467, %211 : !torch.vtensor<[20,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64,1],f16>
%3471 = torch.aten._reshape_alias %3470, %815, %316 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3472 = torch.aten.bmm %3469, %3471 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3473 = torch.aten.view %3472, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3474 = torch.aten.permute %3473, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3475 = torch.aten.view %3474, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3476 = torch.aten._reshape_alias %3475, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3477 = torch.aten.permute %3476, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3478 = torch.aten.clone %3477, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3479 = torch.aten._unsafe_view %3478, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// --- attn2 output projection + residual + layer norm ---
%3480 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3481 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3482 = torch.aten.t %3481 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3483 = torch.aten.view %3479, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3484 = torch.aten.addmm %3480, %3483, %3482, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3485 = torch.aten.view %3484, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// Residual with the post-attn1 hidden states (%3417), then f32 layer norm.
%3486 = torch.aten.add.Tensor %3485, %3417, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3487 = torch.aten._to_copy %3486, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_198, %result1_199, %result2_200 = torch.aten.native_layer_norm %3487, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
// --- Feed-forward with gated GELU ---
// Linear 640 -> 5120, split into two 2560-wide halves, gelu the second half
// and multiply elementwise (GEGLU-style gating, as the slice/gelu/mul below
// shows), then project 2560 -> 640.
%3488 = torch.aten._to_copy %31, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120],f16>
%3489 = torch.aten._to_copy %32, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120,640],f16>
%3490 = torch.aten._to_copy %result0_198, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3491 = torch.aten.t %3489 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%3492 = torch.aten.view %3490, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3493 = torch.aten.addmm %3488, %3492, %3491, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,5120],f16>
%3494 = torch.aten.view %3493, %870 : !torch.vtensor<[4608,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,5120],f16>
%3495 = torch.aten.slice.Tensor %3494, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3496 = torch.aten.slice.Tensor %3494, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3497 = torch.aten.gelu %3496, %str : !torch.vtensor<[2,2304,2560],f16>, !torch.str -> !torch.vtensor<[2,2304,2560],f16>
%3498 = torch.aten.mul.Tensor %3495, %3497 : !torch.vtensor<[2,2304,2560],f16>, !torch.vtensor<[2,2304,2560],f16> -> !torch.vtensor<[2,2304,2560],f16>
%3499 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3500 = torch.aten._to_copy %33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,2560],f16>
%3501 = torch.aten.t %3500 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%3502 = torch.aten.view %3498, %879 : !torch.vtensor<[2,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4608,2560],f16>
%3503 = torch.aten.addmm %3499, %3502, %3501, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3504 = torch.aten.view %3503, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
// Feed-forward residual with the post-attn2 hidden states (%3486).
%3505 = torch.aten.add.Tensor %3504, %3486, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
// --- Transformer-block output projection and return to NCHW ---
// Final 640 -> 640 linear, reshape tokens [2,2304,640] -> [2,48,48,640],
// permute to channels-first [2,640,48,48], and add the block input (%3322)
// as a residual.
%3506 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3507 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3508 = torch.aten.t %3507 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3509 = torch.aten.view %3505, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3510 = torch.aten.addmm %3506, %3509, %3508, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3511 = torch.aten.view %3510, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3512 = torch.aten._reshape_alias %3511, %890, %891 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f16>
%3513 = torch.aten.permute %3512, %206 : !torch.vtensor<[2,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3514 = torch.aten._reshape_alias %3513, %654, %894 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3515 = torch.aten.clone %3514, %int0 : !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3516 = torch.aten.add.Tensor %3515, %3322, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
// Channel-concat with a [2,320,48,48] skip tensor (%614) -> [2,960,48,48]
// (UNet decoder-style skip connection — presumed; confirm against the full
// graph), then cast to f32 for the group norm that follows.
%3517 = torch.prim.ListConstruct %3516, %614 : (!torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,320,48,48],f16>) -> !torch.list<vtensor>
%3518 = torch.aten.cat %3517, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,48,48],f16>
%3519 = torch.aten._to_copy %3518, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,960,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,48,48],f32>
// --- ResNet block, first half: group norm (32 groups) + silu + 3x3 conv ---
// Group norm decomposed inline: view 960 channels as 32 groups of 30
// ([2,32,30,2304]), take biased var/mean per group, normalize with
// eps %2 = 1e-5, view back, then apply per-channel scale/shift (%36).
%3520 = torch.prim.ListConstruct %int2, %int32, %int30, %int2304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3521 = torch.prim.ListConstruct %int2211840, %int69120, %int2304, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3522 = torch.aten._reshape_alias %3519, %3520, %3521 : !torch.vtensor<[2,960,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,30,2304],f32>
%result0_201, %result1_202 = torch.aten.var_mean.correction %3522, %85, %int0, %true : !torch.vtensor<[2,32,30,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3523 = torch.aten.add.Tensor %result0_201, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3524 = torch.aten.rsqrt %3523 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3525 = torch.aten.sub.Tensor %3522, %result1_202, %int1 : !torch.vtensor<[2,32,30,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,2304],f32>
%3526 = torch.aten.mul.Tensor %3525, %3524 : !torch.vtensor<[2,32,30,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,2304],f32>
%3527 = torch.prim.ListConstruct %int2, %int960, %int48, %int48 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3528 = torch.aten.view %3526, %3527 : !torch.vtensor<[2,32,30,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,960,48,48],f32>
// NOTE(review): %36 is used as both the affine scale and the shift — likely
// deduplicated elided constants rather than genuinely tied parameters.
%3529 = torch.aten.unsqueeze %36, %int0 : !torch.vtensor<[960],f32>, !torch.int -> !torch.vtensor<[1,960],f32>
%3530 = torch.aten.unsqueeze %3529, %int2 : !torch.vtensor<[1,960],f32>, !torch.int -> !torch.vtensor<[1,960,1],f32>
%3531 = torch.aten.unsqueeze %3530, %int3 : !torch.vtensor<[1,960,1],f32>, !torch.int -> !torch.vtensor<[1,960,1,1],f32>
%3532 = torch.aten.unsqueeze %36, %int0 : !torch.vtensor<[960],f32>, !torch.int -> !torch.vtensor<[1,960],f32>
%3533 = torch.aten.unsqueeze %3532, %int2 : !torch.vtensor<[1,960],f32>, !torch.int -> !torch.vtensor<[1,960,1],f32>
%3534 = torch.aten.unsqueeze %3533, %int3 : !torch.vtensor<[1,960,1],f32>, !torch.int -> !torch.vtensor<[1,960,1,1],f32>
%3535 = torch.aten.mul.Tensor %3528, %3534 : !torch.vtensor<[2,960,48,48],f32>, !torch.vtensor<[1,960,1,1],f32> -> !torch.vtensor<[2,960,48,48],f32>
%3536 = torch.aten.add.Tensor %3535, %3531, %int1 : !torch.vtensor<[2,960,48,48],f32>, !torch.vtensor<[1,960,1,1],f32>, !torch.int -> !torch.vtensor<[2,960,48,48],f32>
// Cast back to f16 (on device %cuda3A0), silu, then 3x3 conv 960 -> 640.
%3537 = torch.aten._to_copy %3536, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,960,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,48,48],f16>
%3538 = torch.aten.silu %3537 : !torch.vtensor<[2,960,48,48],f16> -> !torch.vtensor<[2,960,48,48],f16>
%3539 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3540 = torch.aten._to_copy %27, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,960,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,960,3,3],f16>
%3541 = torch.aten._convolution %3538, %3540, %3539, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,48,48],f16>, !torch.vtensor<[640,960,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
// Timestep-embedding branch: silu on the [2,1280] embedding (%74), linear
// 1280 -> 640, then broadcast-add over the spatial dims.
%3542 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3543 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3544 = torch.aten._to_copy %28, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1280],f16>
%3545 = torch.aten.t %3544 : !torch.vtensor<[640,1280],f16> -> !torch.vtensor<[1280,640],f16>
%3546 = torch.aten.addmm %3543, %3542, %3545, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,640],f16>
%3547 = torch.aten.unsqueeze %3546, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16>
%3548 = torch.aten.unsqueeze %3547, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16>
%3549 = torch.aten.add.Tensor %3541, %3548, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
// --- ResNet block, second half: group norm + silu + 3x3 conv + 1x1 shortcut ---
// Same inline group-norm decomposition, now on 640 channels viewed as
// 32 groups of 20 ([2,32,20,2304]), eps %2 = 1e-5, affine params %39.
%3550 = torch.aten._to_copy %3549, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3551 = torch.aten._reshape_alias %3550, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_203, %result1_204 = torch.aten.var_mean.correction %3551, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3552 = torch.aten.add.Tensor %result0_203, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3553 = torch.aten.rsqrt %3552 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3554 = torch.aten.sub.Tensor %3551, %result1_204, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3555 = torch.aten.mul.Tensor %3554, %3553 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3556 = torch.aten.view %3555, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3557 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3558 = torch.aten.unsqueeze %3557, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3559 = torch.aten.unsqueeze %3558, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3560 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3561 = torch.aten.unsqueeze %3560, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3562 = torch.aten.unsqueeze %3561, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3563 = torch.aten.mul.Tensor %3556, %3562 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3564 = torch.aten.add.Tensor %3563, %3559, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3565 = torch.aten._to_copy %3564, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,48,48],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f16>
%3566 = torch.aten.silu %3565 : !torch.vtensor<[2,640,48,48],f16> -> !torch.vtensor<[2,640,48,48],f16>
// Main path: 3x3 conv 640 -> 640 (%35).
%3567 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3568 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%3569 = torch.aten._convolution %3566, %3568, %3567, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
// Shortcut path: 1x1 conv on the 960-channel concat input (%3518) to match
// 640 channels, then add to the main path (residual output of the block).
%3570 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3571 = torch.aten._to_copy %29, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,960,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,960,1,1],f16>
%3572 = torch.aten._convolution %3518, %3571, %3570, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,48,48],f16>, !torch.vtensor<[640,960,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,48,48],f16>
%3573 = torch.aten.add.Tensor %3572, %3569, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
// --- Next transformer block: input group norm and flatten to tokens ---
// Group norm over [2,640,48,48] (32 groups of 20), but with eps %1 ~= 1e-6
// (the transformer-block norms use 1e-6 where the resnet norms above use
// %2 = 1e-5), then affine (%39) and reshape/permute NCHW -> [2,2304,640]
// so the spatial grid becomes a token sequence for the attention layers.
%3574 = torch.aten._to_copy %3573, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,48,48],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,48,48],f32>
%3575 = torch.aten._reshape_alias %3574, %647, %648 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,2304],f32>
%result0_205, %result1_206 = torch.aten.var_mean.correction %3575, %85, %int0, %true : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3576 = torch.aten.add.Tensor %result0_205, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3577 = torch.aten.rsqrt %3576 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3578 = torch.aten.sub.Tensor %3575, %result1_206, %int1 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,2304],f32>
%3579 = torch.aten.mul.Tensor %3578, %3577 : !torch.vtensor<[2,32,20,2304],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,2304],f32>
%3580 = torch.aten.view %3579, %654 : !torch.vtensor<[2,32,20,2304],f32>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3581 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3582 = torch.aten.unsqueeze %3581, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3583 = torch.aten.unsqueeze %3582, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3584 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%3585 = torch.aten.unsqueeze %3584, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%3586 = torch.aten.unsqueeze %3585, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%3587 = torch.aten.mul.Tensor %3580, %3586 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,48,48],f32>
%3588 = torch.aten.add.Tensor %3587, %3583, %int1 : !torch.vtensor<[2,640,48,48],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,48,48],f32>
%3589 = torch.aten._reshape_alias %3588, %654, %688 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f32>
%3590 = torch.aten.permute %3589, %151 : !torch.vtensor<[2,640,48,48],f32>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f32>
%3591 = torch.aten._reshape_alias %3590, %691, %692 : !torch.vtensor<[2,48,48,640],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f32>
%3592 = torch.aten.clone %3591, %int0 : !torch.vtensor<[2,2304,640],f32>, !torch.int -> !torch.vtensor<[2,2304,640],f32>
%3593 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3594 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3595 = torch.aten._to_copy %3592, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3596 = torch.aten.t %3594 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3597 = torch.aten.view %3595, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3598 = torch.aten.addmm %3593, %3597, %3596, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3599 = torch.aten.view %3598, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3600 = torch.aten._to_copy %3599, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_207, %result1_208, %result2_209 = torch.aten.native_layer_norm %3600, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3601 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3602 = torch.aten._to_copy %result0_207, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3603 = torch.aten.t %3601 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3604 = torch.aten._reshape_alias %3602, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3605 = torch.aten.mm %3604, %3603 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3606 = torch.aten._unsafe_view %3605, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3607 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3608 = torch.aten._to_copy %result0_207, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3609 = torch.aten.t %3607 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3610 = torch.aten._reshape_alias %3608, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3611 = torch.aten.mm %3610, %3609 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3612 = torch.aten._unsafe_view %3611, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3613 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3614 = torch.aten._to_copy %result0_207, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3615 = torch.aten.t %3613 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3616 = torch.aten._reshape_alias %3614, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3617 = torch.aten.mm %3616, %3615 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3618 = torch.aten._unsafe_view %3617, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3619 = torch.aten._reshape_alias %3606, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3620 = torch.aten.permute %3619, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3621 = torch.aten.clone %3620, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3622 = torch.aten._unsafe_view %3621, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3623 = torch.aten._reshape_alias %3612, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3624 = torch.aten.permute %3623, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3625 = torch.aten.clone %3624, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3626 = torch.aten._unsafe_view %3625, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3627 = torch.aten._reshape_alias %3618, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3628 = torch.aten.permute %3627, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3629 = torch.aten.clone %3628, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3630 = torch.aten._unsafe_view %3629, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3631 = torch.aten.unsqueeze %3622, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3632 = torch.aten.permute %3631, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3633 = torch.aten.unsqueeze %3626, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3634 = torch.aten.permute %3633, %206 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,2304,64],f16>
%3635 = torch.aten.permute %3632, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3636 = torch.aten._reshape_alias %3635, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3637 = torch.aten.permute %3634, %211 : !torch.vtensor<[20,1,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,2304,1],f16>
%3638 = torch.aten._reshape_alias %3637, %747, %748 : !torch.vtensor<[20,64,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,2304],f16>
%3639 = torch.aten.bmm %3636, %3638 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,2304],f16> -> !torch.vtensor<[20,2304,2304],f16>
%3640 = torch.aten.view %3639, %751 : !torch.vtensor<[20,2304,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3641 = torch.aten.permute %3640, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3642 = torch.aten.view %3641, %754 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%3643 = torch.aten.mul.Tensor %3642, %0 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,2304],f16>
%3644 = torch.aten._softmax %3643, %int-1, %true : !torch.vtensor<[20,2304,2304],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,2304],f32>
%3645 = torch.aten._to_copy %3644, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,2304],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,2304],f16>
%3646 = torch.aten.unsqueeze %3645, %int3 : !torch.vtensor<[20,2304,2304],f16>, !torch.int -> !torch.vtensor<[20,2304,2304,1],f16>
%3647 = torch.aten.permute %3646, %203 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,2304],f16>
%3648 = torch.aten.unsqueeze %3630, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3649 = torch.aten.permute %3648, %211 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,2304],f16>
%3650 = torch.aten.permute %3647, %203 : !torch.vtensor<[20,2304,1,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,2304,1],f16>
%3651 = torch.aten._reshape_alias %3650, %754, %764 : !torch.vtensor<[20,2304,2304,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,2304],f16>
%3652 = torch.aten.permute %3649, %211 : !torch.vtensor<[20,1,64,2304],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3653 = torch.aten._reshape_alias %3652, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3654 = torch.aten.bmm %3651, %3653 : !torch.vtensor<[20,2304,2304],f16>, !torch.vtensor<[20,2304,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3655 = torch.aten.view %3654, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3656 = torch.aten.permute %3655, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3657 = torch.aten.view %3656, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3658 = torch.aten._reshape_alias %3657, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3659 = torch.aten.permute %3658, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3660 = torch.aten.clone %3659, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3661 = torch.aten._unsafe_view %3660, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3662 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3663 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3664 = torch.aten.t %3663 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3665 = torch.aten.view %3661, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3666 = torch.aten.addmm %3662, %3665, %3664, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3667 = torch.aten.view %3666, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3668 = torch.aten.add.Tensor %3667, %3599, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3669 = torch.aten._to_copy %3668, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_210, %result1_211, %result2_212 = torch.aten.native_layer_norm %3669, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3670 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3671 = torch.aten._to_copy %result0_210, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3672 = torch.aten.t %3670 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3673 = torch.aten._reshape_alias %3671, %699, %708 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3674 = torch.aten.mm %3673, %3672 : !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[4608,640],f16>
%3675 = torch.aten._unsafe_view %3674, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3676 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3677 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3678 = torch.aten.t %3676 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3679 = torch.aten._reshape_alias %3677, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3680 = torch.aten.mm %3679, %3678 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3681 = torch.aten._unsafe_view %3680, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%3682 = torch.aten._to_copy %30, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,1024],f16>
%3683 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3684 = torch.aten.t %3682 : !torch.vtensor<[640,1024],f16> -> !torch.vtensor<[1024,640],f16>
%3685 = torch.aten._reshape_alias %3683, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3686 = torch.aten.mm %3685, %3684 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[154,640],f16>
%3687 = torch.aten._unsafe_view %3686, %798 : !torch.vtensor<[154,640],f16>, !torch.list<int> -> !torch.vtensor<[2,77,640],f16>
%3688 = torch.aten._reshape_alias %3675, %724, %725 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3689 = torch.aten.permute %3688, %189 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3690 = torch.aten.clone %3689, %int0 : !torch.vtensor<[2,10,2304,64],f16>, !torch.int -> !torch.vtensor<[2,10,2304,64],f16>
%3691 = torch.aten._unsafe_view %3690, %729 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3692 = torch.aten._reshape_alias %3681, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3693 = torch.aten.permute %3692, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3694 = torch.aten.clone %3693, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3695 = torch.aten._unsafe_view %3694, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3696 = torch.aten._reshape_alias %3687, %810, %811 : !torch.vtensor<[2,77,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,10,64],f16>
%3697 = torch.aten.permute %3696, %189 : !torch.vtensor<[2,77,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,77,64],f16>
%3698 = torch.aten.clone %3697, %int0 : !torch.vtensor<[2,10,77,64],f16>, !torch.int -> !torch.vtensor<[2,10,77,64],f16>
%3699 = torch.aten._unsafe_view %3698, %815 : !torch.vtensor<[2,10,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3700 = torch.aten.unsqueeze %3691, %int3 : !torch.vtensor<[20,2304,64],f16>, !torch.int -> !torch.vtensor<[20,2304,64,1],f16>
%3701 = torch.aten.permute %3700, %203 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3702 = torch.aten.unsqueeze %3695, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3703 = torch.aten.permute %3702, %206 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,77,64],f16>
%3704 = torch.aten.permute %3701, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3705 = torch.aten._reshape_alias %3704, %729, %744 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3706 = torch.aten.permute %3703, %211 : !torch.vtensor<[20,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,77,1],f16>
%3707 = torch.aten._reshape_alias %3706, %828, %297 : !torch.vtensor<[20,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,64,77],f16>
%3708 = torch.aten.bmm %3705, %3707 : !torch.vtensor<[20,2304,64],f16>, !torch.vtensor<[20,64,77],f16> -> !torch.vtensor<[20,2304,77],f16>
%3709 = torch.aten.view %3708, %831 : !torch.vtensor<[20,2304,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3710 = torch.aten.permute %3709, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3711 = torch.aten.view %3710, %834 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%3712 = torch.aten.mul.Tensor %3711, %0 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[20,2304,77],f16>
%3713 = torch.aten._softmax %3712, %int-1, %true : !torch.vtensor<[20,2304,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,2304,77],f32>
%3714 = torch.aten._to_copy %3713, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[20,2304,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[20,2304,77],f16>
%3715 = torch.aten.unsqueeze %3714, %int3 : !torch.vtensor<[20,2304,77],f16>, !torch.int -> !torch.vtensor<[20,2304,77,1],f16>
%3716 = torch.aten.permute %3715, %203 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,77],f16>
%3717 = torch.aten.unsqueeze %3699, %int3 : !torch.vtensor<[20,77,64],f16>, !torch.int -> !torch.vtensor<[20,77,64,1],f16>
%3718 = torch.aten.permute %3717, %211 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,1,64,77],f16>
%3719 = torch.aten.permute %3716, %203 : !torch.vtensor<[20,2304,1,77],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,77,1],f16>
%3720 = torch.aten._reshape_alias %3719, %834, %844 : !torch.vtensor<[20,2304,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,2304,77],f16>
%3721 = torch.aten.permute %3718, %211 : !torch.vtensor<[20,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[20,77,64,1],f16>
%3722 = torch.aten._reshape_alias %3721, %815, %316 : !torch.vtensor<[20,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[20,77,64],f16>
%3723 = torch.aten.bmm %3720, %3722 : !torch.vtensor<[20,2304,77],f16>, !torch.vtensor<[20,77,64],f16> -> !torch.vtensor<[20,2304,64],f16>
%3724 = torch.aten.view %3723, %769 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,1,64],f16>
%3725 = torch.aten.permute %3724, %203 : !torch.vtensor<[20,2304,1,64],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64,1],f16>
%3726 = torch.aten.view %3725, %729 : !torch.vtensor<[20,2304,64,1],f16>, !torch.list<int> -> !torch.vtensor<[20,2304,64],f16>
%3727 = torch.aten._reshape_alias %3726, %773, %774 : !torch.vtensor<[20,2304,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,10,2304,64],f16>
%3728 = torch.aten.permute %3727, %189 : !torch.vtensor<[2,10,2304,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,10,64],f16>
%3729 = torch.aten.clone %3728, %int0 : !torch.vtensor<[2,2304,10,64],f16>, !torch.int -> !torch.vtensor<[2,2304,10,64],f16>
%3730 = torch.aten._unsafe_view %3729, %691 : !torch.vtensor<[2,2304,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3731 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3732 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3733 = torch.aten.t %3732 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3734 = torch.aten.view %3730, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3735 = torch.aten.addmm %3731, %3734, %3733, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3736 = torch.aten.view %3735, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3737 = torch.aten.add.Tensor %3736, %3668, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3738 = torch.aten._to_copy %3737, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f32>
%result0_213, %result1_214, %result2_215 = torch.aten.native_layer_norm %3738, %704, %39, %39, %float1.000000e-05 : !torch.vtensor<[2,2304,640],f32>, !torch.list<int>, !torch.vtensor<[640],f32>, !torch.vtensor<[640],f32>, !torch.float -> !torch.vtensor<[2,2304,640],f32>, !torch.vtensor<[2,2304,1],f32>, !torch.vtensor<[2,2304,1],f32>
%3739 = torch.aten._to_copy %31, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120],f16>
%3740 = torch.aten._to_copy %32, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[5120,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[5120,640],f16>
%3741 = torch.aten._to_copy %result0_213, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,2304,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,2304,640],f16>
%3742 = torch.aten.t %3740 : !torch.vtensor<[5120,640],f16> -> !torch.vtensor<[640,5120],f16>
%3743 = torch.aten.view %3741, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3744 = torch.aten.addmm %3739, %3743, %3742, %int1, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,5120],f16>
%3745 = torch.aten.view %3744, %870 : !torch.vtensor<[4608,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,5120],f16>
%3746 = torch.aten.slice.Tensor %3745, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3747 = torch.aten.slice.Tensor %3745, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,2304,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,2304,2560],f16>
%3748 = torch.aten.gelu %3747, %str : !torch.vtensor<[2,2304,2560],f16>, !torch.str -> !torch.vtensor<[2,2304,2560],f16>
%3749 = torch.aten.mul.Tensor %3746, %3748 : !torch.vtensor<[2,2304,2560],f16>, !torch.vtensor<[2,2304,2560],f16> -> !torch.vtensor<[2,2304,2560],f16>
%3750 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3751 = torch.aten._to_copy %33, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,2560],f16>
%3752 = torch.aten.t %3751 : !torch.vtensor<[640,2560],f16> -> !torch.vtensor<[2560,640],f16>
%3753 = torch.aten.view %3749, %879 : !torch.vtensor<[2,2304,2560],f16>, !torch.list<int> -> !torch.vtensor<[4608,2560],f16>
%3754 = torch.aten.addmm %3750, %3753, %3752, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,2560],f16>, !torch.vtensor<[2560,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3755 = torch.aten.view %3754, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3756 = torch.aten.add.Tensor %3755, %3737, %int1 : !torch.vtensor<[2,2304,640],f16>, !torch.vtensor<[2,2304,640],f16>, !torch.int -> !torch.vtensor<[2,2304,640],f16>
%3757 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3758 = torch.aten._to_copy %34, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640],f16>
%3759 = torch.aten.t %3758 : !torch.vtensor<[640,640],f16> -> !torch.vtensor<[640,640],f16>
%3760 = torch.aten.view %3756, %699 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int> -> !torch.vtensor<[4608,640],f16>
%3761 = torch.aten.addmm %3757, %3760, %3759, %int1, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[4608,640],f16>, !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[4608,640],f16>
%3762 = torch.aten.view %3761, %691 : !torch.vtensor<[4608,640],f16>, !torch.list<int> -> !torch.vtensor<[2,2304,640],f16>
%3763 = torch.aten._reshape_alias %3762, %890, %891 : !torch.vtensor<[2,2304,640],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,48,48,640],f16>
%3764 = torch.aten.permute %3763, %206 : !torch.vtensor<[2,48,48,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3765 = torch.aten._reshape_alias %3764, %654, %894 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,640,48,48],f16>
%3766 = torch.aten.clone %3765, %int0 : !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3767 = torch.aten.add.Tensor %3766, %3573, %int1 : !torch.vtensor<[2,640,48,48],f16>, !torch.vtensor<[2,640,48,48],f16>, !torch.int -> !torch.vtensor<[2,640,48,48],f16>
%3768 = torch.prim.ListConstruct %int96, %int96 : (!torch.int, !torch.int) -> !torch.list<int>
%3769 = torch.aten.upsample_nearest2d %3767, %3768, %float2.000000e00, %float2.000000e00 : !torch.vtensor<[2,640,48,48],f16>, !torch.list<int>, !torch.float, !torch.float -> !torch.vtensor<[2,640,96,96],f16>
%3770 = torch.aten._to_copy %39, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640],f16>
%3771 = torch.aten._to_copy %35, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[640,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[640,640,3,3],f16>
%3772 = torch.aten._convolution %3769, %3771, %3770, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,640,96,96],f16>
%3773 = torch.prim.ListConstruct %3772, %610 : (!torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>) -> !torch.list<vtensor>
%3774 = torch.aten.cat %3773, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,960,96,96],f16>
%3775 = torch.aten._to_copy %3774, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,960,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,96,96],f32>
%3776 = torch.prim.ListConstruct %int2, %int32, %int30, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3777 = torch.prim.ListConstruct %int8847360, %int276480, %int9216, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3778 = torch.aten._reshape_alias %3775, %3776, %3777 : !torch.vtensor<[2,960,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,30,9216],f32>
%result0_216, %result1_217 = torch.aten.var_mean.correction %3778, %85, %int0, %true : !torch.vtensor<[2,32,30,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3779 = torch.aten.add.Tensor %result0_216, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3780 = torch.aten.rsqrt %3779 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3781 = torch.aten.sub.Tensor %3778, %result1_217, %int1 : !torch.vtensor<[2,32,30,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,30,9216],f32>
%3782 = torch.aten.mul.Tensor %3781, %3780 : !torch.vtensor<[2,32,30,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,30,9216],f32>
%3783 = torch.prim.ListConstruct %int2, %int960, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%3784 = torch.aten.view %3782, %3783 : !torch.vtensor<[2,32,30,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,960,96,96],f32>
%3785 = torch.aten.unsqueeze %36, %int0 : !torch.vtensor<[960],f32>, !torch.int -> !torch.vtensor<[1,960],f32>
%3786 = torch.aten.unsqueeze %3785, %int2 : !torch.vtensor<[1,960],f32>, !torch.int -> !torch.vtensor<[1,960,1],f32>
%3787 = torch.aten.unsqueeze %3786, %int3 : !torch.vtensor<[1,960,1],f32>, !torch.int -> !torch.vtensor<[1,960,1,1],f32>
%3788 = torch.aten.unsqueeze %36, %int0 : !torch.vtensor<[960],f32>, !torch.int -> !torch.vtensor<[1,960],f32>
%3789 = torch.aten.unsqueeze %3788, %int2 : !torch.vtensor<[1,960],f32>, !torch.int -> !torch.vtensor<[1,960,1],f32>
%3790 = torch.aten.unsqueeze %3789, %int3 : !torch.vtensor<[1,960,1],f32>, !torch.int -> !torch.vtensor<[1,960,1,1],f32>
%3791 = torch.aten.mul.Tensor %3784, %3790 : !torch.vtensor<[2,960,96,96],f32>, !torch.vtensor<[1,960,1,1],f32> -> !torch.vtensor<[2,960,96,96],f32>
%3792 = torch.aten.add.Tensor %3791, %3787, %int1 : !torch.vtensor<[2,960,96,96],f32>, !torch.vtensor<[1,960,1,1],f32>, !torch.int -> !torch.vtensor<[2,960,96,96],f32>
%3793 = torch.aten._to_copy %3792, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,960,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,960,96,96],f16>
%3794 = torch.aten.silu %3793 : !torch.vtensor<[2,960,96,96],f16> -> !torch.vtensor<[2,960,96,96],f16>
%3795 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3796 = torch.aten._to_copy %37, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,960,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,960,3,3],f16>
%3797 = torch.aten._convolution %3794, %3796, %3795, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,96,96],f16>, !torch.vtensor<[320,960,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%3798 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%3799 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3800 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%3801 = torch.aten.t %3800 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%3802 = torch.aten.addmm %3799, %3798, %3801, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%3803 = torch.aten.unsqueeze %3802, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%3804 = torch.aten.unsqueeze %3803, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%3805 = torch.aten.add.Tensor %3797, %3804, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%3806 = torch.aten._to_copy %3805, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%3807 = torch.aten._reshape_alias %3806, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_218, %result1_219 = torch.aten.var_mean.correction %3807, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3808 = torch.aten.add.Tensor %result0_218, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3809 = torch.aten.rsqrt %3808 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3810 = torch.aten.sub.Tensor %3807, %result1_219, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%3811 = torch.aten.mul.Tensor %3810, %3809 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%3812 = torch.aten.view %3811, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%3813 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%3814 = torch.aten.unsqueeze %3813, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%3815 = torch.aten.unsqueeze %3814, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%3816 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%3817 = torch.aten.unsqueeze %3816, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%3818 = torch.aten.unsqueeze %3817, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%3819 = torch.aten.mul.Tensor %3812, %3818 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%3820 = torch.aten.add.Tensor %3819, %3815, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%3821 = torch.aten._to_copy %3820, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%3822 = torch.aten.silu %3821 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%3823 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3824 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%3825 = torch.aten._convolution %3822, %3824, %3823, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%3826 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3827 = torch.aten._to_copy %38, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,960,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,960,1,1],f16>
%3828 = torch.aten._convolution %3774, %3827, %3826, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,960,96,96],f16>, !torch.vtensor<[320,960,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%3829 = torch.aten.add.Tensor %3828, %3825, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%3830 = torch.aten._to_copy %3829, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%3831 = torch.aten._reshape_alias %3830, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_220, %result1_221 = torch.aten.var_mean.correction %3831, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%3832 = torch.aten.add.Tensor %result0_220, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%3833 = torch.aten.rsqrt %3832 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%3834 = torch.aten.sub.Tensor %3831, %result1_221, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%3835 = torch.aten.mul.Tensor %3834, %3833 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%3836 = torch.aten.view %3835, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%3837 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%3838 = torch.aten.unsqueeze %3837, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%3839 = torch.aten.unsqueeze %3838, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%3840 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%3841 = torch.aten.unsqueeze %3840, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%3842 = torch.aten.unsqueeze %3841, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%3843 = torch.aten.mul.Tensor %3836, %3842 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%3844 = torch.aten.add.Tensor %3843, %3839, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%3845 = torch.aten._reshape_alias %3844, %90, %149 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%3846 = torch.aten.permute %3845, %151 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f32>
%3847 = torch.aten._reshape_alias %3846, %153, %154 : !torch.vtensor<[2,96,96,320],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f32>
%3848 = torch.aten.clone %3847, %int0 : !torch.vtensor<[2,9216,320],f32>, !torch.int -> !torch.vtensor<[2,9216,320],f32>
%3849 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3850 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3851 = torch.aten._to_copy %3848, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3852 = torch.aten.t %3850 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3853 = torch.aten.view %3851, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3854 = torch.aten.addmm %3849, %3853, %3852, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%3855 = torch.aten.view %3854, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3856 = torch.aten._to_copy %3855, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_222, %result1_223, %result2_224 = torch.aten.native_layer_norm %3856, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%3857 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3858 = torch.aten._to_copy %result0_222, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3859 = torch.aten.t %3857 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3860 = torch.aten._reshape_alias %3858, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3861 = torch.aten.mm %3860, %3859 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%3862 = torch.aten._unsafe_view %3861, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3863 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3864 = torch.aten._to_copy %result0_222, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3865 = torch.aten.t %3863 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3866 = torch.aten._reshape_alias %3864, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3867 = torch.aten.mm %3866, %3865 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%3868 = torch.aten._unsafe_view %3867, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3869 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3870 = torch.aten._to_copy %result0_222, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3871 = torch.aten.t %3869 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3872 = torch.aten._reshape_alias %3870, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3873 = torch.aten.mm %3872, %3871 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%3874 = torch.aten._unsafe_view %3873, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3875 = torch.aten._reshape_alias %3862, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3876 = torch.aten.permute %3875, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3877 = torch.aten.clone %3876, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%3878 = torch.aten._unsafe_view %3877, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3879 = torch.aten._reshape_alias %3868, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3880 = torch.aten.permute %3879, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3881 = torch.aten.clone %3880, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%3882 = torch.aten._unsafe_view %3881, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3883 = torch.aten._reshape_alias %3874, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3884 = torch.aten.permute %3883, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3885 = torch.aten.clone %3884, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%3886 = torch.aten._unsafe_view %3885, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3887 = torch.aten.unsqueeze %3878, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%3888 = torch.aten.permute %3887, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%3889 = torch.aten.unsqueeze %3882, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%3890 = torch.aten.permute %3889, %206 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,9216,64],f16>
%3891 = torch.aten.permute %3888, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%3892 = torch.aten._reshape_alias %3891, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3893 = torch.aten.permute %3890, %211 : !torch.vtensor<[10,1,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,9216,1],f16>
%3894 = torch.aten._reshape_alias %3893, %213, %214 : !torch.vtensor<[10,64,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,9216],f16>
%3895 = torch.aten.bmm %3892, %3894 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,9216],f16> -> !torch.vtensor<[10,9216,9216],f16>
%3896 = torch.aten.view %3895, %217 : !torch.vtensor<[10,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%3897 = torch.aten.permute %3896, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%3898 = torch.aten.view %3897, %220 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%3899 = torch.aten.mul.Tensor %3898, %0 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,9216],f16>
%3900 = torch.aten._softmax %3899, %int-1, %true : !torch.vtensor<[10,9216,9216],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,9216],f32>
%3901 = torch.aten._to_copy %3900, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,9216],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,9216],f16>
%3902 = torch.aten.unsqueeze %3901, %int3 : !torch.vtensor<[10,9216,9216],f16>, !torch.int -> !torch.vtensor<[10,9216,9216,1],f16>
%3903 = torch.aten.permute %3902, %203 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%3904 = torch.aten.unsqueeze %3886, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%3905 = torch.aten.permute %3904, %211 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,9216],f16>
%3906 = torch.aten.permute %3903, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%3907 = torch.aten._reshape_alias %3906, %220, %230 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%3908 = torch.aten.permute %3905, %211 : !torch.vtensor<[10,1,64,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%3909 = torch.aten._reshape_alias %3908, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3910 = torch.aten.bmm %3907, %3909 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[10,9216,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%3911 = torch.aten.view %3910, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%3912 = torch.aten.permute %3911, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%3913 = torch.aten.view %3912, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3914 = torch.aten._reshape_alias %3913, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3915 = torch.aten.permute %3914, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3916 = torch.aten.clone %3915, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%3917 = torch.aten._unsafe_view %3916, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3918 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3919 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3920 = torch.aten.t %3919 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3921 = torch.aten.view %3917, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3922 = torch.aten.addmm %3918, %3921, %3920, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%3923 = torch.aten.view %3922, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3924 = torch.aten.add.Tensor %3923, %3855, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%3925 = torch.aten._to_copy %3924, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_225, %result1_226, %result2_227 = torch.aten.native_layer_norm %3925, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%3926 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3927 = torch.aten._to_copy %result0_225, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3928 = torch.aten.t %3926 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3929 = torch.aten._reshape_alias %3927, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3930 = torch.aten.mm %3929, %3928 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%3931 = torch.aten._unsafe_view %3930, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3932 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%3933 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3934 = torch.aten.t %3932 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%3935 = torch.aten._reshape_alias %3933, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3936 = torch.aten.mm %3935, %3934 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%3937 = torch.aten._unsafe_view %3936, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%3938 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%3939 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%3940 = torch.aten.t %3938 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%3941 = torch.aten._reshape_alias %3939, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%3942 = torch.aten.mm %3941, %3940 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%3943 = torch.aten._unsafe_view %3942, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%3944 = torch.aten._reshape_alias %3931, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3945 = torch.aten.permute %3944, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3946 = torch.aten.clone %3945, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%3947 = torch.aten._unsafe_view %3946, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3948 = torch.aten._reshape_alias %3937, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%3949 = torch.aten.permute %3948, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%3950 = torch.aten.clone %3949, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%3951 = torch.aten._unsafe_view %3950, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%3952 = torch.aten._reshape_alias %3943, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%3953 = torch.aten.permute %3952, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%3954 = torch.aten.clone %3953, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%3955 = torch.aten._unsafe_view %3954, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%3956 = torch.aten.unsqueeze %3947, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%3957 = torch.aten.permute %3956, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%3958 = torch.aten.unsqueeze %3951, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%3959 = torch.aten.permute %3958, %206 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,77,64],f16>
%3960 = torch.aten.permute %3957, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%3961 = torch.aten._reshape_alias %3960, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3962 = torch.aten.permute %3959, %211 : !torch.vtensor<[10,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,77,1],f16>
%3963 = torch.aten._reshape_alias %3962, %296, %297 : !torch.vtensor<[10,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,77],f16>
%3964 = torch.aten.bmm %3961, %3963 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,77],f16> -> !torch.vtensor<[10,9216,77],f16>
%3965 = torch.aten.view %3964, %300 : !torch.vtensor<[10,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%3966 = torch.aten.permute %3965, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%3967 = torch.aten.view %3966, %303 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%3968 = torch.aten.mul.Tensor %3967, %0 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,77],f16>
%3969 = torch.aten._softmax %3968, %int-1, %true : !torch.vtensor<[10,9216,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,77],f32>
%3970 = torch.aten._to_copy %3969, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,77],f16>
%3971 = torch.aten.unsqueeze %3970, %int3 : !torch.vtensor<[10,9216,77],f16>, !torch.int -> !torch.vtensor<[10,9216,77,1],f16>
%3972 = torch.aten.permute %3971, %203 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%3973 = torch.aten.unsqueeze %3955, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%3974 = torch.aten.permute %3973, %211 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,77],f16>
%3975 = torch.aten.permute %3972, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%3976 = torch.aten._reshape_alias %3975, %303, %313 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%3977 = torch.aten.permute %3974, %211 : !torch.vtensor<[10,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64,1],f16>
%3978 = torch.aten._reshape_alias %3977, %283, %316 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%3979 = torch.aten.bmm %3976, %3978 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[10,77,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%3980 = torch.aten.view %3979, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%3981 = torch.aten.permute %3980, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%3982 = torch.aten.view %3981, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%3983 = torch.aten._reshape_alias %3982, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%3984 = torch.aten.permute %3983, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%3985 = torch.aten.clone %3984, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%3986 = torch.aten._unsafe_view %3985, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3987 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%3988 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%3989 = torch.aten.t %3988 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%3990 = torch.aten.view %3986, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%3991 = torch.aten.addmm %3987, %3990, %3989, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%3992 = torch.aten.view %3991, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%3993 = torch.aten.add.Tensor %3992, %3924, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%3994 = torch.aten._to_copy %3993, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_228, %result1_229, %result2_230 = torch.aten.native_layer_norm %3994, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%3995 = torch.aten._to_copy %44, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560],f16>
%3996 = torch.aten._to_copy %45, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560,320],f16>
%3997 = torch.aten._to_copy %result0_228, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%3998 = torch.aten.t %3996 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%3999 = torch.aten.view %3997, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4000 = torch.aten.addmm %3995, %3999, %3998, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,2560],f16>
%4001 = torch.aten.view %4000, %340 : !torch.vtensor<[18432,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,2560],f16>
%4002 = torch.aten.slice.Tensor %4001, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4003 = torch.aten.slice.Tensor %4001, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4004 = torch.aten.gelu %4003, %str : !torch.vtensor<[2,9216,1280],f16>, !torch.str -> !torch.vtensor<[2,9216,1280],f16>
%4005 = torch.aten.mul.Tensor %4002, %4004 : !torch.vtensor<[2,9216,1280],f16>, !torch.vtensor<[2,9216,1280],f16> -> !torch.vtensor<[2,9216,1280],f16>
%4006 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4007 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%4008 = torch.aten.t %4007 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%4009 = torch.aten.view %4005, %349 : !torch.vtensor<[2,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[18432,1280],f16>
%4010 = torch.aten.addmm %4006, %4009, %4008, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4011 = torch.aten.view %4010, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4012 = torch.aten.add.Tensor %4011, %3993, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4013 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4014 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4015 = torch.aten.t %4014 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4016 = torch.aten.view %4012, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4017 = torch.aten.addmm %4013, %4016, %4015, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4018 = torch.aten.view %4017, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4019 = torch.aten._reshape_alias %4018, %360, %361 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f16>
%4020 = torch.aten.permute %4019, %206 : !torch.vtensor<[2,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4021 = torch.aten._reshape_alias %4020, %90, %364 : !torch.vtensor<[2,320,96,96],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4022 = torch.aten.clone %4021, %int0 : !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4023 = torch.aten.add.Tensor %4022, %3829, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4024 = torch.prim.ListConstruct %4023, %367 : (!torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>) -> !torch.list<vtensor>
%4025 = torch.aten.cat %4024, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,96,96],f16>
%4026 = torch.aten._to_copy %4025, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,96,96],f32>
%4027 = torch.prim.ListConstruct %int2, %int32, %int20, %int9216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4028 = torch.prim.ListConstruct %int5898240, %int184320, %int9216, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4029 = torch.aten._reshape_alias %4026, %4027, %4028 : !torch.vtensor<[2,640,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,9216],f32>
%result0_231, %result1_232 = torch.aten.var_mean.correction %4029, %85, %int0, %true : !torch.vtensor<[2,32,20,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4030 = torch.aten.add.Tensor %result0_231, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4031 = torch.aten.rsqrt %4030 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4032 = torch.aten.sub.Tensor %4029, %result1_232, %int1 : !torch.vtensor<[2,32,20,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,9216],f32>
%4033 = torch.aten.mul.Tensor %4032, %4031 : !torch.vtensor<[2,32,20,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,9216],f32>
%4034 = torch.prim.ListConstruct %int2, %int640, %int96, %int96 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%4035 = torch.aten.view %4033, %4034 : !torch.vtensor<[2,32,20,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,640,96,96],f32>
%4036 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%4037 = torch.aten.unsqueeze %4036, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%4038 = torch.aten.unsqueeze %4037, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%4039 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%4040 = torch.aten.unsqueeze %4039, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%4041 = torch.aten.unsqueeze %4040, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%4042 = torch.aten.mul.Tensor %4035, %4041 : !torch.vtensor<[2,640,96,96],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,96,96],f32>
%4043 = torch.aten.add.Tensor %4042, %4038, %int1 : !torch.vtensor<[2,640,96,96],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,96,96],f32>
%4044 = torch.aten._to_copy %4043, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,96,96],f16>
%4045 = torch.aten.silu %4044 : !torch.vtensor<[2,640,96,96],f16> -> !torch.vtensor<[2,640,96,96],f16>
%4046 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4047 = torch.aten._to_copy %40, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,640,3,3],f16>
%4048 = torch.aten._convolution %4045, %4047, %4046, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4049 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4050 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4051 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%4052 = torch.aten.t %4051 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%4053 = torch.aten.addmm %4050, %4049, %4052, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%4054 = torch.aten.unsqueeze %4053, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%4055 = torch.aten.unsqueeze %4054, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%4056 = torch.aten.add.Tensor %4048, %4055, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4057 = torch.aten._to_copy %4056, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%4058 = torch.aten._reshape_alias %4057, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_233, %result1_234 = torch.aten.var_mean.correction %4058, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4059 = torch.aten.add.Tensor %result0_233, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4060 = torch.aten.rsqrt %4059 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4061 = torch.aten.sub.Tensor %4058, %result1_234, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%4062 = torch.aten.mul.Tensor %4061, %4060 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%4063 = torch.aten.view %4062, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4064 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4065 = torch.aten.unsqueeze %4064, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4066 = torch.aten.unsqueeze %4065, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4067 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4068 = torch.aten.unsqueeze %4067, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4069 = torch.aten.unsqueeze %4068, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4070 = torch.aten.mul.Tensor %4063, %4069 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4071 = torch.aten.add.Tensor %4070, %4066, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%4072 = torch.aten._to_copy %4071, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%4073 = torch.aten.silu %4072 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%4074 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4075 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%4076 = torch.aten._convolution %4073, %4075, %4074, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4077 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4078 = torch.aten._to_copy %42, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,640,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,640,1,1],f16>
%4079 = torch.aten._convolution %4025, %4078, %4077, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4080 = torch.aten.add.Tensor %4079, %4076, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4081 = torch.aten._to_copy %4080, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%4082 = torch.aten._reshape_alias %4081, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_235, %result1_236 = torch.aten.var_mean.correction %4082, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4083 = torch.aten.add.Tensor %result0_235, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4084 = torch.aten.rsqrt %4083 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4085 = torch.aten.sub.Tensor %4082, %result1_236, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%4086 = torch.aten.mul.Tensor %4085, %4084 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%4087 = torch.aten.view %4086, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4088 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4089 = torch.aten.unsqueeze %4088, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4090 = torch.aten.unsqueeze %4089, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4091 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4092 = torch.aten.unsqueeze %4091, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4093 = torch.aten.unsqueeze %4092, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4094 = torch.aten.mul.Tensor %4087, %4093 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4095 = torch.aten.add.Tensor %4094, %4090, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%4096 = torch.aten._reshape_alias %4095, %90, %149 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4097 = torch.aten.permute %4096, %151 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f32>
%4098 = torch.aten._reshape_alias %4097, %153, %154 : !torch.vtensor<[2,96,96,320],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f32>
%4099 = torch.aten.clone %4098, %int0 : !torch.vtensor<[2,9216,320],f32>, !torch.int -> !torch.vtensor<[2,9216,320],f32>
%4100 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4101 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4102 = torch.aten._to_copy %4099, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4103 = torch.aten.t %4101 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4104 = torch.aten.view %4102, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4105 = torch.aten.addmm %4100, %4104, %4103, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4106 = torch.aten.view %4105, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4107 = torch.aten._to_copy %4106, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_237, %result1_238, %result2_239 = torch.aten.native_layer_norm %4107, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4108 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4109 = torch.aten._to_copy %result0_237, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4110 = torch.aten.t %4108 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4111 = torch.aten._reshape_alias %4109, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4112 = torch.aten.mm %4111, %4110 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4113 = torch.aten._unsafe_view %4112, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4114 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4115 = torch.aten._to_copy %result0_237, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4116 = torch.aten.t %4114 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4117 = torch.aten._reshape_alias %4115, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4118 = torch.aten.mm %4117, %4116 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4119 = torch.aten._unsafe_view %4118, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4120 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4121 = torch.aten._to_copy %result0_237, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4122 = torch.aten.t %4120 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4123 = torch.aten._reshape_alias %4121, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4124 = torch.aten.mm %4123, %4122 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4125 = torch.aten._unsafe_view %4124, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4126 = torch.aten._reshape_alias %4113, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4127 = torch.aten.permute %4126, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4128 = torch.aten.clone %4127, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4129 = torch.aten._unsafe_view %4128, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4130 = torch.aten._reshape_alias %4119, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4131 = torch.aten.permute %4130, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4132 = torch.aten.clone %4131, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4133 = torch.aten._unsafe_view %4132, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4134 = torch.aten._reshape_alias %4125, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4135 = torch.aten.permute %4134, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4136 = torch.aten.clone %4135, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4137 = torch.aten._unsafe_view %4136, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4138 = torch.aten.unsqueeze %4129, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4139 = torch.aten.permute %4138, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4140 = torch.aten.unsqueeze %4133, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4141 = torch.aten.permute %4140, %206 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,9216,64],f16>
%4142 = torch.aten.permute %4139, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4143 = torch.aten._reshape_alias %4142, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4144 = torch.aten.permute %4141, %211 : !torch.vtensor<[10,1,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,9216,1],f16>
%4145 = torch.aten._reshape_alias %4144, %213, %214 : !torch.vtensor<[10,64,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,9216],f16>
%4146 = torch.aten.bmm %4143, %4145 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,9216],f16> -> !torch.vtensor<[10,9216,9216],f16>
%4147 = torch.aten.view %4146, %217 : !torch.vtensor<[10,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%4148 = torch.aten.permute %4147, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%4149 = torch.aten.view %4148, %220 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%4150 = torch.aten.mul.Tensor %4149, %0 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,9216],f16>
%4151 = torch.aten._softmax %4150, %int-1, %true : !torch.vtensor<[10,9216,9216],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,9216],f32>
%4152 = torch.aten._to_copy %4151, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,9216],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,9216],f16>
%4153 = torch.aten.unsqueeze %4152, %int3 : !torch.vtensor<[10,9216,9216],f16>, !torch.int -> !torch.vtensor<[10,9216,9216,1],f16>
%4154 = torch.aten.permute %4153, %203 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%4155 = torch.aten.unsqueeze %4137, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4156 = torch.aten.permute %4155, %211 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,9216],f16>
%4157 = torch.aten.permute %4154, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%4158 = torch.aten._reshape_alias %4157, %220, %230 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%4159 = torch.aten.permute %4156, %211 : !torch.vtensor<[10,1,64,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4160 = torch.aten._reshape_alias %4159, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4161 = torch.aten.bmm %4158, %4160 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[10,9216,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%4162 = torch.aten.view %4161, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4163 = torch.aten.permute %4162, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4164 = torch.aten.view %4163, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4165 = torch.aten._reshape_alias %4164, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4166 = torch.aten.permute %4165, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4167 = torch.aten.clone %4166, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%4168 = torch.aten._unsafe_view %4167, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4169 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4170 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4171 = torch.aten.t %4170 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4172 = torch.aten.view %4168, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4173 = torch.aten.addmm %4169, %4172, %4171, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4174 = torch.aten.view %4173, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4175 = torch.aten.add.Tensor %4174, %4106, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4176 = torch.aten._to_copy %4175, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_240, %result1_241, %result2_242 = torch.aten.native_layer_norm %4176, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4177 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4178 = torch.aten._to_copy %result0_240, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4179 = torch.aten.t %4177 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4180 = torch.aten._reshape_alias %4178, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4181 = torch.aten.mm %4180, %4179 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4182 = torch.aten._unsafe_view %4181, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4183 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%4184 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%4185 = torch.aten.t %4183 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%4186 = torch.aten._reshape_alias %4184, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%4187 = torch.aten.mm %4186, %4185 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%4188 = torch.aten._unsafe_view %4187, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%4189 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%4190 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%4191 = torch.aten.t %4189 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%4192 = torch.aten._reshape_alias %4190, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%4193 = torch.aten.mm %4192, %4191 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%4194 = torch.aten._unsafe_view %4193, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%4195 = torch.aten._reshape_alias %4182, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4196 = torch.aten.permute %4195, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4197 = torch.aten.clone %4196, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4198 = torch.aten._unsafe_view %4197, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4199 = torch.aten._reshape_alias %4188, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%4200 = torch.aten.permute %4199, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%4201 = torch.aten.clone %4200, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%4202 = torch.aten._unsafe_view %4201, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4203 = torch.aten._reshape_alias %4194, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%4204 = torch.aten.permute %4203, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%4205 = torch.aten.clone %4204, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%4206 = torch.aten._unsafe_view %4205, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4207 = torch.aten.unsqueeze %4198, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4208 = torch.aten.permute %4207, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4209 = torch.aten.unsqueeze %4202, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%4210 = torch.aten.permute %4209, %206 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,77,64],f16>
%4211 = torch.aten.permute %4208, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4212 = torch.aten._reshape_alias %4211, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4213 = torch.aten.permute %4210, %211 : !torch.vtensor<[10,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,77,1],f16>
%4214 = torch.aten._reshape_alias %4213, %296, %297 : !torch.vtensor<[10,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,77],f16>
%4215 = torch.aten.bmm %4212, %4214 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,77],f16> -> !torch.vtensor<[10,9216,77],f16>
%4216 = torch.aten.view %4215, %300 : !torch.vtensor<[10,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%4217 = torch.aten.permute %4216, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%4218 = torch.aten.view %4217, %303 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%4219 = torch.aten.mul.Tensor %4218, %0 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,77],f16>
%4220 = torch.aten._softmax %4219, %int-1, %true : !torch.vtensor<[10,9216,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,77],f32>
%4221 = torch.aten._to_copy %4220, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,77],f16>
%4222 = torch.aten.unsqueeze %4221, %int3 : !torch.vtensor<[10,9216,77],f16>, !torch.int -> !torch.vtensor<[10,9216,77,1],f16>
%4223 = torch.aten.permute %4222, %203 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%4224 = torch.aten.unsqueeze %4206, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%4225 = torch.aten.permute %4224, %211 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,77],f16>
%4226 = torch.aten.permute %4223, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%4227 = torch.aten._reshape_alias %4226, %303, %313 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%4228 = torch.aten.permute %4225, %211 : !torch.vtensor<[10,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64,1],f16>
%4229 = torch.aten._reshape_alias %4228, %283, %316 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4230 = torch.aten.bmm %4227, %4229 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[10,77,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%4231 = torch.aten.view %4230, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4232 = torch.aten.permute %4231, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4233 = torch.aten.view %4232, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4234 = torch.aten._reshape_alias %4233, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4235 = torch.aten.permute %4234, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4236 = torch.aten.clone %4235, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%4237 = torch.aten._unsafe_view %4236, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4238 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4239 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4240 = torch.aten.t %4239 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4241 = torch.aten.view %4237, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4242 = torch.aten.addmm %4238, %4241, %4240, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4243 = torch.aten.view %4242, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4244 = torch.aten.add.Tensor %4243, %4175, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4245 = torch.aten._to_copy %4244, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_243, %result1_244, %result2_245 = torch.aten.native_layer_norm %4245, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4246 = torch.aten._to_copy %44, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560],f16>
%4247 = torch.aten._to_copy %45, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560,320],f16>
%4248 = torch.aten._to_copy %result0_243, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4249 = torch.aten.t %4247 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%4250 = torch.aten.view %4248, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4251 = torch.aten.addmm %4246, %4250, %4249, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,2560],f16>
%4252 = torch.aten.view %4251, %340 : !torch.vtensor<[18432,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,2560],f16>
%4253 = torch.aten.slice.Tensor %4252, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4254 = torch.aten.slice.Tensor %4252, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4255 = torch.aten.gelu %4254, %str : !torch.vtensor<[2,9216,1280],f16>, !torch.str -> !torch.vtensor<[2,9216,1280],f16>
%4256 = torch.aten.mul.Tensor %4253, %4255 : !torch.vtensor<[2,9216,1280],f16>, !torch.vtensor<[2,9216,1280],f16> -> !torch.vtensor<[2,9216,1280],f16>
%4257 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4258 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%4259 = torch.aten.t %4258 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%4260 = torch.aten.view %4256, %349 : !torch.vtensor<[2,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[18432,1280],f16>
%4261 = torch.aten.addmm %4257, %4260, %4259, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4262 = torch.aten.view %4261, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4263 = torch.aten.add.Tensor %4262, %4244, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4264 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4265 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4266 = torch.aten.t %4265 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4267 = torch.aten.view %4263, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4268 = torch.aten.addmm %4264, %4267, %4266, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4269 = torch.aten.view %4268, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4270 = torch.aten._reshape_alias %4269, %360, %361 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f16>
%4271 = torch.aten.permute %4270, %206 : !torch.vtensor<[2,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4272 = torch.aten._reshape_alias %4271, %90, %364 : !torch.vtensor<[2,320,96,96],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4273 = torch.aten.clone %4272, %int0 : !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4274 = torch.aten.add.Tensor %4273, %4080, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4275 = torch.prim.ListConstruct %4274, %80 : (!torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>) -> !torch.list<vtensor>
%4276 = torch.aten.cat %4275, %int1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,640,96,96],f16>
%4277 = torch.aten._to_copy %4276, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,640,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,96,96],f32>
%4278 = torch.aten._reshape_alias %4277, %4027, %4028 : !torch.vtensor<[2,640,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,20,9216],f32>
%result0_246, %result1_247 = torch.aten.var_mean.correction %4278, %85, %int0, %true : !torch.vtensor<[2,32,20,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4279 = torch.aten.add.Tensor %result0_246, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4280 = torch.aten.rsqrt %4279 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4281 = torch.aten.sub.Tensor %4278, %result1_247, %int1 : !torch.vtensor<[2,32,20,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,20,9216],f32>
%4282 = torch.aten.mul.Tensor %4281, %4280 : !torch.vtensor<[2,32,20,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,20,9216],f32>
%4283 = torch.aten.view %4282, %4034 : !torch.vtensor<[2,32,20,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,640,96,96],f32>
%4284 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%4285 = torch.aten.unsqueeze %4284, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%4286 = torch.aten.unsqueeze %4285, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%4287 = torch.aten.unsqueeze %39, %int0 : !torch.vtensor<[640],f32>, !torch.int -> !torch.vtensor<[1,640],f32>
%4288 = torch.aten.unsqueeze %4287, %int2 : !torch.vtensor<[1,640],f32>, !torch.int -> !torch.vtensor<[1,640,1],f32>
%4289 = torch.aten.unsqueeze %4288, %int3 : !torch.vtensor<[1,640,1],f32>, !torch.int -> !torch.vtensor<[1,640,1,1],f32>
%4290 = torch.aten.mul.Tensor %4283, %4289 : !torch.vtensor<[2,640,96,96],f32>, !torch.vtensor<[1,640,1,1],f32> -> !torch.vtensor<[2,640,96,96],f32>
%4291 = torch.aten.add.Tensor %4290, %4286, %int1 : !torch.vtensor<[2,640,96,96],f32>, !torch.vtensor<[1,640,1,1],f32>, !torch.int -> !torch.vtensor<[2,640,96,96],f32>
%4292 = torch.aten._to_copy %4291, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,640,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,640,96,96],f16>
%4293 = torch.aten.silu %4292 : !torch.vtensor<[2,640,96,96],f16> -> !torch.vtensor<[2,640,96,96],f16>
%4294 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4295 = torch.aten._to_copy %40, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,640,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,640,3,3],f16>
%4296 = torch.aten._convolution %4293, %4295, %4294, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[320,640,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4297 = torch.aten.silu %74 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16>
%4298 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4299 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%4300 = torch.aten.t %4299 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%4301 = torch.aten.addmm %4298, %4297, %4300, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[2,320],f16>
%4302 = torch.aten.unsqueeze %4301, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16>
%4303 = torch.aten.unsqueeze %4302, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16>
%4304 = torch.aten.add.Tensor %4296, %4303, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4305 = torch.aten._to_copy %4304, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%4306 = torch.aten._reshape_alias %4305, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_248, %result1_249 = torch.aten.var_mean.correction %4306, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4307 = torch.aten.add.Tensor %result0_248, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4308 = torch.aten.rsqrt %4307 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4309 = torch.aten.sub.Tensor %4306, %result1_249, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%4310 = torch.aten.mul.Tensor %4309, %4308 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%4311 = torch.aten.view %4310, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4312 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4313 = torch.aten.unsqueeze %4312, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4314 = torch.aten.unsqueeze %4313, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4315 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4316 = torch.aten.unsqueeze %4315, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4317 = torch.aten.unsqueeze %4316, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4318 = torch.aten.mul.Tensor %4311, %4317 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4319 = torch.aten.add.Tensor %4318, %4314, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%4320 = torch.aten._to_copy %4319, %int5, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%4321 = torch.aten.silu %4320 : !torch.vtensor<[2,320,96,96],f16> -> !torch.vtensor<[2,320,96,96],f16>
%4322 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4323 = torch.aten._to_copy %41, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320,3,3],f16>
%4324 = torch.aten._convolution %4321, %4323, %4322, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4325 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4326 = torch.aten._to_copy %42, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,640,1,1],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,640,1,1],f16>
%4327 = torch.aten._convolution %4276, %4326, %4325, %78, %79, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,640,96,96],f16>, !torch.vtensor<[320,640,1,1],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,320,96,96],f16>
%4328 = torch.aten.add.Tensor %4327, %4324, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4329 = torch.aten._to_copy %4328, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%4330 = torch.aten._reshape_alias %4329, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_250, %result1_251 = torch.aten.var_mean.correction %4330, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4331 = torch.aten.add.Tensor %result0_250, %1, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4332 = torch.aten.rsqrt %4331 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4333 = torch.aten.sub.Tensor %4330, %result1_251, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%4334 = torch.aten.mul.Tensor %4333, %4332 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%4335 = torch.aten.view %4334, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4336 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4337 = torch.aten.unsqueeze %4336, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4338 = torch.aten.unsqueeze %4337, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4339 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4340 = torch.aten.unsqueeze %4339, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4341 = torch.aten.unsqueeze %4340, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4342 = torch.aten.mul.Tensor %4335, %4341 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4343 = torch.aten.add.Tensor %4342, %4338, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%4344 = torch.aten._reshape_alias %4343, %90, %149 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4345 = torch.aten.permute %4344, %151 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f32>
%4346 = torch.aten._reshape_alias %4345, %153, %154 : !torch.vtensor<[2,96,96,320],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f32>
%4347 = torch.aten.clone %4346, %int0 : !torch.vtensor<[2,9216,320],f32>, !torch.int -> !torch.vtensor<[2,9216,320],f32>
%4348 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4349 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4350 = torch.aten._to_copy %4347, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4351 = torch.aten.t %4349 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4352 = torch.aten.view %4350, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4353 = torch.aten.addmm %4348, %4352, %4351, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4354 = torch.aten.view %4353, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4355 = torch.aten._to_copy %4354, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_252, %result1_253, %result2_254 = torch.aten.native_layer_norm %4355, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4356 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4357 = torch.aten._to_copy %result0_252, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4358 = torch.aten.t %4356 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4359 = torch.aten._reshape_alias %4357, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4360 = torch.aten.mm %4359, %4358 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4361 = torch.aten._unsafe_view %4360, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4362 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4363 = torch.aten._to_copy %result0_252, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4364 = torch.aten.t %4362 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4365 = torch.aten._reshape_alias %4363, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4366 = torch.aten.mm %4365, %4364 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4367 = torch.aten._unsafe_view %4366, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4368 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4369 = torch.aten._to_copy %result0_252, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4370 = torch.aten.t %4368 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4371 = torch.aten._reshape_alias %4369, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4372 = torch.aten.mm %4371, %4370 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4373 = torch.aten._unsafe_view %4372, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4374 = torch.aten._reshape_alias %4361, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4375 = torch.aten.permute %4374, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4376 = torch.aten.clone %4375, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4377 = torch.aten._unsafe_view %4376, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4378 = torch.aten._reshape_alias %4367, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4379 = torch.aten.permute %4378, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4380 = torch.aten.clone %4379, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4381 = torch.aten._unsafe_view %4380, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4382 = torch.aten._reshape_alias %4373, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4383 = torch.aten.permute %4382, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4384 = torch.aten.clone %4383, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4385 = torch.aten._unsafe_view %4384, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4386 = torch.aten.unsqueeze %4377, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4387 = torch.aten.permute %4386, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4388 = torch.aten.unsqueeze %4381, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4389 = torch.aten.permute %4388, %206 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,9216,64],f16>
%4390 = torch.aten.permute %4387, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4391 = torch.aten._reshape_alias %4390, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4392 = torch.aten.permute %4389, %211 : !torch.vtensor<[10,1,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,9216,1],f16>
%4393 = torch.aten._reshape_alias %4392, %213, %214 : !torch.vtensor<[10,64,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,9216],f16>
%4394 = torch.aten.bmm %4391, %4393 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,9216],f16> -> !torch.vtensor<[10,9216,9216],f16>
%4395 = torch.aten.view %4394, %217 : !torch.vtensor<[10,9216,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%4396 = torch.aten.permute %4395, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%4397 = torch.aten.view %4396, %220 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%4398 = torch.aten.mul.Tensor %4397, %0 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,9216],f16>
%4399 = torch.aten._softmax %4398, %int-1, %true : !torch.vtensor<[10,9216,9216],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,9216],f32>
%4400 = torch.aten._to_copy %4399, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,9216],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,9216],f16>
%4401 = torch.aten.unsqueeze %4400, %int3 : !torch.vtensor<[10,9216,9216],f16>, !torch.int -> !torch.vtensor<[10,9216,9216,1],f16>
%4402 = torch.aten.permute %4401, %203 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,9216],f16>
%4403 = torch.aten.unsqueeze %4385, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4404 = torch.aten.permute %4403, %211 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,9216],f16>
%4405 = torch.aten.permute %4402, %203 : !torch.vtensor<[10,9216,1,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,9216,1],f16>
%4406 = torch.aten._reshape_alias %4405, %220, %230 : !torch.vtensor<[10,9216,9216,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,9216],f16>
%4407 = torch.aten.permute %4404, %211 : !torch.vtensor<[10,1,64,9216],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4408 = torch.aten._reshape_alias %4407, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4409 = torch.aten.bmm %4406, %4408 : !torch.vtensor<[10,9216,9216],f16>, !torch.vtensor<[10,9216,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%4410 = torch.aten.view %4409, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4411 = torch.aten.permute %4410, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4412 = torch.aten.view %4411, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4413 = torch.aten._reshape_alias %4412, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4414 = torch.aten.permute %4413, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4415 = torch.aten.clone %4414, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%4416 = torch.aten._unsafe_view %4415, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4417 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4418 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4419 = torch.aten.t %4418 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4420 = torch.aten.view %4416, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4421 = torch.aten.addmm %4417, %4420, %4419, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4422 = torch.aten.view %4421, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4423 = torch.aten.add.Tensor %4422, %4354, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4424 = torch.aten._to_copy %4423, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_255, %result1_256, %result2_257 = torch.aten.native_layer_norm %4424, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4425 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4426 = torch.aten._to_copy %result0_255, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4427 = torch.aten.t %4425 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4428 = torch.aten._reshape_alias %4426, %161, %170 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4429 = torch.aten.mm %4428, %4427 : !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[18432,320],f16>
%4430 = torch.aten._unsafe_view %4429, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4431 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%4432 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%4433 = torch.aten.t %4431 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%4434 = torch.aten._reshape_alias %4432, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%4435 = torch.aten.mm %4434, %4433 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%4436 = torch.aten._unsafe_view %4435, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%4437 = torch.aten._to_copy %43, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1024],f16>
%4438 = torch.aten._to_copy %arg2, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,77,1024],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,77,1024],f16>
%4439 = torch.aten.t %4437 : !torch.vtensor<[320,1024],f16> -> !torch.vtensor<[1024,320],f16>
%4440 = torch.aten._reshape_alias %4438, %262, %263 : !torch.vtensor<[2,77,1024],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[154,1024],f16>
%4441 = torch.aten.mm %4440, %4439 : !torch.vtensor<[154,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[154,320],f16>
%4442 = torch.aten._unsafe_view %4441, %266 : !torch.vtensor<[154,320],f16>, !torch.list<int> -> !torch.vtensor<[2,77,320],f16>
%4443 = torch.aten._reshape_alias %4430, %186, %187 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4444 = torch.aten.permute %4443, %189 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4445 = torch.aten.clone %4444, %int0 : !torch.vtensor<[2,5,9216,64],f16>, !torch.int -> !torch.vtensor<[2,5,9216,64],f16>
%4446 = torch.aten._unsafe_view %4445, %192 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4447 = torch.aten._reshape_alias %4436, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%4448 = torch.aten.permute %4447, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%4449 = torch.aten.clone %4448, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%4450 = torch.aten._unsafe_view %4449, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4451 = torch.aten._reshape_alias %4442, %278, %279 : !torch.vtensor<[2,77,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,77,5,64],f16>
%4452 = torch.aten.permute %4451, %189 : !torch.vtensor<[2,77,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,77,64],f16>
%4453 = torch.aten.clone %4452, %int0 : !torch.vtensor<[2,5,77,64],f16>, !torch.int -> !torch.vtensor<[2,5,77,64],f16>
%4454 = torch.aten._unsafe_view %4453, %283 : !torch.vtensor<[2,5,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4455 = torch.aten.unsqueeze %4446, %int3 : !torch.vtensor<[10,9216,64],f16>, !torch.int -> !torch.vtensor<[10,9216,64,1],f16>
%4456 = torch.aten.permute %4455, %203 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4457 = torch.aten.unsqueeze %4450, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%4458 = torch.aten.permute %4457, %206 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,77,64],f16>
%4459 = torch.aten.permute %4456, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4460 = torch.aten._reshape_alias %4459, %192, %209 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4461 = torch.aten.permute %4458, %211 : !torch.vtensor<[10,1,77,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,77,1],f16>
%4462 = torch.aten._reshape_alias %4461, %296, %297 : !torch.vtensor<[10,64,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,64,77],f16>
%4463 = torch.aten.bmm %4460, %4462 : !torch.vtensor<[10,9216,64],f16>, !torch.vtensor<[10,64,77],f16> -> !torch.vtensor<[10,9216,77],f16>
%4464 = torch.aten.view %4463, %300 : !torch.vtensor<[10,9216,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%4465 = torch.aten.permute %4464, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%4466 = torch.aten.view %4465, %303 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%4467 = torch.aten.mul.Tensor %4466, %0 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[10,9216,77],f16>
%4468 = torch.aten._softmax %4467, %int-1, %true : !torch.vtensor<[10,9216,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,9216,77],f32>
%4469 = torch.aten._to_copy %4468, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[10,9216,77],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[10,9216,77],f16>
%4470 = torch.aten.unsqueeze %4469, %int3 : !torch.vtensor<[10,9216,77],f16>, !torch.int -> !torch.vtensor<[10,9216,77,1],f16>
%4471 = torch.aten.permute %4470, %203 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,77],f16>
%4472 = torch.aten.unsqueeze %4454, %int3 : !torch.vtensor<[10,77,64],f16>, !torch.int -> !torch.vtensor<[10,77,64,1],f16>
%4473 = torch.aten.permute %4472, %211 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,1,64,77],f16>
%4474 = torch.aten.permute %4471, %203 : !torch.vtensor<[10,9216,1,77],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,77,1],f16>
%4475 = torch.aten._reshape_alias %4474, %303, %313 : !torch.vtensor<[10,9216,77,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,9216,77],f16>
%4476 = torch.aten.permute %4473, %211 : !torch.vtensor<[10,1,64,77],f16>, !torch.list<int> -> !torch.vtensor<[10,77,64,1],f16>
%4477 = torch.aten._reshape_alias %4476, %283, %316 : !torch.vtensor<[10,77,64,1],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[10,77,64],f16>
%4478 = torch.aten.bmm %4475, %4477 : !torch.vtensor<[10,9216,77],f16>, !torch.vtensor<[10,77,64],f16> -> !torch.vtensor<[10,9216,64],f16>
%4479 = torch.aten.view %4478, %235 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,1,64],f16>
%4480 = torch.aten.permute %4479, %203 : !torch.vtensor<[10,9216,1,64],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64,1],f16>
%4481 = torch.aten.view %4480, %192 : !torch.vtensor<[10,9216,64,1],f16>, !torch.list<int> -> !torch.vtensor<[10,9216,64],f16>
%4482 = torch.aten._reshape_alias %4481, %239, %240 : !torch.vtensor<[10,9216,64],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,5,9216,64],f16>
%4483 = torch.aten.permute %4482, %189 : !torch.vtensor<[2,5,9216,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,5,64],f16>
%4484 = torch.aten.clone %4483, %int0 : !torch.vtensor<[2,9216,5,64],f16>, !torch.int -> !torch.vtensor<[2,9216,5,64],f16>
%4485 = torch.aten._unsafe_view %4484, %153 : !torch.vtensor<[2,9216,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4486 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4487 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4488 = torch.aten.t %4487 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4489 = torch.aten.view %4485, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4490 = torch.aten.addmm %4486, %4489, %4488, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4491 = torch.aten.view %4490, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4492 = torch.aten.add.Tensor %4491, %4423, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4493 = torch.aten._to_copy %4492, %int6, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f16>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f32>
%result0_258, %result1_259, %result2_260 = torch.aten.native_layer_norm %4493, %166, %48, %48, %float1.000000e-05 : !torch.vtensor<[2,9216,320],f32>, !torch.list<int>, !torch.vtensor<[320],f32>, !torch.vtensor<[320],f32>, !torch.float -> !torch.vtensor<[2,9216,320],f32>, !torch.vtensor<[2,9216,1],f32>, !torch.vtensor<[2,9216,1],f32>
%4494 = torch.aten._to_copy %44, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560],f16>
%4495 = torch.aten._to_copy %45, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2560,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2560,320],f16>
%4496 = torch.aten._to_copy %result0_258, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,9216,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,9216,320],f16>
%4497 = torch.aten.t %4495 : !torch.vtensor<[2560,320],f16> -> !torch.vtensor<[320,2560],f16>
%4498 = torch.aten.view %4496, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4499 = torch.aten.addmm %4494, %4498, %4497, %int1, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,2560],f16>
%4500 = torch.aten.view %4499, %340 : !torch.vtensor<[18432,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,2560],f16>
%4501 = torch.aten.slice.Tensor %4500, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4502 = torch.aten.slice.Tensor %4500, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,9216,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,9216,1280],f16>
%4503 = torch.aten.gelu %4502, %str : !torch.vtensor<[2,9216,1280],f16>, !torch.str -> !torch.vtensor<[2,9216,1280],f16>
%4504 = torch.aten.mul.Tensor %4501, %4503 : !torch.vtensor<[2,9216,1280],f16>, !torch.vtensor<[2,9216,1280],f16> -> !torch.vtensor<[2,9216,1280],f16>
%4505 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4506 = torch.aten._to_copy %46, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,1280],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,1280],f16>
%4507 = torch.aten.t %4506 : !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[1280,320],f16>
%4508 = torch.aten.view %4504, %349 : !torch.vtensor<[2,9216,1280],f16>, !torch.list<int> -> !torch.vtensor<[18432,1280],f16>
%4509 = torch.aten.addmm %4505, %4508, %4507, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,1280],f16>, !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4510 = torch.aten.view %4509, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4511 = torch.aten.add.Tensor %4510, %4492, %int1 : !torch.vtensor<[2,9216,320],f16>, !torch.vtensor<[2,9216,320],f16>, !torch.int -> !torch.vtensor<[2,9216,320],f16>
%4512 = torch.aten._to_copy %48, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320],f16>
%4513 = torch.aten._to_copy %47, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[320,320],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[320,320],f16>
%4514 = torch.aten.t %4513 : !torch.vtensor<[320,320],f16> -> !torch.vtensor<[320,320],f16>
%4515 = torch.aten.view %4511, %161 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int> -> !torch.vtensor<[18432,320],f16>
%4516 = torch.aten.addmm %4512, %4515, %4514, %int1, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[18432,320],f16>, !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[18432,320],f16>
%4517 = torch.aten.view %4516, %153 : !torch.vtensor<[18432,320],f16>, !torch.list<int> -> !torch.vtensor<[2,9216,320],f16>
%4518 = torch.aten._reshape_alias %4517, %360, %361 : !torch.vtensor<[2,9216,320],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,96,96,320],f16>
%4519 = torch.aten.permute %4518, %206 : !torch.vtensor<[2,96,96,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4520 = torch.aten._reshape_alias %4519, %90, %364 : !torch.vtensor<[2,320,96,96],f16>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f16>
%4521 = torch.aten.clone %4520, %int0 : !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4522 = torch.aten.add.Tensor %4521, %4328, %int1 : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[2,320,96,96],f16>, !torch.int -> !torch.vtensor<[2,320,96,96],f16>
%4523 = torch.aten._to_copy %4522, %int6, %none, %cuda3A0, %none, %false, %none : !torch.vtensor<[2,320,96,96],f16>, !torch.int, !torch.none, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f32>
%4524 = torch.aten._reshape_alias %4523, %82, %83 : !torch.vtensor<[2,320,96,96],f32>, !torch.list<int>, !torch.list<int> -> !torch.vtensor<[2,32,10,9216],f32>
%result0_261, %result1_262 = torch.aten.var_mean.correction %4524, %85, %int0, %true : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int>, !torch.int, !torch.bool -> !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[2,32,1,1],f32>
%4525 = torch.aten.add.Tensor %result0_261, %2, %int1 : !torch.vtensor<[2,32,1,1],f32>, !torch.vtensor<[],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f32>
%4526 = torch.aten.rsqrt %4525 : !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,1,1],f32>
%4527 = torch.aten.sub.Tensor %4524, %result1_262, %int1 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32>, !torch.int -> !torch.vtensor<[2,32,10,9216],f32>
%4528 = torch.aten.mul.Tensor %4527, %4526 : !torch.vtensor<[2,32,10,9216],f32>, !torch.vtensor<[2,32,1,1],f32> -> !torch.vtensor<[2,32,10,9216],f32>
%4529 = torch.aten.view %4528, %90 : !torch.vtensor<[2,32,10,9216],f32>, !torch.list<int> -> !torch.vtensor<[2,320,96,96],f32>
%4530 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4531 = torch.aten.unsqueeze %4530, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4532 = torch.aten.unsqueeze %4531, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4533 = torch.aten.unsqueeze %48, %int0 : !torch.vtensor<[320],f32>, !torch.int -> !torch.vtensor<[1,320],f32>
%4534 = torch.aten.unsqueeze %4533, %int2 : !torch.vtensor<[1,320],f32>, !torch.int -> !torch.vtensor<[1,320,1],f32>
%4535 = torch.aten.unsqueeze %4534, %int3 : !torch.vtensor<[1,320,1],f32>, !torch.int -> !torch.vtensor<[1,320,1,1],f32>
%4536 = torch.aten.mul.Tensor %4529, %4535 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4537 = torch.aten.add.Tensor %4536, %4532, %int1 : !torch.vtensor<[2,320,96,96],f32>, !torch.vtensor<[1,320,1,1],f32>, !torch.int -> !torch.vtensor<[2,320,96,96],f32>
%4538 = torch.aten.silu %4537 : !torch.vtensor<[2,320,96,96],f32> -> !torch.vtensor<[2,320,96,96],f32>
%4539 = torch.aten._to_copy %49, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[4],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[4],f16>
%4540 = torch.aten._to_copy %50, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[4,320,3,3],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[4,320,3,3],f16>
%4541 = torch.aten._to_copy %4538, %int5, %none, %none, %none, %false, %none : !torch.vtensor<[2,320,96,96],f32>, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[2,320,96,96],f16>
%4542 = torch.aten._convolution %4541, %4540, %4539, %78, %78, %78, %false, %79, %int1, %false, %false, %true, %true : !torch.vtensor<[2,320,96,96],f16>, !torch.vtensor<[4,320,3,3],f16>, !torch.vtensor<[4],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int, !torch.bool, !torch.bool, !torch.bool, !torch.bool -> !torch.vtensor<[2,4,96,96],f16>
return %4542 : !torch.vtensor<[2,4,96,96],f16>
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment