Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created November 28, 2022 16:29
Show Gist options
  • Save pashu123/dc21e4608733d5fe81f6aa562f4bdacb to your computer and use it in GitHub Desktop.
func.func @forward(%arg0: !torch.vtensor<[1,77],si64>) -> !torch.vtensor<[1,77,1024],f16> {
%0 = torch.vtensor.literal(dense<1.250000e-01> : tensor<f64>) : !torch.vtensor<[],f64>
%1 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x77xsi64>) : !torch.vtensor<[1,77],si64>
%2 = torch.vtensor.literal(dense_resource<__elided__> : tensor<49408x1024xf16>) : !torch.vtensor<[49408,1024],f16>
%3 = torch.vtensor.literal(dense_resource<__elided__> : tensor<77x1024xf16>) : !torch.vtensor<[77,1024],f16>
%4 = torch.vtensor.literal(dense<-6.550400e+04> : tensor<f32>) : !torch.vtensor<[],f32>
%5 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x1024xf16>) : !torch.vtensor<[1024,1024],f16>
%6 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4096x1024xf16>) : !torch.vtensor<[4096,1024],f16>
%7 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4096xf16>) : !torch.vtensor<[4096],f16>
%8 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024x4096xf16>) : !torch.vtensor<[1024,4096],f16>
%9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1024xf16>) : !torch.vtensor<[1024],f16>
%int-1 = torch.constant.int -1
%int77 = torch.constant.int 77
%int0 = torch.constant.int 0
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int1 = torch.constant.int 1
%false = torch.constant.bool false
%int5 = torch.constant.int 5
%none = torch.constant.none
%int1024 = torch.constant.int 1024
%float1.000000e-05 = torch.constant.float 1.000000e-05
%int16 = torch.constant.int 16
%int64 = torch.constant.int 64
%int2 = torch.constant.int 2
%int4096 = torch.constant.int 4096
%str = torch.constant.str "none"
%10 = torch.prim.ListConstruct %int-1, %int77 : (!torch.int, !torch.int) -> !torch.list<int>
%11 = torch.aten.view %arg0, %10 : !torch.vtensor<[1,77],si64>, !torch.list<int> -> !torch.vtensor<[1,77],si64>
%12 = torch.aten.slice.Tensor %1, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,77],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,77],si64>
%13 = torch.aten.embedding %2, %11, %int-1, %false, %false : !torch.vtensor<[49408,1024],f16>, !torch.vtensor<[1,77],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,77,1024],f16>
%14 = torch.aten.embedding %3, %12, %int-1, %false, %false : !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1,77],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,77,1024],f16>
%15 = torch.aten.add.Tensor %13, %14, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%16 = torch.prim.ListConstruct %int1, %int77, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%cpu = torch.constant.device "cpu"
%17 = torch.aten.empty.memory_format %16, %int5, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[1,77,77],f16>
%18 = torch.aten.lift_fresh_copy %4 : !torch.vtensor<[],f32> -> !torch.vtensor<[],f32>
%19 = torch.aten.fill.Tensor %17, %18 : !torch.vtensor<[1,77,77],f16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,77,77],f16>
%20 = torch.aten.triu %19, %int1 : !torch.vtensor<[1,77,77],f16>, !torch.int -> !torch.vtensor<[1,77,77],f16>
%21 = torch.aten.unsqueeze %20, %int1 : !torch.vtensor<[1,77,77],f16>, !torch.int -> !torch.vtensor<[1,1,77,77],f16>
%cuda3A0 = torch.constant.device "cuda:0"
%22 = torch.aten._to_copy %21, %int5, %int0, %cuda3A0, %none, %false, %none : !torch.vtensor<[1,1,77,77],f16>, !torch.int, !torch.int, !torch.Device, !torch.none, !torch.bool, !torch.none -> !torch.vtensor<[1,1,77,77],f16>
%23 = torch.prim.ListConstruct %int1024 : (!torch.int) -> !torch.list<int>
%result0, %result1, %result2 = torch.aten.native_layer_norm %15, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%24 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%25 = torch.prim.ListConstruct %int77, %int1024 : (!torch.int, !torch.int) -> !torch.list<int>
%26 = torch.aten.view %result0, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%27 = torch.aten.addmm %9, %26, %24, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%28 = torch.prim.ListConstruct %int1, %int77, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%29 = torch.aten.view %27, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%30 = torch.aten.mul.Tensor %29, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%31 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%32 = torch.aten.view %result0, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%33 = torch.aten.addmm %9, %32, %31, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%34 = torch.aten.view %33, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%35 = torch.prim.ListConstruct %int1, %int-1, %int16, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%36 = torch.aten.view %34, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%37 = torch.aten.transpose.int %36, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%38 = torch.aten.clone %37, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%39 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%40 = torch.aten.view %result0, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%41 = torch.aten.addmm %9, %40, %39, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%42 = torch.aten.view %41, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%43 = torch.aten.view %42, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%44 = torch.aten.transpose.int %43, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%45 = torch.aten.clone %44, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%46 = torch.prim.ListConstruct %int1, %int77, %int16, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%47 = torch.aten.view %30, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%48 = torch.aten.transpose.int %47, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%49 = torch.aten.clone %48, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%50 = torch.prim.ListConstruct %int16, %int-1, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%51 = torch.aten.view %49, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%52 = torch.aten.view %38, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%53 = torch.aten.view %45, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%54 = torch.aten.transpose.int %52, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%55 = torch.aten.bmm %51, %54 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%56 = torch.prim.ListConstruct %int1, %int16, %int77, %int77 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%57 = torch.aten.view %55, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%58 = torch.aten.add.Tensor %57, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%59 = torch.prim.ListConstruct %int16, %int77, %int77 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%60 = torch.aten.view %58, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%61 = torch.aten._softmax %60, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%62 = torch.aten.bmm %61, %53 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%63 = torch.prim.ListConstruct %int1, %int16, %int77, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%64 = torch.aten.view %62, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%65 = torch.aten.transpose.int %64, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%66 = torch.aten.clone %65, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%67 = torch.aten._unsafe_view %66, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%68 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%69 = torch.aten.view %67, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%70 = torch.aten.addmm %9, %69, %68, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%71 = torch.aten.view %70, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%72 = torch.aten.add.Tensor %15, %71, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_0, %result1_1, %result2_2 = torch.aten.native_layer_norm %72, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%73 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%74 = torch.aten.view %result0_0, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%75 = torch.aten.addmm %7, %74, %73, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%76 = torch.prim.ListConstruct %int1, %int77, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%77 = torch.aten.view %75, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%78 = torch.aten.gelu %77, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%79 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%80 = torch.prim.ListConstruct %int77, %int4096 : (!torch.int, !torch.int) -> !torch.list<int>
%81 = torch.aten.view %78, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%82 = torch.aten.addmm %9, %81, %79, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%83 = torch.aten.view %82, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%84 = torch.aten.add.Tensor %72, %83, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %84, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%85 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%86 = torch.aten.view %result0_3, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%87 = torch.aten.addmm %9, %86, %85, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%88 = torch.aten.view %87, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%89 = torch.aten.mul.Tensor %88, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%90 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%91 = torch.aten.view %result0_3, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%92 = torch.aten.addmm %9, %91, %90, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%93 = torch.aten.view %92, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%94 = torch.aten.view %93, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%95 = torch.aten.transpose.int %94, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%96 = torch.aten.clone %95, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%97 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%98 = torch.aten.view %result0_3, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%99 = torch.aten.addmm %9, %98, %97, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%100 = torch.aten.view %99, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%101 = torch.aten.view %100, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%102 = torch.aten.transpose.int %101, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%103 = torch.aten.clone %102, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%104 = torch.aten.view %89, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%105 = torch.aten.transpose.int %104, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%106 = torch.aten.clone %105, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%107 = torch.aten.view %106, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%108 = torch.aten.view %96, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%109 = torch.aten.view %103, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%110 = torch.aten.transpose.int %108, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%111 = torch.aten.bmm %107, %110 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%112 = torch.aten.view %111, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%113 = torch.aten.add.Tensor %112, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%114 = torch.aten.view %113, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%115 = torch.aten._softmax %114, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%116 = torch.aten.bmm %115, %109 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%117 = torch.aten.view %116, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%118 = torch.aten.transpose.int %117, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%119 = torch.aten.clone %118, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%120 = torch.aten._unsafe_view %119, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%121 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%122 = torch.aten.view %120, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%123 = torch.aten.addmm %9, %122, %121, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%124 = torch.aten.view %123, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%125 = torch.aten.add.Tensor %84, %124, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_6, %result1_7, %result2_8 = torch.aten.native_layer_norm %125, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%126 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%127 = torch.aten.view %result0_6, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%128 = torch.aten.addmm %7, %127, %126, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%129 = torch.aten.view %128, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%130 = torch.aten.gelu %129, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%131 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%132 = torch.aten.view %130, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%133 = torch.aten.addmm %9, %132, %131, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%134 = torch.aten.view %133, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%135 = torch.aten.add.Tensor %125, %134, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_9, %result1_10, %result2_11 = torch.aten.native_layer_norm %135, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%136 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%137 = torch.aten.view %result0_9, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%138 = torch.aten.addmm %9, %137, %136, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%139 = torch.aten.view %138, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%140 = torch.aten.mul.Tensor %139, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%141 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%142 = torch.aten.view %result0_9, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%143 = torch.aten.addmm %9, %142, %141, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%144 = torch.aten.view %143, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%145 = torch.aten.view %144, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%146 = torch.aten.transpose.int %145, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%147 = torch.aten.clone %146, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%148 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%149 = torch.aten.view %result0_9, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%150 = torch.aten.addmm %9, %149, %148, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%151 = torch.aten.view %150, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%152 = torch.aten.view %151, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%153 = torch.aten.transpose.int %152, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%154 = torch.aten.clone %153, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%155 = torch.aten.view %140, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%156 = torch.aten.transpose.int %155, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%157 = torch.aten.clone %156, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%158 = torch.aten.view %157, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%159 = torch.aten.view %147, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%160 = torch.aten.view %154, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%161 = torch.aten.transpose.int %159, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%162 = torch.aten.bmm %158, %161 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%163 = torch.aten.view %162, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%164 = torch.aten.add.Tensor %163, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%165 = torch.aten.view %164, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%166 = torch.aten._softmax %165, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%167 = torch.aten.bmm %166, %160 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%168 = torch.aten.view %167, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%169 = torch.aten.transpose.int %168, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%170 = torch.aten.clone %169, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%171 = torch.aten._unsafe_view %170, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%172 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%173 = torch.aten.view %171, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%174 = torch.aten.addmm %9, %173, %172, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%175 = torch.aten.view %174, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%176 = torch.aten.add.Tensor %135, %175, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_12, %result1_13, %result2_14 = torch.aten.native_layer_norm %176, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%177 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%178 = torch.aten.view %result0_12, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%179 = torch.aten.addmm %7, %178, %177, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%180 = torch.aten.view %179, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%181 = torch.aten.gelu %180, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%182 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%183 = torch.aten.view %181, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%184 = torch.aten.addmm %9, %183, %182, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%185 = torch.aten.view %184, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%186 = torch.aten.add.Tensor %176, %185, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_15, %result1_16, %result2_17 = torch.aten.native_layer_norm %186, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%187 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%188 = torch.aten.view %result0_15, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%189 = torch.aten.addmm %9, %188, %187, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%190 = torch.aten.view %189, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%191 = torch.aten.mul.Tensor %190, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%192 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%193 = torch.aten.view %result0_15, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%194 = torch.aten.addmm %9, %193, %192, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%195 = torch.aten.view %194, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%196 = torch.aten.view %195, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%197 = torch.aten.transpose.int %196, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%198 = torch.aten.clone %197, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%199 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%200 = torch.aten.view %result0_15, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%201 = torch.aten.addmm %9, %200, %199, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%202 = torch.aten.view %201, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%203 = torch.aten.view %202, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%204 = torch.aten.transpose.int %203, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%205 = torch.aten.clone %204, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%206 = torch.aten.view %191, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%207 = torch.aten.transpose.int %206, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%208 = torch.aten.clone %207, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%209 = torch.aten.view %208, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%210 = torch.aten.view %198, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%211 = torch.aten.view %205, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%212 = torch.aten.transpose.int %210, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%213 = torch.aten.bmm %209, %212 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%214 = torch.aten.view %213, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%215 = torch.aten.add.Tensor %214, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%216 = torch.aten.view %215, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%217 = torch.aten._softmax %216, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%218 = torch.aten.bmm %217, %211 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%219 = torch.aten.view %218, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%220 = torch.aten.transpose.int %219, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%221 = torch.aten.clone %220, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%222 = torch.aten._unsafe_view %221, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%223 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%224 = torch.aten.view %222, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%225 = torch.aten.addmm %9, %224, %223, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%226 = torch.aten.view %225, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%227 = torch.aten.add.Tensor %186, %226, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_18, %result1_19, %result2_20 = torch.aten.native_layer_norm %227, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%228 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%229 = torch.aten.view %result0_18, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%230 = torch.aten.addmm %7, %229, %228, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%231 = torch.aten.view %230, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%232 = torch.aten.gelu %231, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%233 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%234 = torch.aten.view %232, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%235 = torch.aten.addmm %9, %234, %233, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%236 = torch.aten.view %235, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%237 = torch.aten.add.Tensor %227, %236, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_21, %result1_22, %result2_23 = torch.aten.native_layer_norm %237, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%238 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%239 = torch.aten.view %result0_21, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%240 = torch.aten.addmm %9, %239, %238, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%241 = torch.aten.view %240, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%242 = torch.aten.mul.Tensor %241, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%243 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%244 = torch.aten.view %result0_21, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%245 = torch.aten.addmm %9, %244, %243, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%246 = torch.aten.view %245, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%247 = torch.aten.view %246, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%248 = torch.aten.transpose.int %247, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%249 = torch.aten.clone %248, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%250 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%251 = torch.aten.view %result0_21, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%252 = torch.aten.addmm %9, %251, %250, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%253 = torch.aten.view %252, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%254 = torch.aten.view %253, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%255 = torch.aten.transpose.int %254, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%256 = torch.aten.clone %255, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%257 = torch.aten.view %242, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%258 = torch.aten.transpose.int %257, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%259 = torch.aten.clone %258, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%260 = torch.aten.view %259, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%261 = torch.aten.view %249, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%262 = torch.aten.view %256, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%263 = torch.aten.transpose.int %261, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%264 = torch.aten.bmm %260, %263 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%265 = torch.aten.view %264, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%266 = torch.aten.add.Tensor %265, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%267 = torch.aten.view %266, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%268 = torch.aten._softmax %267, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%269 = torch.aten.bmm %268, %262 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%270 = torch.aten.view %269, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%271 = torch.aten.transpose.int %270, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%272 = torch.aten.clone %271, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%273 = torch.aten._unsafe_view %272, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%274 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%275 = torch.aten.view %273, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%276 = torch.aten.addmm %9, %275, %274, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%277 = torch.aten.view %276, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%278 = torch.aten.add.Tensor %237, %277, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %278, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%279 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%280 = torch.aten.view %result0_24, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%281 = torch.aten.addmm %7, %280, %279, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%282 = torch.aten.view %281, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%283 = torch.aten.gelu %282, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%284 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%285 = torch.aten.view %283, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%286 = torch.aten.addmm %9, %285, %284, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%287 = torch.aten.view %286, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%288 = torch.aten.add.Tensor %278, %287, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %288, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%289 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%290 = torch.aten.view %result0_27, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%291 = torch.aten.addmm %9, %290, %289, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%292 = torch.aten.view %291, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%293 = torch.aten.mul.Tensor %292, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%294 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%295 = torch.aten.view %result0_27, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%296 = torch.aten.addmm %9, %295, %294, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%297 = torch.aten.view %296, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%298 = torch.aten.view %297, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%299 = torch.aten.transpose.int %298, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%300 = torch.aten.clone %299, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%301 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%302 = torch.aten.view %result0_27, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%303 = torch.aten.addmm %9, %302, %301, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%304 = torch.aten.view %303, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%305 = torch.aten.view %304, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%306 = torch.aten.transpose.int %305, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%307 = torch.aten.clone %306, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%308 = torch.aten.view %293, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%309 = torch.aten.transpose.int %308, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%310 = torch.aten.clone %309, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%311 = torch.aten.view %310, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%312 = torch.aten.view %300, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%313 = torch.aten.view %307, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%314 = torch.aten.transpose.int %312, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%315 = torch.aten.bmm %311, %314 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%316 = torch.aten.view %315, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%317 = torch.aten.add.Tensor %316, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%318 = torch.aten.view %317, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%319 = torch.aten._softmax %318, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%320 = torch.aten.bmm %319, %313 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%321 = torch.aten.view %320, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%322 = torch.aten.transpose.int %321, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%323 = torch.aten.clone %322, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%324 = torch.aten._unsafe_view %323, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%325 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%326 = torch.aten.view %324, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%327 = torch.aten.addmm %9, %326, %325, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%328 = torch.aten.view %327, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%329 = torch.aten.add.Tensor %288, %328, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_30, %result1_31, %result2_32 = torch.aten.native_layer_norm %329, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%330 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%331 = torch.aten.view %result0_30, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%332 = torch.aten.addmm %7, %331, %330, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%333 = torch.aten.view %332, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%334 = torch.aten.gelu %333, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%335 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%336 = torch.aten.view %334, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%337 = torch.aten.addmm %9, %336, %335, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%338 = torch.aten.view %337, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%339 = torch.aten.add.Tensor %329, %338, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_33, %result1_34, %result2_35 = torch.aten.native_layer_norm %339, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%340 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%341 = torch.aten.view %result0_33, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%342 = torch.aten.addmm %9, %341, %340, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%343 = torch.aten.view %342, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%344 = torch.aten.mul.Tensor %343, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%345 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%346 = torch.aten.view %result0_33, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%347 = torch.aten.addmm %9, %346, %345, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%348 = torch.aten.view %347, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%349 = torch.aten.view %348, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%350 = torch.aten.transpose.int %349, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%351 = torch.aten.clone %350, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%352 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%353 = torch.aten.view %result0_33, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%354 = torch.aten.addmm %9, %353, %352, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%355 = torch.aten.view %354, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%356 = torch.aten.view %355, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%357 = torch.aten.transpose.int %356, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%358 = torch.aten.clone %357, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%359 = torch.aten.view %344, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%360 = torch.aten.transpose.int %359, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%361 = torch.aten.clone %360, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%362 = torch.aten.view %361, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%363 = torch.aten.view %351, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%364 = torch.aten.view %358, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%365 = torch.aten.transpose.int %363, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%366 = torch.aten.bmm %362, %365 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%367 = torch.aten.view %366, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%368 = torch.aten.add.Tensor %367, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%369 = torch.aten.view %368, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%370 = torch.aten._softmax %369, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%371 = torch.aten.bmm %370, %364 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%372 = torch.aten.view %371, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%373 = torch.aten.transpose.int %372, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%374 = torch.aten.clone %373, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%375 = torch.aten._unsafe_view %374, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%376 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%377 = torch.aten.view %375, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%378 = torch.aten.addmm %9, %377, %376, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%379 = torch.aten.view %378, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%380 = torch.aten.add.Tensor %339, %379, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_36, %result1_37, %result2_38 = torch.aten.native_layer_norm %380, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%381 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%382 = torch.aten.view %result0_36, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%383 = torch.aten.addmm %7, %382, %381, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%384 = torch.aten.view %383, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%385 = torch.aten.gelu %384, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%386 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%387 = torch.aten.view %385, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%388 = torch.aten.addmm %9, %387, %386, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%389 = torch.aten.view %388, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%390 = torch.aten.add.Tensor %380, %389, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_39, %result1_40, %result2_41 = torch.aten.native_layer_norm %390, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%391 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%392 = torch.aten.view %result0_39, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%393 = torch.aten.addmm %9, %392, %391, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%394 = torch.aten.view %393, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%395 = torch.aten.mul.Tensor %394, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%396 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%397 = torch.aten.view %result0_39, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%398 = torch.aten.addmm %9, %397, %396, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%399 = torch.aten.view %398, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%400 = torch.aten.view %399, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%401 = torch.aten.transpose.int %400, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%402 = torch.aten.clone %401, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%403 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%404 = torch.aten.view %result0_39, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%405 = torch.aten.addmm %9, %404, %403, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%406 = torch.aten.view %405, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%407 = torch.aten.view %406, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%408 = torch.aten.transpose.int %407, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%409 = torch.aten.clone %408, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%410 = torch.aten.view %395, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%411 = torch.aten.transpose.int %410, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%412 = torch.aten.clone %411, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%413 = torch.aten.view %412, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%414 = torch.aten.view %402, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%415 = torch.aten.view %409, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%416 = torch.aten.transpose.int %414, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%417 = torch.aten.bmm %413, %416 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%418 = torch.aten.view %417, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%419 = torch.aten.add.Tensor %418, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%420 = torch.aten.view %419, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%421 = torch.aten._softmax %420, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%422 = torch.aten.bmm %421, %415 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%423 = torch.aten.view %422, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%424 = torch.aten.transpose.int %423, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%425 = torch.aten.clone %424, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%426 = torch.aten._unsafe_view %425, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%427 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%428 = torch.aten.view %426, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%429 = torch.aten.addmm %9, %428, %427, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%430 = torch.aten.view %429, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%431 = torch.aten.add.Tensor %390, %430, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_42, %result1_43, %result2_44 = torch.aten.native_layer_norm %431, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%432 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%433 = torch.aten.view %result0_42, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%434 = torch.aten.addmm %7, %433, %432, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%435 = torch.aten.view %434, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%436 = torch.aten.gelu %435, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%437 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%438 = torch.aten.view %436, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%439 = torch.aten.addmm %9, %438, %437, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%440 = torch.aten.view %439, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%441 = torch.aten.add.Tensor %431, %440, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_45, %result1_46, %result2_47 = torch.aten.native_layer_norm %441, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%442 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%443 = torch.aten.view %result0_45, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%444 = torch.aten.addmm %9, %443, %442, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%445 = torch.aten.view %444, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%446 = torch.aten.mul.Tensor %445, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%447 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%448 = torch.aten.view %result0_45, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%449 = torch.aten.addmm %9, %448, %447, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%450 = torch.aten.view %449, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%451 = torch.aten.view %450, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%452 = torch.aten.transpose.int %451, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%453 = torch.aten.clone %452, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%454 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%455 = torch.aten.view %result0_45, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%456 = torch.aten.addmm %9, %455, %454, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%457 = torch.aten.view %456, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%458 = torch.aten.view %457, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%459 = torch.aten.transpose.int %458, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%460 = torch.aten.clone %459, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%461 = torch.aten.view %446, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%462 = torch.aten.transpose.int %461, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%463 = torch.aten.clone %462, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%464 = torch.aten.view %463, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%465 = torch.aten.view %453, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%466 = torch.aten.view %460, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%467 = torch.aten.transpose.int %465, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%468 = torch.aten.bmm %464, %467 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%469 = torch.aten.view %468, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%470 = torch.aten.add.Tensor %469, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%471 = torch.aten.view %470, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%472 = torch.aten._softmax %471, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%473 = torch.aten.bmm %472, %466 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%474 = torch.aten.view %473, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%475 = torch.aten.transpose.int %474, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%476 = torch.aten.clone %475, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%477 = torch.aten._unsafe_view %476, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%478 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%479 = torch.aten.view %477, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%480 = torch.aten.addmm %9, %479, %478, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%481 = torch.aten.view %480, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%482 = torch.aten.add.Tensor %441, %481, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %482, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%483 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%484 = torch.aten.view %result0_48, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%485 = torch.aten.addmm %7, %484, %483, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%486 = torch.aten.view %485, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%487 = torch.aten.gelu %486, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%488 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%489 = torch.aten.view %487, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%490 = torch.aten.addmm %9, %489, %488, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%491 = torch.aten.view %490, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%492 = torch.aten.add.Tensor %482, %491, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %492, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%493 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%494 = torch.aten.view %result0_51, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%495 = torch.aten.addmm %9, %494, %493, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%496 = torch.aten.view %495, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%497 = torch.aten.mul.Tensor %496, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%498 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%499 = torch.aten.view %result0_51, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%500 = torch.aten.addmm %9, %499, %498, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%501 = torch.aten.view %500, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%502 = torch.aten.view %501, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%503 = torch.aten.transpose.int %502, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%504 = torch.aten.clone %503, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%505 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%506 = torch.aten.view %result0_51, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%507 = torch.aten.addmm %9, %506, %505, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%508 = torch.aten.view %507, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%509 = torch.aten.view %508, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%510 = torch.aten.transpose.int %509, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%511 = torch.aten.clone %510, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%512 = torch.aten.view %497, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%513 = torch.aten.transpose.int %512, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%514 = torch.aten.clone %513, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%515 = torch.aten.view %514, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%516 = torch.aten.view %504, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%517 = torch.aten.view %511, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%518 = torch.aten.transpose.int %516, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%519 = torch.aten.bmm %515, %518 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%520 = torch.aten.view %519, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%521 = torch.aten.add.Tensor %520, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%522 = torch.aten.view %521, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%523 = torch.aten._softmax %522, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%524 = torch.aten.bmm %523, %517 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%525 = torch.aten.view %524, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%526 = torch.aten.transpose.int %525, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%527 = torch.aten.clone %526, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%528 = torch.aten._unsafe_view %527, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%529 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%530 = torch.aten.view %528, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%531 = torch.aten.addmm %9, %530, %529, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%532 = torch.aten.view %531, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%533 = torch.aten.add.Tensor %492, %532, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_54, %result1_55, %result2_56 = torch.aten.native_layer_norm %533, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%534 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%535 = torch.aten.view %result0_54, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%536 = torch.aten.addmm %7, %535, %534, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%537 = torch.aten.view %536, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%538 = torch.aten.gelu %537, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%539 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%540 = torch.aten.view %538, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%541 = torch.aten.addmm %9, %540, %539, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%542 = torch.aten.view %541, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%543 = torch.aten.add.Tensor %533, %542, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_57, %result1_58, %result2_59 = torch.aten.native_layer_norm %543, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%544 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%545 = torch.aten.view %result0_57, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%546 = torch.aten.addmm %9, %545, %544, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%547 = torch.aten.view %546, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%548 = torch.aten.mul.Tensor %547, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%549 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%550 = torch.aten.view %result0_57, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%551 = torch.aten.addmm %9, %550, %549, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%552 = torch.aten.view %551, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%553 = torch.aten.view %552, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%554 = torch.aten.transpose.int %553, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%555 = torch.aten.clone %554, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%556 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%557 = torch.aten.view %result0_57, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%558 = torch.aten.addmm %9, %557, %556, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%559 = torch.aten.view %558, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%560 = torch.aten.view %559, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%561 = torch.aten.transpose.int %560, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%562 = torch.aten.clone %561, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%563 = torch.aten.view %548, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%564 = torch.aten.transpose.int %563, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%565 = torch.aten.clone %564, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%566 = torch.aten.view %565, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%567 = torch.aten.view %555, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%568 = torch.aten.view %562, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%569 = torch.aten.transpose.int %567, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%570 = torch.aten.bmm %566, %569 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%571 = torch.aten.view %570, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%572 = torch.aten.add.Tensor %571, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%573 = torch.aten.view %572, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%574 = torch.aten._softmax %573, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%575 = torch.aten.bmm %574, %568 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%576 = torch.aten.view %575, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%577 = torch.aten.transpose.int %576, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%578 = torch.aten.clone %577, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%579 = torch.aten._unsafe_view %578, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%580 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%581 = torch.aten.view %579, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%582 = torch.aten.addmm %9, %581, %580, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%583 = torch.aten.view %582, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%584 = torch.aten.add.Tensor %543, %583, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_60, %result1_61, %result2_62 = torch.aten.native_layer_norm %584, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%585 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%586 = torch.aten.view %result0_60, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%587 = torch.aten.addmm %7, %586, %585, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%588 = torch.aten.view %587, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%589 = torch.aten.gelu %588, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%590 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%591 = torch.aten.view %589, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%592 = torch.aten.addmm %9, %591, %590, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%593 = torch.aten.view %592, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%594 = torch.aten.add.Tensor %584, %593, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_63, %result1_64, %result2_65 = torch.aten.native_layer_norm %594, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%595 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%596 = torch.aten.view %result0_63, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%597 = torch.aten.addmm %9, %596, %595, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%598 = torch.aten.view %597, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%599 = torch.aten.mul.Tensor %598, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%600 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%601 = torch.aten.view %result0_63, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%602 = torch.aten.addmm %9, %601, %600, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%603 = torch.aten.view %602, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%604 = torch.aten.view %603, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%605 = torch.aten.transpose.int %604, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%606 = torch.aten.clone %605, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%607 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%608 = torch.aten.view %result0_63, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%609 = torch.aten.addmm %9, %608, %607, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%610 = torch.aten.view %609, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%611 = torch.aten.view %610, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%612 = torch.aten.transpose.int %611, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%613 = torch.aten.clone %612, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%614 = torch.aten.view %599, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%615 = torch.aten.transpose.int %614, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%616 = torch.aten.clone %615, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%617 = torch.aten.view %616, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%618 = torch.aten.view %606, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%619 = torch.aten.view %613, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%620 = torch.aten.transpose.int %618, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%621 = torch.aten.bmm %617, %620 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%622 = torch.aten.view %621, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%623 = torch.aten.add.Tensor %622, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%624 = torch.aten.view %623, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%625 = torch.aten._softmax %624, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%626 = torch.aten.bmm %625, %619 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%627 = torch.aten.view %626, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%628 = torch.aten.transpose.int %627, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%629 = torch.aten.clone %628, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%630 = torch.aten._unsafe_view %629, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%631 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%632 = torch.aten.view %630, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%633 = torch.aten.addmm %9, %632, %631, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%634 = torch.aten.view %633, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%635 = torch.aten.add.Tensor %594, %634, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_66, %result1_67, %result2_68 = torch.aten.native_layer_norm %635, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%636 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%637 = torch.aten.view %result0_66, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%638 = torch.aten.addmm %7, %637, %636, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%639 = torch.aten.view %638, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%640 = torch.aten.gelu %639, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%641 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%642 = torch.aten.view %640, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%643 = torch.aten.addmm %9, %642, %641, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%644 = torch.aten.view %643, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%645 = torch.aten.add.Tensor %635, %644, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_69, %result1_70, %result2_71 = torch.aten.native_layer_norm %645, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%646 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%647 = torch.aten.view %result0_69, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%648 = torch.aten.addmm %9, %647, %646, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%649 = torch.aten.view %648, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%650 = torch.aten.mul.Tensor %649, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%651 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%652 = torch.aten.view %result0_69, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%653 = torch.aten.addmm %9, %652, %651, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%654 = torch.aten.view %653, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%655 = torch.aten.view %654, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%656 = torch.aten.transpose.int %655, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%657 = torch.aten.clone %656, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%658 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%659 = torch.aten.view %result0_69, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%660 = torch.aten.addmm %9, %659, %658, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%661 = torch.aten.view %660, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%662 = torch.aten.view %661, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%663 = torch.aten.transpose.int %662, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%664 = torch.aten.clone %663, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%665 = torch.aten.view %650, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%666 = torch.aten.transpose.int %665, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%667 = torch.aten.clone %666, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%668 = torch.aten.view %667, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%669 = torch.aten.view %657, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%670 = torch.aten.view %664, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%671 = torch.aten.transpose.int %669, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%672 = torch.aten.bmm %668, %671 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%673 = torch.aten.view %672, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%674 = torch.aten.add.Tensor %673, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%675 = torch.aten.view %674, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%676 = torch.aten._softmax %675, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%677 = torch.aten.bmm %676, %670 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%678 = torch.aten.view %677, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%679 = torch.aten.transpose.int %678, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%680 = torch.aten.clone %679, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%681 = torch.aten._unsafe_view %680, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%682 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%683 = torch.aten.view %681, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%684 = torch.aten.addmm %9, %683, %682, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%685 = torch.aten.view %684, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%686 = torch.aten.add.Tensor %645, %685, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %686, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%687 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%688 = torch.aten.view %result0_72, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%689 = torch.aten.addmm %7, %688, %687, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%690 = torch.aten.view %689, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%691 = torch.aten.gelu %690, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%692 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%693 = torch.aten.view %691, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%694 = torch.aten.addmm %9, %693, %692, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%695 = torch.aten.view %694, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%696 = torch.aten.add.Tensor %686, %695, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_75, %result1_76, %result2_77 = torch.aten.native_layer_norm %696, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%697 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%698 = torch.aten.view %result0_75, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%699 = torch.aten.addmm %9, %698, %697, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%700 = torch.aten.view %699, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%701 = torch.aten.mul.Tensor %700, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%702 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%703 = torch.aten.view %result0_75, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%704 = torch.aten.addmm %9, %703, %702, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%705 = torch.aten.view %704, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%706 = torch.aten.view %705, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%707 = torch.aten.transpose.int %706, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%708 = torch.aten.clone %707, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%709 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%710 = torch.aten.view %result0_75, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%711 = torch.aten.addmm %9, %710, %709, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%712 = torch.aten.view %711, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%713 = torch.aten.view %712, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%714 = torch.aten.transpose.int %713, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%715 = torch.aten.clone %714, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%716 = torch.aten.view %701, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%717 = torch.aten.transpose.int %716, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%718 = torch.aten.clone %717, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%719 = torch.aten.view %718, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%720 = torch.aten.view %708, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%721 = torch.aten.view %715, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%722 = torch.aten.transpose.int %720, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%723 = torch.aten.bmm %719, %722 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%724 = torch.aten.view %723, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%725 = torch.aten.add.Tensor %724, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%726 = torch.aten.view %725, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%727 = torch.aten._softmax %726, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%728 = torch.aten.bmm %727, %721 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%729 = torch.aten.view %728, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%730 = torch.aten.transpose.int %729, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%731 = torch.aten.clone %730, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%732 = torch.aten._unsafe_view %731, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%733 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%734 = torch.aten.view %732, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%735 = torch.aten.addmm %9, %734, %733, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%736 = torch.aten.view %735, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%737 = torch.aten.add.Tensor %696, %736, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_78, %result1_79, %result2_80 = torch.aten.native_layer_norm %737, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%738 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%739 = torch.aten.view %result0_78, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%740 = torch.aten.addmm %7, %739, %738, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%741 = torch.aten.view %740, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%742 = torch.aten.gelu %741, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%743 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%744 = torch.aten.view %742, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%745 = torch.aten.addmm %9, %744, %743, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%746 = torch.aten.view %745, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%747 = torch.aten.add.Tensor %737, %746, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_81, %result1_82, %result2_83 = torch.aten.native_layer_norm %747, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%748 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%749 = torch.aten.view %result0_81, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%750 = torch.aten.addmm %9, %749, %748, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%751 = torch.aten.view %750, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%752 = torch.aten.mul.Tensor %751, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%753 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%754 = torch.aten.view %result0_81, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%755 = torch.aten.addmm %9, %754, %753, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%756 = torch.aten.view %755, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%757 = torch.aten.view %756, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%758 = torch.aten.transpose.int %757, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%759 = torch.aten.clone %758, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%760 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%761 = torch.aten.view %result0_81, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%762 = torch.aten.addmm %9, %761, %760, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%763 = torch.aten.view %762, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%764 = torch.aten.view %763, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%765 = torch.aten.transpose.int %764, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%766 = torch.aten.clone %765, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%767 = torch.aten.view %752, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%768 = torch.aten.transpose.int %767, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%769 = torch.aten.clone %768, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%770 = torch.aten.view %769, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%771 = torch.aten.view %759, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%772 = torch.aten.view %766, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%773 = torch.aten.transpose.int %771, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%774 = torch.aten.bmm %770, %773 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%775 = torch.aten.view %774, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%776 = torch.aten.add.Tensor %775, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%777 = torch.aten.view %776, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%778 = torch.aten._softmax %777, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%779 = torch.aten.bmm %778, %772 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%780 = torch.aten.view %779, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%781 = torch.aten.transpose.int %780, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%782 = torch.aten.clone %781, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%783 = torch.aten._unsafe_view %782, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%784 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%785 = torch.aten.view %783, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%786 = torch.aten.addmm %9, %785, %784, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%787 = torch.aten.view %786, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%788 = torch.aten.add.Tensor %747, %787, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_84, %result1_85, %result2_86 = torch.aten.native_layer_norm %788, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%789 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%790 = torch.aten.view %result0_84, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%791 = torch.aten.addmm %7, %790, %789, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%792 = torch.aten.view %791, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%793 = torch.aten.gelu %792, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%794 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%795 = torch.aten.view %793, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%796 = torch.aten.addmm %9, %795, %794, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%797 = torch.aten.view %796, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%798 = torch.aten.add.Tensor %788, %797, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_87, %result1_88, %result2_89 = torch.aten.native_layer_norm %798, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%799 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%800 = torch.aten.view %result0_87, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%801 = torch.aten.addmm %9, %800, %799, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%802 = torch.aten.view %801, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%803 = torch.aten.mul.Tensor %802, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%804 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%805 = torch.aten.view %result0_87, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%806 = torch.aten.addmm %9, %805, %804, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%807 = torch.aten.view %806, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%808 = torch.aten.view %807, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%809 = torch.aten.transpose.int %808, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%810 = torch.aten.clone %809, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%811 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%812 = torch.aten.view %result0_87, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%813 = torch.aten.addmm %9, %812, %811, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%814 = torch.aten.view %813, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%815 = torch.aten.view %814, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%816 = torch.aten.transpose.int %815, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%817 = torch.aten.clone %816, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%818 = torch.aten.view %803, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%819 = torch.aten.transpose.int %818, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%820 = torch.aten.clone %819, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%821 = torch.aten.view %820, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%822 = torch.aten.view %810, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%823 = torch.aten.view %817, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%824 = torch.aten.transpose.int %822, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%825 = torch.aten.bmm %821, %824 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%826 = torch.aten.view %825, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%827 = torch.aten.add.Tensor %826, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%828 = torch.aten.view %827, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%829 = torch.aten._softmax %828, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%830 = torch.aten.bmm %829, %823 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%831 = torch.aten.view %830, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%832 = torch.aten.transpose.int %831, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%833 = torch.aten.clone %832, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%834 = torch.aten._unsafe_view %833, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%835 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%836 = torch.aten.view %834, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%837 = torch.aten.addmm %9, %836, %835, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%838 = torch.aten.view %837, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%839 = torch.aten.add.Tensor %798, %838, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_90, %result1_91, %result2_92 = torch.aten.native_layer_norm %839, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%840 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%841 = torch.aten.view %result0_90, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%842 = torch.aten.addmm %7, %841, %840, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%843 = torch.aten.view %842, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%844 = torch.aten.gelu %843, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%845 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%846 = torch.aten.view %844, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%847 = torch.aten.addmm %9, %846, %845, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%848 = torch.aten.view %847, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%849 = torch.aten.add.Tensor %839, %848, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_93, %result1_94, %result2_95 = torch.aten.native_layer_norm %849, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%850 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%851 = torch.aten.view %result0_93, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%852 = torch.aten.addmm %9, %851, %850, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%853 = torch.aten.view %852, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%854 = torch.aten.mul.Tensor %853, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%855 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%856 = torch.aten.view %result0_93, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%857 = torch.aten.addmm %9, %856, %855, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%858 = torch.aten.view %857, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%859 = torch.aten.view %858, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%860 = torch.aten.transpose.int %859, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%861 = torch.aten.clone %860, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%862 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%863 = torch.aten.view %result0_93, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%864 = torch.aten.addmm %9, %863, %862, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%865 = torch.aten.view %864, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%866 = torch.aten.view %865, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%867 = torch.aten.transpose.int %866, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%868 = torch.aten.clone %867, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%869 = torch.aten.view %854, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%870 = torch.aten.transpose.int %869, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%871 = torch.aten.clone %870, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%872 = torch.aten.view %871, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%873 = torch.aten.view %861, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%874 = torch.aten.view %868, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%875 = torch.aten.transpose.int %873, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%876 = torch.aten.bmm %872, %875 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%877 = torch.aten.view %876, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%878 = torch.aten.add.Tensor %877, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%879 = torch.aten.view %878, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%880 = torch.aten._softmax %879, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%881 = torch.aten.bmm %880, %874 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%882 = torch.aten.view %881, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%883 = torch.aten.transpose.int %882, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%884 = torch.aten.clone %883, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%885 = torch.aten._unsafe_view %884, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%886 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%887 = torch.aten.view %885, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%888 = torch.aten.addmm %9, %887, %886, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%889 = torch.aten.view %888, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%890 = torch.aten.add.Tensor %849, %889, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_96, %result1_97, %result2_98 = torch.aten.native_layer_norm %890, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%891 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%892 = torch.aten.view %result0_96, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%893 = torch.aten.addmm %7, %892, %891, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%894 = torch.aten.view %893, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%895 = torch.aten.gelu %894, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%896 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%897 = torch.aten.view %895, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%898 = torch.aten.addmm %9, %897, %896, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%899 = torch.aten.view %898, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%900 = torch.aten.add.Tensor %890, %899, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_99, %result1_100, %result2_101 = torch.aten.native_layer_norm %900, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%901 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%902 = torch.aten.view %result0_99, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%903 = torch.aten.addmm %9, %902, %901, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%904 = torch.aten.view %903, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%905 = torch.aten.mul.Tensor %904, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%906 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%907 = torch.aten.view %result0_99, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%908 = torch.aten.addmm %9, %907, %906, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%909 = torch.aten.view %908, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%910 = torch.aten.view %909, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%911 = torch.aten.transpose.int %910, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%912 = torch.aten.clone %911, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%913 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%914 = torch.aten.view %result0_99, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%915 = torch.aten.addmm %9, %914, %913, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%916 = torch.aten.view %915, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%917 = torch.aten.view %916, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%918 = torch.aten.transpose.int %917, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%919 = torch.aten.clone %918, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%920 = torch.aten.view %905, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%921 = torch.aten.transpose.int %920, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%922 = torch.aten.clone %921, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%923 = torch.aten.view %922, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%924 = torch.aten.view %912, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%925 = torch.aten.view %919, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%926 = torch.aten.transpose.int %924, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%927 = torch.aten.bmm %923, %926 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%928 = torch.aten.view %927, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%929 = torch.aten.add.Tensor %928, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%930 = torch.aten.view %929, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%931 = torch.aten._softmax %930, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%932 = torch.aten.bmm %931, %925 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%933 = torch.aten.view %932, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%934 = torch.aten.transpose.int %933, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%935 = torch.aten.clone %934, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%936 = torch.aten._unsafe_view %935, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%937 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%938 = torch.aten.view %936, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%939 = torch.aten.addmm %9, %938, %937, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%940 = torch.aten.view %939, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%941 = torch.aten.add.Tensor %900, %940, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_102, %result1_103, %result2_104 = torch.aten.native_layer_norm %941, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%942 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%943 = torch.aten.view %result0_102, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%944 = torch.aten.addmm %7, %943, %942, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%945 = torch.aten.view %944, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%946 = torch.aten.gelu %945, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%947 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%948 = torch.aten.view %946, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%949 = torch.aten.addmm %9, %948, %947, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%950 = torch.aten.view %949, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%951 = torch.aten.add.Tensor %941, %950, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_105, %result1_106, %result2_107 = torch.aten.native_layer_norm %951, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%952 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%953 = torch.aten.view %result0_105, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%954 = torch.aten.addmm %9, %953, %952, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%955 = torch.aten.view %954, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%956 = torch.aten.mul.Tensor %955, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%957 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%958 = torch.aten.view %result0_105, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%959 = torch.aten.addmm %9, %958, %957, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%960 = torch.aten.view %959, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%961 = torch.aten.view %960, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%962 = torch.aten.transpose.int %961, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%963 = torch.aten.clone %962, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%964 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%965 = torch.aten.view %result0_105, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%966 = torch.aten.addmm %9, %965, %964, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%967 = torch.aten.view %966, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%968 = torch.aten.view %967, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%969 = torch.aten.transpose.int %968, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%970 = torch.aten.clone %969, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%971 = torch.aten.view %956, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%972 = torch.aten.transpose.int %971, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%973 = torch.aten.clone %972, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%974 = torch.aten.view %973, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%975 = torch.aten.view %963, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%976 = torch.aten.view %970, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%977 = torch.aten.transpose.int %975, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%978 = torch.aten.bmm %974, %977 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%979 = torch.aten.view %978, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%980 = torch.aten.add.Tensor %979, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%981 = torch.aten.view %980, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%982 = torch.aten._softmax %981, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%983 = torch.aten.bmm %982, %976 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%984 = torch.aten.view %983, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%985 = torch.aten.transpose.int %984, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%986 = torch.aten.clone %985, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%987 = torch.aten._unsafe_view %986, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%988 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%989 = torch.aten.view %987, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%990 = torch.aten.addmm %9, %989, %988, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%991 = torch.aten.view %990, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%992 = torch.aten.add.Tensor %951, %991, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_108, %result1_109, %result2_110 = torch.aten.native_layer_norm %992, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%993 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%994 = torch.aten.view %result0_108, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%995 = torch.aten.addmm %7, %994, %993, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%996 = torch.aten.view %995, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%997 = torch.aten.gelu %996, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%998 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%999 = torch.aten.view %997, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%1000 = torch.aten.addmm %9, %999, %998, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1001 = torch.aten.view %1000, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1002 = torch.aten.add.Tensor %992, %1001, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_111, %result1_112, %result2_113 = torch.aten.native_layer_norm %1002, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%1003 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1004 = torch.aten.view %result0_111, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1005 = torch.aten.addmm %9, %1004, %1003, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1006 = torch.aten.view %1005, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1007 = torch.aten.mul.Tensor %1006, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%1008 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1009 = torch.aten.view %result0_111, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1010 = torch.aten.addmm %9, %1009, %1008, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1011 = torch.aten.view %1010, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1012 = torch.aten.view %1011, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1013 = torch.aten.transpose.int %1012, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1014 = torch.aten.clone %1013, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1015 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1016 = torch.aten.view %result0_111, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1017 = torch.aten.addmm %9, %1016, %1015, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1018 = torch.aten.view %1017, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1019 = torch.aten.view %1018, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1020 = torch.aten.transpose.int %1019, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1021 = torch.aten.clone %1020, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1022 = torch.aten.view %1007, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1023 = torch.aten.transpose.int %1022, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1024 = torch.aten.clone %1023, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1025 = torch.aten.view %1024, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1026 = torch.aten.view %1014, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1027 = torch.aten.view %1021, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1028 = torch.aten.transpose.int %1026, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%1029 = torch.aten.bmm %1025, %1028 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%1030 = torch.aten.view %1029, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%1031 = torch.aten.add.Tensor %1030, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%1032 = torch.aten.view %1031, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%1033 = torch.aten._softmax %1032, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%1034 = torch.aten.bmm %1033, %1027 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%1035 = torch.aten.view %1034, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%1036 = torch.aten.transpose.int %1035, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%1037 = torch.aten.clone %1036, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%1038 = torch.aten._unsafe_view %1037, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1039 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1040 = torch.aten.view %1038, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1041 = torch.aten.addmm %9, %1040, %1039, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1042 = torch.aten.view %1041, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1043 = torch.aten.add.Tensor %1002, %1042, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_114, %result1_115, %result2_116 = torch.aten.native_layer_norm %1043, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%1044 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%1045 = torch.aten.view %result0_114, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1046 = torch.aten.addmm %7, %1045, %1044, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%1047 = torch.aten.view %1046, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%1048 = torch.aten.gelu %1047, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%1049 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%1050 = torch.aten.view %1048, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%1051 = torch.aten.addmm %9, %1050, %1049, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1052 = torch.aten.view %1051, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1053 = torch.aten.add.Tensor %1043, %1052, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_117, %result1_118, %result2_119 = torch.aten.native_layer_norm %1053, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%1054 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1055 = torch.aten.view %result0_117, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1056 = torch.aten.addmm %9, %1055, %1054, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1057 = torch.aten.view %1056, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1058 = torch.aten.mul.Tensor %1057, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%1059 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1060 = torch.aten.view %result0_117, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1061 = torch.aten.addmm %9, %1060, %1059, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1062 = torch.aten.view %1061, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1063 = torch.aten.view %1062, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1064 = torch.aten.transpose.int %1063, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1065 = torch.aten.clone %1064, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1066 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1067 = torch.aten.view %result0_117, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1068 = torch.aten.addmm %9, %1067, %1066, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1069 = torch.aten.view %1068, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1070 = torch.aten.view %1069, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1071 = torch.aten.transpose.int %1070, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1072 = torch.aten.clone %1071, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1073 = torch.aten.view %1058, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1074 = torch.aten.transpose.int %1073, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1075 = torch.aten.clone %1074, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1076 = torch.aten.view %1075, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1077 = torch.aten.view %1065, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1078 = torch.aten.view %1072, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1079 = torch.aten.transpose.int %1077, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%1080 = torch.aten.bmm %1076, %1079 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%1081 = torch.aten.view %1080, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%1082 = torch.aten.add.Tensor %1081, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%1083 = torch.aten.view %1082, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%1084 = torch.aten._softmax %1083, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%1085 = torch.aten.bmm %1084, %1078 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%1086 = torch.aten.view %1085, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%1087 = torch.aten.transpose.int %1086, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%1088 = torch.aten.clone %1087, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%1089 = torch.aten._unsafe_view %1088, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1090 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1091 = torch.aten.view %1089, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1092 = torch.aten.addmm %9, %1091, %1090, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1093 = torch.aten.view %1092, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1094 = torch.aten.add.Tensor %1053, %1093, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_120, %result1_121, %result2_122 = torch.aten.native_layer_norm %1094, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%1095 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%1096 = torch.aten.view %result0_120, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1097 = torch.aten.addmm %7, %1096, %1095, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%1098 = torch.aten.view %1097, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%1099 = torch.aten.gelu %1098, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%1100 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%1101 = torch.aten.view %1099, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%1102 = torch.aten.addmm %9, %1101, %1100, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1103 = torch.aten.view %1102, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1104 = torch.aten.add.Tensor %1094, %1103, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_123, %result1_124, %result2_125 = torch.aten.native_layer_norm %1104, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%1105 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1106 = torch.aten.view %result0_123, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1107 = torch.aten.addmm %9, %1106, %1105, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1108 = torch.aten.view %1107, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1109 = torch.aten.mul.Tensor %1108, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%1110 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1111 = torch.aten.view %result0_123, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1112 = torch.aten.addmm %9, %1111, %1110, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1113 = torch.aten.view %1112, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1114 = torch.aten.view %1113, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1115 = torch.aten.transpose.int %1114, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1116 = torch.aten.clone %1115, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1117 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1118 = torch.aten.view %result0_123, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1119 = torch.aten.addmm %9, %1118, %1117, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1120 = torch.aten.view %1119, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1121 = torch.aten.view %1120, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1122 = torch.aten.transpose.int %1121, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1123 = torch.aten.clone %1122, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1124 = torch.aten.view %1109, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1125 = torch.aten.transpose.int %1124, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1126 = torch.aten.clone %1125, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1127 = torch.aten.view %1126, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1128 = torch.aten.view %1116, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1129 = torch.aten.view %1123, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1130 = torch.aten.transpose.int %1128, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%1131 = torch.aten.bmm %1127, %1130 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%1132 = torch.aten.view %1131, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%1133 = torch.aten.add.Tensor %1132, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%1134 = torch.aten.view %1133, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%1135 = torch.aten._softmax %1134, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%1136 = torch.aten.bmm %1135, %1129 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%1137 = torch.aten.view %1136, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%1138 = torch.aten.transpose.int %1137, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%1139 = torch.aten.clone %1138, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%1140 = torch.aten._unsafe_view %1139, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1141 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1142 = torch.aten.view %1140, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1143 = torch.aten.addmm %9, %1142, %1141, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1144 = torch.aten.view %1143, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1145 = torch.aten.add.Tensor %1104, %1144, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_126, %result1_127, %result2_128 = torch.aten.native_layer_norm %1145, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%1146 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%1147 = torch.aten.view %result0_126, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1148 = torch.aten.addmm %7, %1147, %1146, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%1149 = torch.aten.view %1148, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%1150 = torch.aten.gelu %1149, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%1151 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%1152 = torch.aten.view %1150, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%1153 = torch.aten.addmm %9, %1152, %1151, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1154 = torch.aten.view %1153, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1155 = torch.aten.add.Tensor %1145, %1154, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_129, %result1_130, %result2_131 = torch.aten.native_layer_norm %1155, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%1156 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1157 = torch.aten.view %result0_129, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1158 = torch.aten.addmm %9, %1157, %1156, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1159 = torch.aten.view %1158, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1160 = torch.aten.mul.Tensor %1159, %0 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,77,1024],f16>
%1161 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1162 = torch.aten.view %result0_129, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1163 = torch.aten.addmm %9, %1162, %1161, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1164 = torch.aten.view %1163, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1165 = torch.aten.view %1164, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1166 = torch.aten.transpose.int %1165, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1167 = torch.aten.clone %1166, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1168 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1169 = torch.aten.view %result0_129, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1170 = torch.aten.addmm %9, %1169, %1168, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1171 = torch.aten.view %1170, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1172 = torch.aten.view %1171, %35 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1173 = torch.aten.transpose.int %1172, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1174 = torch.aten.clone %1173, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1175 = torch.aten.view %1160, %46 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,16,64],f16>
%1176 = torch.aten.transpose.int %1175, %int1, %int2 : !torch.vtensor<[1,77,16,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1177 = torch.aten.clone %1176, %int0 : !torch.vtensor<[1,16,77,64],f16>, !torch.int -> !torch.vtensor<[1,16,77,64],f16>
%1178 = torch.aten.view %1177, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1179 = torch.aten.view %1167, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1180 = torch.aten.view %1174, %50 : !torch.vtensor<[1,16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[16,77,64],f16>
%1181 = torch.aten.transpose.int %1179, %int1, %int2 : !torch.vtensor<[16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[16,64,77],f16>
%1182 = torch.aten.bmm %1178, %1181 : !torch.vtensor<[16,77,64],f16>, !torch.vtensor<[16,64,77],f16> -> !torch.vtensor<[16,77,77],f16>
%1183 = torch.aten.view %1182, %56 : !torch.vtensor<[16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,77],f16>
%1184 = torch.aten.add.Tensor %1183, %22, %int1 : !torch.vtensor<[1,16,77,77],f16>, !torch.vtensor<[1,1,77,77],f16>, !torch.int -> !torch.vtensor<[1,16,77,77],f16>
%1185 = torch.aten.view %1184, %59 : !torch.vtensor<[1,16,77,77],f16>, !torch.list<int> -> !torch.vtensor<[16,77,77],f16>
%1186 = torch.aten._softmax %1185, %int-1, %false : !torch.vtensor<[16,77,77],f16>, !torch.int, !torch.bool -> !torch.vtensor<[16,77,77],f16>
%1187 = torch.aten.bmm %1186, %1180 : !torch.vtensor<[16,77,77],f16>, !torch.vtensor<[16,77,64],f16> -> !torch.vtensor<[16,77,64],f16>
%1188 = torch.aten.view %1187, %63 : !torch.vtensor<[16,77,64],f16>, !torch.list<int> -> !torch.vtensor<[1,16,77,64],f16>
%1189 = torch.aten.transpose.int %1188, %int1, %int2 : !torch.vtensor<[1,16,77,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%1190 = torch.aten.clone %1189, %int0 : !torch.vtensor<[1,77,16,64],f16>, !torch.int -> !torch.vtensor<[1,77,16,64],f16>
%1191 = torch.aten._unsafe_view %1190, %28 : !torch.vtensor<[1,77,16,64],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1192 = torch.aten.t %5 : !torch.vtensor<[1024,1024],f16> -> !torch.vtensor<[1024,1024],f16>
%1193 = torch.aten.view %1191, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1194 = torch.aten.addmm %9, %1193, %1192, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1195 = torch.aten.view %1194, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1196 = torch.aten.add.Tensor %1155, %1195, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_132, %result1_133, %result2_134 = torch.aten.native_layer_norm %1196, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
%1197 = torch.aten.t %6 : !torch.vtensor<[4096,1024],f16> -> !torch.vtensor<[1024,4096],f16>
%1198 = torch.aten.view %result0_132, %25 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int> -> !torch.vtensor<[77,1024],f16>
%1199 = torch.aten.addmm %7, %1198, %1197, %int1, %int1 : !torch.vtensor<[4096],f16>, !torch.vtensor<[77,1024],f16>, !torch.vtensor<[1024,4096],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,4096],f16>
%1200 = torch.aten.view %1199, %76 : !torch.vtensor<[77,4096],f16>, !torch.list<int> -> !torch.vtensor<[1,77,4096],f16>
%1201 = torch.aten.gelu %1200, %str : !torch.vtensor<[1,77,4096],f16>, !torch.str -> !torch.vtensor<[1,77,4096],f16>
%1202 = torch.aten.t %8 : !torch.vtensor<[1024,4096],f16> -> !torch.vtensor<[4096,1024],f16>
%1203 = torch.aten.view %1201, %80 : !torch.vtensor<[1,77,4096],f16>, !torch.list<int> -> !torch.vtensor<[77,4096],f16>
%1204 = torch.aten.addmm %9, %1203, %1202, %int1, %int1 : !torch.vtensor<[1024],f16>, !torch.vtensor<[77,4096],f16>, !torch.vtensor<[4096,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[77,1024],f16>
%1205 = torch.aten.view %1204, %28 : !torch.vtensor<[77,1024],f16>, !torch.list<int> -> !torch.vtensor<[1,77,1024],f16>
%1206 = torch.aten.add.Tensor %1196, %1205, %int1 : !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1024],f16>, !torch.int -> !torch.vtensor<[1,77,1024],f16>
%result0_135, %result1_136, %result2_137 = torch.aten.native_layer_norm %1206, %23, %9, %9, %float1.000000e-05 : !torch.vtensor<[1,77,1024],f16>, !torch.list<int>, !torch.vtensor<[1024],f16>, !torch.vtensor<[1024],f16>, !torch.float -> !torch.vtensor<[1,77,1024],f16>, !torch.vtensor<[1,77,1],f32>, !torch.vtensor<[1,77,1],f32>
return %result0_135 : !torch.vtensor<[1,77,1024],f16>
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment