MiniLM sequence-classification IR (torch dialect) after canonicalization.
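For context, the sketch below shows one way a torch-dialect dump like this one is typically produced. It is an illustration under assumptions, not code from this gist: it assumes the torch-mlir Python bindings as of mid-2022, and minilm_model is a hypothetical torch.nn.Module wrapping the MiniLM sequence-classification model so that forward() returns a plain tensor.

import torch
import torch_mlir  # torch-mlir Python bindings (assumed API of mid-2022)

# Hypothetical fixed-shape int32 inputs matching the signature of @forward below.
input_ids      = torch.zeros(1, 128, dtype=torch.int32)
attention_mask = torch.zeros(1, 128, dtype=torch.int32)
token_type_ids = torch.zeros(1, 128, dtype=torch.int32)

# Import the model into the torch dialect (the dialect shown in this dump).
module = torch_mlir.compile(
    minilm_model,  # hypothetical wrapper module, not defined in this gist
    [input_ids, attention_mask, token_type_ids],
    output_type=torch_mlir.OutputType.TORCH,
)
print(module)

Canonicalization is then a matter of running the standard MLIR --canonicalize pass over the resulting module (for example with torch-mlir-opt), and large weight constants can be elided when printing (e.g. with -mlir-elide-elementsattrs-if-larger), which is why most literals below appear as opaque<"elided_large_const", "0xDEADBEEF">.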
module attributes {torch.debug_module_name = "MiniLMSequenceClassification"} {
  func.func @forward(%arg0: !torch.vtensor<[1,128],si32>, %arg1: !torch.vtensor<[1,128],si32>, %arg2: !torch.vtensor<[1,128],si32>) -> !torch.vtensor<[1,2],f32> {
    %int1 = torch.constant.int 1
    %none = torch.constant.none
    %true = torch.constant.bool true
    %float1.000000e00 = torch.constant.float 1.000000e+00
    %int128 = torch.constant.int 128
    %int0 = torch.constant.int 0
    %int9223372036854775807 = torch.constant.int 9223372036854775807
    %float9.999990e-13 = torch.constant.float 9.9999999999999998E-13
    %int384 = torch.constant.int 384
    %str = torch.constant.str "none"
    %int-2 = torch.constant.int -2
    %int-1 = torch.constant.int -1
    %int3 = torch.constant.int 3
    %int2 = torch.constant.int 2
    %int32 = torch.constant.int 32
    %int12 = torch.constant.int 12
    %int6 = torch.constant.int 6
    %false = torch.constant.bool false
    %0 = torch.vtensor.literal(dense<-1.000000e+04> : tensor<f64>) : !torch.vtensor<[],f64>
    %1 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1x512xsi64>) : !torch.vtensor<[1,512],si64>
    %2 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<30522x384xf32>) : !torch.vtensor<[30522,384],f32>
    %3 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<2x384xf32>) : !torch.vtensor<[2,384],f32>
    %4 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<512x384xf32>) : !torch.vtensor<[512,384],f32>
    %5 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %6 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %7 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %8 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %9 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %10 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %11 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %12 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %13 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %14 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %15 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %16 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %17 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %18 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %19 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %20 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %21 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %22 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %23 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %24 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %25 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %26 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %27 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %28 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %29 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %30 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %31 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %32 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %33 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %34 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %35 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %36 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %37 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %38 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %39 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %40 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %41 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %42 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %43 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %44 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %45 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %46 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %47 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %48 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %49 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %50 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %51 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %52 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %53 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %54 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %55 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %56 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %57 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %58 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %59 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %60 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %61 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %62 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %63 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %64 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %65 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %66 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %67 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %68 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %69 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %70 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %71 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %72 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %73 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %74 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %75 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %76 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %77 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %78 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %79 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %80 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %81 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %82 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %83 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %84 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %85 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %86 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %87 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %88 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %89 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %90 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %91 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %92 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %93 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %94 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %95 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %96 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %97 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %98 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %99 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %100 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %101 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %102 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %103 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %104 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %105 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %106 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %107 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %108 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %109 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %110 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %111 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %112 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %113 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %114 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %115 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %116 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %117 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %118 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %119 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %120 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %121 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %122 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %123 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %124 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %125 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %126 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %127 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %128 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %129 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %130 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %131 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %132 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %133 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %134 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %135 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %136 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %137 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %138 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %139 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %140 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %141 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %142 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %143 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %144 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %145 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %146 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %147 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %148 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %149 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %150 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %151 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %152 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %153 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %154 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %155 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %156 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %157 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %158 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %159 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %160 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %161 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %162 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %163 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %164 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %165 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %166 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %167 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %168 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %169 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %170 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %171 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %172 = torch.vtensor.literal(dense<5.6568542494923806> : tensor<f64>) : !torch.vtensor<[],f64>
    %173 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %174 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %175 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %176 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %177 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %178 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %179 = torch.vtensor.literal(dense<0.000000e+00> : tensor<1536xf32>) : !torch.vtensor<[1536],f32>
    %180 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<1536x384xf32>) : !torch.vtensor<[1536,384],f32>
    %181 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x1536xf32>) : !torch.vtensor<[384,1536],f32>
    %182 = torch.vtensor.literal(dense<0.455810547> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %183 = torch.vtensor.literal(dense<0.000000e+00> : tensor<384xf32>) : !torch.vtensor<[384],f32>
    %184 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<384x384xf32>) : !torch.vtensor<[384,384],f32>
    %185 = torch.vtensor.literal(dense<0.000000e+00> : tensor<2xf32>) : !torch.vtensor<[2],f32>
    %186 = torch.vtensor.literal(opaque<"elided_large_const", "0xDEADBEEF"> : tensor<2x384xf32>) : !torch.vtensor<[2,384],f32>
    %187 = torch.aten.slice.Tensor %arg1, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,128],si32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,128],si32>
    %188 = torch.aten.unsqueeze %187, %int1 : !torch.vtensor<[1,128],si32>, !torch.int -> !torch.vtensor<[1,1,128],si32>
    %189 = torch.aten.unsqueeze %188, %int2 : !torch.vtensor<[1,1,128],si32>, !torch.int -> !torch.vtensor<[1,1,1,128],si32>
    %190 = torch.aten.slice.Tensor %189, %int3, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,1,1,128],si32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,1,1,128],si32>
    %191 = torch.aten.to.dtype %190, %int6, %false, %false, %none : !torch.vtensor<[1,1,1,128],si32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[1,1,1,128],f32>
    %192 = torch.aten.rsub.Scalar %191, %float1.000000e00, %int1 : !torch.vtensor<[1,1,1,128],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,1,1,128],f32>
    %193 = torch.aten.mul.Tensor %192, %0 : !torch.vtensor<[1,1,1,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,1,1,128],f32>
    %194 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %195 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %196 = torch.aten.slice.Tensor %1, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,512],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,512],si64>
    %197 = torch.aten.slice.Tensor %196, %int1, %int0, %int128, %int1 : !torch.vtensor<[1,512],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64>
    %198 = torch.aten.embedding %2, %arg0, %int0, %false, %false : !torch.vtensor<[30522,384],f32>, !torch.vtensor<[1,128],si32>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,128,384],f32>
    %199 = torch.aten.embedding %3, %arg2, %int-1, %false, %false : !torch.vtensor<[2,384],f32>, !torch.vtensor<[1,128],si32>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,128,384],f32>
    %200 = torch.aten.add.Tensor %198, %199, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %201 = torch.aten.embedding %4, %197, %int-1, %false, %false : !torch.vtensor<[512,384],f32>, !torch.vtensor<[1,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,?,384],f32>
    %202 = torch.aten.add.Tensor %200, %201, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,?,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %203 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int>
    %result0, %result1, %result2 = torch.aten.native_layer_norm %202, %203, %6, %5, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %204 = torch.aten.linear %result0, %8, %7 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %205 = torch.aten.linear %result0, %9, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %206 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %207 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %208 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %209 = torch.aten.view %205, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %210 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %211 = torch.aten.permute %209, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %212 = torch.aten.linear %result0, %11, %10 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %213 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %214 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %215 = torch.aten.view %212, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %216 = torch.aten.permute %215, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %217 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %218 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %219 = torch.aten.view %204, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %220 = torch.aten.permute %219, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %221 = torch.aten.transpose.int %211, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %222 = torch.aten.matmul %220, %221 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %223 = torch.aten.div.Tensor %222, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %224 = torch.aten.add.Tensor %223, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values, %indices = torch.aten.max.dim %224, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %225 = torch.aten.sub.Tensor %224, %values, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %226 = torch.aten.exp %225 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %227 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %228 = torch.aten.sum.dim_IntList %226, %227, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %229 = torch.aten.div.Tensor %226, %228 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %230 = torch.aten.matmul %229, %216 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %231 = torch.aten.permute %230, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %232 = torch.aten.contiguous %231, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
    %233 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %234 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %235 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %236 = torch.aten.view %232, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32>
    %237 = torch.aten.linear %236, %13, %12 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %238 = torch.aten.add.Tensor %237, %result0, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_0, %result1_1, %result2_2 = torch.aten.native_layer_norm %238, %203, %15, %14, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %239 = torch.aten.linear %result0_0, %17, %16 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32>
    %240 = torch.aten.gelu %239, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32>
    %241 = torch.aten.linear %240, %19, %18 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %242 = torch.aten.add.Tensor %241, %result0_0, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %242, %203, %21, %20, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %243 = torch.aten.linear %result0_3, %23, %22 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %244 = torch.aten.linear %result0_3, %24, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %245 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %246 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %247 = torch.aten.view %244, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %248 = torch.aten.permute %247, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %249 = torch.aten.linear %result0_3, %26, %25 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %250 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %251 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %252 = torch.aten.view %249, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %253 = torch.aten.permute %252, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %254 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %255 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %256 = torch.aten.view %243, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %257 = torch.aten.permute %256, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %258 = torch.aten.transpose.int %248, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %259 = torch.aten.matmul %257, %258 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %260 = torch.aten.div.Tensor %259, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %261 = torch.aten.add.Tensor %260, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values_6, %indices_7 = torch.aten.max.dim %261, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %262 = torch.aten.sub.Tensor %261, %values_6, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %263 = torch.aten.exp %262 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %264 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %265 = torch.aten.sum.dim_IntList %263, %264, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %266 = torch.aten.div.Tensor %263, %265 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %267 = torch.aten.matmul %266, %253 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %268 = torch.aten.permute %267, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %269 = torch.aten.contiguous %268, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
    %270 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %271 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %272 = torch.aten.view %269, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32>
    %273 = torch.aten.linear %272, %28, %27 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %274 = torch.aten.add.Tensor %273, %result0_3, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_8, %result1_9, %result2_10 = torch.aten.native_layer_norm %274, %203, %30, %29, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %275 = torch.aten.linear %result0_8, %32, %31 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32>
    %276 = torch.aten.gelu %275, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32>
    %277 = torch.aten.linear %276, %34, %33 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %278 = torch.aten.add.Tensor %277, %result0_8, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_11, %result1_12, %result2_13 = torch.aten.native_layer_norm %278, %203, %36, %35, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %279 = torch.aten.linear %result0_11, %38, %37 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %280 = torch.aten.linear %result0_11, %39, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %281 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %282 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %283 = torch.aten.view %280, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %284 = torch.aten.permute %283, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %285 = torch.aten.linear %result0_11, %41, %40 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %286 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %287 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %288 = torch.aten.view %285, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %289 = torch.aten.permute %288, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %290 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %291 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %292 = torch.aten.view %279, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %293 = torch.aten.permute %292, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %294 = torch.aten.transpose.int %284, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %295 = torch.aten.matmul %293, %294 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %296 = torch.aten.div.Tensor %295, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %297 = torch.aten.add.Tensor %296, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values_14, %indices_15 = torch.aten.max.dim %297, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %298 = torch.aten.sub.Tensor %297, %values_14, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %299 = torch.aten.exp %298 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %300 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %301 = torch.aten.sum.dim_IntList %299, %300, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %302 = torch.aten.div.Tensor %299, %301 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %303 = torch.aten.matmul %302, %289 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %304 = torch.aten.permute %303, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %305 = torch.aten.contiguous %304, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
    %306 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %307 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %308 = torch.aten.view %305, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32>
    %309 = torch.aten.linear %308, %43, %42 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %310 = torch.aten.add.Tensor %309, %result0_11, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_16, %result1_17, %result2_18 = torch.aten.native_layer_norm %310, %203, %45, %44, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %311 = torch.aten.linear %result0_16, %47, %46 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32>
    %312 = torch.aten.gelu %311, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32>
    %313 = torch.aten.linear %312, %49, %48 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %314 = torch.aten.add.Tensor %313, %result0_16, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_19, %result1_20, %result2_21 = torch.aten.native_layer_norm %314, %203, %51, %50, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %315 = torch.aten.linear %result0_19, %53, %52 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %316 = torch.aten.linear %result0_19, %54, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %317 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %318 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %319 = torch.aten.view %316, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %320 = torch.aten.permute %319, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %321 = torch.aten.linear %result0_19, %56, %55 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %322 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %323 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %324 = torch.aten.view %321, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %325 = torch.aten.permute %324, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %326 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %327 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %328 = torch.aten.view %315, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %329 = torch.aten.permute %328, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %330 = torch.aten.transpose.int %320, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %331 = torch.aten.matmul %329, %330 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %332 = torch.aten.div.Tensor %331, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %333 = torch.aten.add.Tensor %332, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values_22, %indices_23 = torch.aten.max.dim %333, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %334 = torch.aten.sub.Tensor %333, %values_22, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %335 = torch.aten.exp %334 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %336 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %337 = torch.aten.sum.dim_IntList %335, %336, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %338 = torch.aten.div.Tensor %335, %337 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %339 = torch.aten.matmul %338, %325 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %340 = torch.aten.permute %339, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %341 = torch.aten.contiguous %340, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
    %342 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %343 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %344 = torch.aten.view %341, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32>
    %345 = torch.aten.linear %344, %58, %57 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %346 = torch.aten.add.Tensor %345, %result0_19, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %346, %203, %60, %59, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %347 = torch.aten.linear %result0_24, %62, %61 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32>
    %348 = torch.aten.gelu %347, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32>
    %349 = torch.aten.linear %348, %64, %63 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %350 = torch.aten.add.Tensor %349, %result0_24, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %350, %203, %66, %65, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %351 = torch.aten.linear %result0_27, %68, %67 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %352 = torch.aten.linear %result0_27, %69, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %353 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %354 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %355 = torch.aten.view %352, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %356 = torch.aten.permute %355, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %357 = torch.aten.linear %result0_27, %71, %70 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %358 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %359 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %360 = torch.aten.view %357, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %361 = torch.aten.permute %360, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %362 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %363 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %364 = torch.aten.view %351, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %365 = torch.aten.permute %364, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %366 = torch.aten.transpose.int %356, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %367 = torch.aten.matmul %365, %366 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %368 = torch.aten.div.Tensor %367, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %369 = torch.aten.add.Tensor %368, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values_30, %indices_31 = torch.aten.max.dim %369, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %370 = torch.aten.sub.Tensor %369, %values_30, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %371 = torch.aten.exp %370 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %372 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %373 = torch.aten.sum.dim_IntList %371, %372, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %374 = torch.aten.div.Tensor %371, %373 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %375 = torch.aten.matmul %374, %361 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %376 = torch.aten.permute %375, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %377 = torch.aten.contiguous %376, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
    %378 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %379 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %380 = torch.aten.view %377, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32>
    %381 = torch.aten.linear %380, %73, %72 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %382 = torch.aten.add.Tensor %381, %result0_27, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_32, %result1_33, %result2_34 = torch.aten.native_layer_norm %382, %203, %75, %74, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %383 = torch.aten.linear %result0_32, %77, %76 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32>
    %384 = torch.aten.gelu %383, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32>
    %385 = torch.aten.linear %384, %79, %78 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %386 = torch.aten.add.Tensor %385, %result0_32, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32>
    %result0_35, %result1_36, %result2_37 = torch.aten.native_layer_norm %386, %203, %81, %80, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32>
    %387 = torch.aten.linear %result0_35, %83, %82 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %388 = torch.aten.linear %result0_35, %84, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %389 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %390 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %391 = torch.aten.view %388, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %392 = torch.aten.permute %391, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %393 = torch.aten.linear %result0_35, %86, %85 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32>
    %394 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %395 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %396 = torch.aten.view %393, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %397 = torch.aten.permute %396, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %398 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64>
    %399 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64>
    %400 = torch.aten.view %387, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %401 = torch.aten.permute %400, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32>
    %402 = torch.aten.transpose.int %392, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32>
    %403 = torch.aten.matmul %401, %402 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %404 = torch.aten.div.Tensor %403, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32>
    %405 = torch.aten.add.Tensor %404, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32>
    %values_38, %indices_39 = torch.aten.max.dim %405, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64>
    %406 = torch.aten.sub.Tensor %405, %values_38, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32>
    %407 = torch.aten.exp %406 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %408 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
    %409 = torch.aten.sum.dim_IntList %407, %408, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32>
    %410 = torch.aten.div.Tensor %407, %409 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32>
    %411 = torch.aten.matmul %410, %397 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32>
    %412 = torch.aten.permute %411, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32>
    %413 = torch.aten.contiguous %412, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32>
%414 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%415 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%416 = torch.aten.view %413, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%417 = torch.aten.linear %416, %88, %87 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%418 = torch.aten.add.Tensor %417, %result0_35, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_40, %result1_41, %result2_42 = torch.aten.native_layer_norm %418, %203, %90, %89, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%419 = torch.aten.linear %result0_40, %92, %91 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%420 = torch.aten.gelu %419, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%421 = torch.aten.linear %420, %94, %93 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%422 = torch.aten.add.Tensor %421, %result0_40, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_43, %result1_44, %result2_45 = torch.aten.native_layer_norm %422, %203, %96, %95, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
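    // Next encoder layer: same self-attention + feed-forward pattern as above.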
%423 = torch.aten.linear %result0_43, %98, %97 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%424 = torch.aten.linear %result0_43, %99, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%425 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%426 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%427 = torch.aten.view %424, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%428 = torch.aten.permute %427, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%429 = torch.aten.linear %result0_43, %101, %100 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%430 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%431 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%432 = torch.aten.view %429, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%433 = torch.aten.permute %432, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%434 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%435 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%436 = torch.aten.view %423, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%437 = torch.aten.permute %436, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%438 = torch.aten.transpose.int %428, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%439 = torch.aten.matmul %437, %438 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%440 = torch.aten.div.Tensor %439, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%441 = torch.aten.add.Tensor %440, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_46, %indices_47 = torch.aten.max.dim %441, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%442 = torch.aten.sub.Tensor %441, %values_46, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%443 = torch.aten.exp %442 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%444 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%445 = torch.aten.sum.dim_IntList %443, %444, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%446 = torch.aten.div.Tensor %443, %445 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%447 = torch.aten.matmul %446, %433 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%448 = torch.aten.permute %447, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%449 = torch.aten.contiguous %448, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%450 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%451 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%452 = torch.aten.view %449, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%453 = torch.aten.linear %452, %103, %102 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%454 = torch.aten.add.Tensor %453, %result0_43, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %454, %203, %105, %104, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%455 = torch.aten.linear %result0_48, %107, %106 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%456 = torch.aten.gelu %455, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%457 = torch.aten.linear %456, %109, %108 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%458 = torch.aten.add.Tensor %457, %result0_48, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %458, %203, %111, %110, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
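    // Next encoder layer: same self-attention + feed-forward pattern as above.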
%459 = torch.aten.linear %result0_51, %113, %112 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%460 = torch.aten.linear %result0_51, %114, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%461 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%462 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%463 = torch.aten.view %460, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%464 = torch.aten.permute %463, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%465 = torch.aten.linear %result0_51, %116, %115 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%466 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%467 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%468 = torch.aten.view %465, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%469 = torch.aten.permute %468, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%470 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%471 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%472 = torch.aten.view %459, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%473 = torch.aten.permute %472, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%474 = torch.aten.transpose.int %464, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%475 = torch.aten.matmul %473, %474 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%476 = torch.aten.div.Tensor %475, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%477 = torch.aten.add.Tensor %476, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_54, %indices_55 = torch.aten.max.dim %477, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%478 = torch.aten.sub.Tensor %477, %values_54, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%479 = torch.aten.exp %478 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%480 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%481 = torch.aten.sum.dim_IntList %479, %480, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%482 = torch.aten.div.Tensor %479, %481 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%483 = torch.aten.matmul %482, %469 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%484 = torch.aten.permute %483, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%485 = torch.aten.contiguous %484, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%486 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%487 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%488 = torch.aten.view %485, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%489 = torch.aten.linear %488, %118, %117 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%490 = torch.aten.add.Tensor %489, %result0_51, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_56, %result1_57, %result2_58 = torch.aten.native_layer_norm %490, %203, %120, %119, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%491 = torch.aten.linear %result0_56, %122, %121 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%492 = torch.aten.gelu %491, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%493 = torch.aten.linear %492, %124, %123 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%494 = torch.aten.add.Tensor %493, %result0_56, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_59, %result1_60, %result2_61 = torch.aten.native_layer_norm %494, %203, %126, %125, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
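    // Next encoder layer: same self-attention + feed-forward pattern as above.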
%495 = torch.aten.linear %result0_59, %128, %127 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%496 = torch.aten.linear %result0_59, %129, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%497 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%498 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%499 = torch.aten.view %496, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%500 = torch.aten.permute %499, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%501 = torch.aten.linear %result0_59, %131, %130 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%502 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%503 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%504 = torch.aten.view %501, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%505 = torch.aten.permute %504, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%506 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%507 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%508 = torch.aten.view %495, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%509 = torch.aten.permute %508, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%510 = torch.aten.transpose.int %500, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%511 = torch.aten.matmul %509, %510 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%512 = torch.aten.div.Tensor %511, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%513 = torch.aten.add.Tensor %512, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_62, %indices_63 = torch.aten.max.dim %513, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%514 = torch.aten.sub.Tensor %513, %values_62, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%515 = torch.aten.exp %514 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%516 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%517 = torch.aten.sum.dim_IntList %515, %516, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%518 = torch.aten.div.Tensor %515, %517 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%519 = torch.aten.matmul %518, %505 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%520 = torch.aten.permute %519, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%521 = torch.aten.contiguous %520, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%522 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%523 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%524 = torch.aten.view %521, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%525 = torch.aten.linear %524, %133, %132 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%526 = torch.aten.add.Tensor %525, %result0_59, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_64, %result1_65, %result2_66 = torch.aten.native_layer_norm %526, %203, %135, %134, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%527 = torch.aten.linear %result0_64, %137, %136 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%528 = torch.aten.gelu %527, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%529 = torch.aten.linear %528, %139, %138 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%530 = torch.aten.add.Tensor %529, %result0_64, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_67, %result1_68, %result2_69 = torch.aten.native_layer_norm %530, %203, %141, %140, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
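    // Next encoder layer: same self-attention + feed-forward pattern as above.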
%531 = torch.aten.linear %result0_67, %143, %142 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%532 = torch.aten.linear %result0_67, %144, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%533 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%534 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%535 = torch.aten.view %532, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%536 = torch.aten.permute %535, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%537 = torch.aten.linear %result0_67, %146, %145 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%538 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%539 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%540 = torch.aten.view %537, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%541 = torch.aten.permute %540, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%542 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%543 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%544 = torch.aten.view %531, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%545 = torch.aten.permute %544, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%546 = torch.aten.transpose.int %536, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%547 = torch.aten.matmul %545, %546 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%548 = torch.aten.div.Tensor %547, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%549 = torch.aten.add.Tensor %548, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_70, %indices_71 = torch.aten.max.dim %549, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%550 = torch.aten.sub.Tensor %549, %values_70, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%551 = torch.aten.exp %550 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%552 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%553 = torch.aten.sum.dim_IntList %551, %552, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%554 = torch.aten.div.Tensor %551, %553 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%555 = torch.aten.matmul %554, %541 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%556 = torch.aten.permute %555, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%557 = torch.aten.contiguous %556, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%558 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%559 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%560 = torch.aten.view %557, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%561 = torch.aten.linear %560, %148, %147 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%562 = torch.aten.add.Tensor %561, %result0_67, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %562, %203, %150, %149, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%563 = torch.aten.linear %result0_72, %152, %151 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%564 = torch.aten.gelu %563, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%565 = torch.aten.linear %564, %154, %153 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%566 = torch.aten.add.Tensor %565, %result0_72, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_75, %result1_76, %result2_77 = torch.aten.native_layer_norm %566, %203, %156, %155, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
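    // Next encoder layer: same self-attention + feed-forward pattern as above.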
%567 = torch.aten.linear %result0_75, %158, %157 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%568 = torch.aten.linear %result0_75, %159, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%569 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%570 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%571 = torch.aten.view %568, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%572 = torch.aten.permute %571, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%573 = torch.aten.linear %result0_75, %161, %160 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%574 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%575 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%576 = torch.aten.view %573, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%577 = torch.aten.permute %576, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%578 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%579 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%580 = torch.aten.view %567, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%581 = torch.aten.permute %580, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%582 = torch.aten.transpose.int %572, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%583 = torch.aten.matmul %581, %582 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%584 = torch.aten.div.Tensor %583, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%585 = torch.aten.add.Tensor %584, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_78, %indices_79 = torch.aten.max.dim %585, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%586 = torch.aten.sub.Tensor %585, %values_78, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%587 = torch.aten.exp %586 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%588 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%589 = torch.aten.sum.dim_IntList %587, %588, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%590 = torch.aten.div.Tensor %587, %589 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%591 = torch.aten.matmul %590, %577 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%592 = torch.aten.permute %591, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%593 = torch.aten.contiguous %592, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%594 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%595 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%596 = torch.aten.view %593, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%597 = torch.aten.linear %596, %163, %162 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%598 = torch.aten.add.Tensor %597, %result0_75, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_80, %result1_81, %result2_82 = torch.aten.native_layer_norm %598, %203, %165, %164, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%599 = torch.aten.linear %result0_80, %167, %166 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%600 = torch.aten.gelu %599, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%601 = torch.aten.linear %600, %169, %168 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%602 = torch.aten.add.Tensor %601, %result0_80, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_83, %result1_84, %result2_85 = torch.aten.native_layer_norm %602, %203, %171, %170, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
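    // Final encoder layer: same self-attention + feed-forward pattern; its layer-norm output (%result0_91) feeds the pooler below.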
%603 = torch.aten.linear %result0_83, %174, %173 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%604 = torch.aten.linear %result0_83, %175, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%605 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%606 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%607 = torch.aten.view %604, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%608 = torch.aten.permute %607, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%609 = torch.aten.linear %result0_83, %177, %176 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%610 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%611 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%612 = torch.aten.view %609, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%613 = torch.aten.permute %612, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%614 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%615 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%616 = torch.aten.view %603, %208 : !torch.vtensor<[1,128,384],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%617 = torch.aten.permute %616, %210 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,12,128,32],f32> | |
%618 = torch.aten.transpose.int %608, %int-1, %int-2 : !torch.vtensor<[1,12,128,32],f32>, !torch.int, !torch.int -> !torch.vtensor<[1,12,32,128],f32> | |
%619 = torch.aten.matmul %617, %618 : !torch.vtensor<[1,12,128,32],f32>, !torch.vtensor<[1,12,32,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%620 = torch.aten.div.Tensor %619, %172 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[1,12,128,128],f32> | |
%621 = torch.aten.add.Tensor %620, %193, %int1 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,1,1,128],f32>, !torch.int -> !torch.vtensor<[1,12,128,128],f32> | |
%values_86, %indices_87 = torch.aten.max.dim %621, %int-1, %true : !torch.vtensor<[1,12,128,128],f32>, !torch.int, !torch.bool -> !torch.vtensor<[1,12,128,1],f32>, !torch.vtensor<[1,12,128,1],si64> | |
%622 = torch.aten.sub.Tensor %621, %values_86, %float1.000000e00 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32>, !torch.float -> !torch.vtensor<[1,12,128,128],f32> | |
%623 = torch.aten.exp %622 : !torch.vtensor<[1,12,128,128],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%624 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%625 = torch.aten.sum.dim_IntList %623, %624, %true, %none : !torch.vtensor<[1,12,128,128],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,12,128,1],f32> | |
%626 = torch.aten.div.Tensor %623, %625 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,1],f32> -> !torch.vtensor<[1,12,128,128],f32> | |
%627 = torch.aten.matmul %626, %613 : !torch.vtensor<[1,12,128,128],f32>, !torch.vtensor<[1,12,128,32],f32> -> !torch.vtensor<[1,12,128,32],f32> | |
%628 = torch.aten.permute %627, %210 : !torch.vtensor<[1,12,128,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,12,32],f32> | |
%629 = torch.aten.contiguous %628, %int0 : !torch.vtensor<[1,128,12,32],f32>, !torch.int -> !torch.vtensor<[1,128,12,32],f32> | |
%630 = torch.prim.NumToTensor.Scalar %int1 : !torch.int -> !torch.vtensor<[],si64> | |
%631 = torch.prim.NumToTensor.Scalar %int128 : !torch.int -> !torch.vtensor<[],si64> | |
%632 = torch.aten.view %629, %235 : !torch.vtensor<[1,128,12,32],f32>, !torch.list<int> -> !torch.vtensor<[1,128,384],f32> | |
%633 = torch.aten.linear %632, %178, %183 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%634 = torch.aten.add.Tensor %633, %result0_83, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_88, %result1_89, %result2_90 = torch.aten.native_layer_norm %634, %203, %182, %183, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
%635 = torch.aten.linear %result0_88, %180, %179 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1536,384],f32>, !torch.vtensor<[1536],f32> -> !torch.vtensor<[1,128,1536],f32> | |
%636 = torch.aten.gelu %635, %str : !torch.vtensor<[1,128,1536],f32>, !torch.str -> !torch.vtensor<[1,128,1536],f32> | |
%637 = torch.aten.linear %636, %181, %183 : !torch.vtensor<[1,128,1536],f32>, !torch.vtensor<[384,1536],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,128,384],f32> | |
%638 = torch.aten.add.Tensor %637, %result0_88, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,384],f32>, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%result0_91, %result1_92, %result2_93 = torch.aten.native_layer_norm %638, %203, %182, %183, %float9.999990e-13 : !torch.vtensor<[1,128,384],f32>, !torch.list<int>, !torch.vtensor<[384],f32>, !torch.vtensor<[384],f32>, !torch.float -> !torch.vtensor<[1,128,384],f32>, !torch.vtensor<[1,128,1],f32>, !torch.vtensor<[1,128,1],f32> | |
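    // Pooler and classification head: slice out the first (typically [CLS]) token of the sequence, apply a dense layer with tanh, then project to the 2 output logits.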
%639 = torch.aten.slice.Tensor %result0_91, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,128,384],f32> | |
%640 = torch.aten.slice.Tensor %639, %int1, %int0, %int1, %int1 : !torch.vtensor<[1,128,384],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,1,384],f32> | |
%641 = torch.aten.squeeze.dim %640, %int1 : !torch.vtensor<[1,1,384],f32>, !torch.int -> !torch.vtensor<[1,384],f32> | |
%642 = torch.aten.linear %641, %184, %183 : !torch.vtensor<[1,384],f32>, !torch.vtensor<[384,384],f32>, !torch.vtensor<[384],f32> -> !torch.vtensor<[1,384],f32> | |
%643 = torch.aten.tanh %642 : !torch.vtensor<[1,384],f32> -> !torch.vtensor<[1,384],f32> | |
%644 = torch.aten.linear %643, %186, %185 : !torch.vtensor<[1,384],f32>, !torch.vtensor<[2,384],f32>, !torch.vtensor<[2],f32> -> !torch.vtensor<[1,2],f32> | |
return %644 : !torch.vtensor<[1,2],f32> | |
} | |
} |