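// torch-mlir "torch" dialect IR for a module named "HuggingFaceLanguage": a
// BERT-style encoder (hidden size 768, 12 attention heads of 64, vocab 30522,
// max position 512) with a 2-class classifier head. Weight payloads are elided
// dense resources.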
#loc = loc(unknown)
module attributes {torch.debug_module_name = "HuggingFaceLanguage"} {
  func.func @forward(%arg0: !torch.vtensor<[?,?],si64> loc(unknown)) -> !torch.vtensor<[?,2],f32> {
    %int768 = torch.constant.int 768 loc(#loc1)
    %true = torch.constant.bool true loc(#loc1)
    %float1.000000e00 = torch.constant.float 1.000000e+00 loc(#loc2)
    %none = torch.constant.none loc(#loc)
    %int0 = torch.constant.int 0 loc(#loc3)
    %int1 = torch.constant.int 1 loc(#loc3)
    %false = torch.constant.bool false loc(#loc4)
    %int2 = torch.constant.int 2 loc(#loc5)
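    // Hoisted model weights as elided dense resources: the 2x768 classifier
    // head, then twelve per-layer groups of four 768x768 attention projections,
    // a 768->3072 / 3072->768 feed-forward pair, and their biases and
    // LayerNorm parameters.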
    %0 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2x768xf32>) : !torch.vtensor<[2,768],f32> loc(#loc)
    %1 = torch.vtensor.literal(dense<0.000000e+00> : tensor<2xf32>) : !torch.vtensor<[2],f32> loc(#loc)
    %2 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %3 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %4 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %5 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %6 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %7 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %8 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %52 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %54 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %56 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %57 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %58 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %59 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %60 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %61 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %62 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %63 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %64 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %65 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %66 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %67 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %68 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %69 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %70 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %71 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %72 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %73 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %74 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %75 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %76 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %77 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %78 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %79 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %80 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %81 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %82 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %83 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %84 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %85 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %86 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %87 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %88 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %89 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %90 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %91 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %92 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %93 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %94 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %95 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %96 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %97 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %98 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %99 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %100 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %101 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %102 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %103 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %104 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %105 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %106 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %107 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %108 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %109 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %110 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %111 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %112 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %113 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %114 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %115 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %116 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %117 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %118 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %119 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %120 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %121 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %122 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %123 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %124 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %125 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %126 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %127 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %128 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %129 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %130 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %131 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %132 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %133 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %134 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %135 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %136 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %137 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %138 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %139 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %140 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %141 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %142 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %143 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %144 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %145 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %146 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %147 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %148 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %149 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %150 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %151 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %152 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %153 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %154 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %155 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %156 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %157 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %158 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %159 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %160 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %161 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %162 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %163 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %164 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %165 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %166 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %167 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %168 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %169 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %170 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %171 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %172 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %173 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %174 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %175 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %176 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %177 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %178 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %179 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %180 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %181 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %182 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.vtensor<[768,3072],f32> loc(#loc)
    %183 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %184 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.vtensor<[3072,768],f32> loc(#loc)
    %185 = torch.vtensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.vtensor<[3072],f32> loc(#loc)
    %186 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %187 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %188 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %189 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
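    // Attention score scale: sqrt(head_dim) = sqrt(64) = 8.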
    %190 = torch.vtensor.literal(dense<8.000000e+00> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc6)
    %191 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %192 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %193 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %194 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %195 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.vtensor<[768,768],f32> loc(#loc)
    %196 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %197 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %198 = torch.vtensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.vtensor<[768],f32> loc(#loc)
    %199 = torch.vtensor.literal(dense_resource<__elided__> : tensor<512x768xf32>) : !torch.vtensor<[512,768],f32> loc(#loc)
    %200 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2x768xf32>) : !torch.vtensor<[2,768],f32> loc(#loc)
    %201 = torch.vtensor.literal(dense_resource<__elided__> : tensor<30522x768xf32>) : !torch.vtensor<[30522,768],f32> loc(#loc)
    %202 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1x512xsi64>) : !torch.vtensor<[1,512],si64> loc(#loc)
    %203 = torch.vtensor.literal(dense<-3.4028234663852886E+38> : tensor<f64>) : !torch.vtensor<[],f64> loc(#loc7)
    %204 = torch.vtensor.literal(dense<0> : tensor<1x512xsi64>) : !torch.vtensor<[1,512],si64> loc(#loc)
    %int9223372036854775807 = torch.constant.int 9223372036854775807 loc(#loc8)
    %int3 = torch.constant.int 3 loc(#loc5)
    %float9.999990e-13 = torch.constant.float 9.9999999999999998E-13 loc(#loc1)
    %int-1 = torch.constant.int -1 loc(#loc9)
    %str = torch.constant.str "none" loc(#loc10)
    %int-2 = torch.constant.int -2 loc(#loc11)
    %int64 = torch.constant.int 64 loc(#loc12)
    %int12 = torch.constant.int 12 loc(#loc12)
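    // Read the dynamic batch and sequence-length dimensions from the input ids.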
    %205 = torch.aten.size.int %arg0, %int0 : !torch.vtensor<[?,?],si64>, !torch.int -> !torch.int loc(#loc3)
    %206 = torch.prim.NumToTensor.Scalar %205 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
    %207 = torch.aten.size.int %arg0, %int1 : !torch.vtensor<[?,?],si64>, !torch.int -> !torch.int loc(#loc3)
    %208 = torch.prim.NumToTensor.Scalar %207 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
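    // Attention mask and token-type ids: a ones mask of shape [batch, seq] and
    // all-zero token-type ids broadcast to [batch, seq]; the mask is expanded
    // to [batch, 1, 1, seq] and mapped through (1 - mask) * -3.4028e38 so
    // masked positions become large negative attention logits.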
    %209 = torch.aten.add.int %207, %int0 : !torch.int, !torch.int -> !torch.int loc(#loc4)
    %210 = torch.prim.NumToTensor.Scalar %209 : !torch.int -> !torch.vtensor<[],si64> loc(#loc4)
    %211 = torch.prim.ListConstruct %205, %209 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc)
    %cpu = torch.constant.device "cpu" loc(#loc4)
    %212 = torch.aten.ones %211, %none, %none, %cpu, %false : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[?,?],f32> loc(#loc4)
    %213 = torch.aten.slice.Tensor %204, %int1, %int0, %207, %int1 : !torch.vtensor<[1,512],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> loc(#loc8)
    %214 = torch.prim.ListConstruct %205, %207 : (!torch.int, !torch.int) -> !torch.list<int> loc(#loc)
    %215 = torch.aten.broadcast_to %213, %214 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?,?],si64> loc(#loc13)
    %216 = torch.aten.slice.Tensor %212, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[?,?],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,?],f32> loc(#loc5)
    %217 = torch.aten.unsqueeze %216, %int1 : !torch.vtensor<[?,?],f32>, !torch.int -> !torch.vtensor<[?,1,?],f32> loc(#loc5)
    %218 = torch.aten.unsqueeze %217, %int2 : !torch.vtensor<[?,1,?],f32>, !torch.int -> !torch.vtensor<[?,1,1,?],f32> loc(#loc5)
    %219 = torch.aten.slice.Tensor %218, %int3, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[?,1,1,?],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,1,1,?],f32> loc(#loc5)
    %220 = torch.aten.rsub.Scalar %219, %float1.000000e00, %int1 : !torch.vtensor<[?,1,1,?],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,1,1,?],f32> loc(#loc2)
    %221 = torch.aten.mul.Tensor %220, %203 : !torch.vtensor<[?,1,1,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,1,1,?],f32> loc(#loc7)
    %222 = torch.aten.size.int %arg0, %int1 : !torch.vtensor<[?,?],si64>, !torch.int -> !torch.int loc(#loc14)
    %223 = torch.prim.NumToTensor.Scalar %222 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
    %224 = torch.aten.add.int %222, %int0 : !torch.int, !torch.int -> !torch.int loc(#loc15)
    %225 = torch.prim.NumToTensor.Scalar %224 : !torch.int -> !torch.vtensor<[],si64> loc(#loc15)
    %226 = torch.aten.slice.Tensor %202, %int1, %int0, %224, %int1 : !torch.vtensor<[1,512],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> loc(#loc15)
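    // Embedding sum: word embeddings (30522x768) + token-type embeddings
    // (2x768) + position embeddings (512x768).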
    %227 = torch.aten.embedding %201, %arg0, %int0, %false, %false : !torch.vtensor<[30522,768],f32>, !torch.vtensor<[?,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[?,?,768],f32> loc(#loc9)
    %228 = torch.aten.embedding %200, %215, %int-1, %false, %false : !torch.vtensor<[2,768],f32>, !torch.vtensor<[?,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[?,?,768],f32> loc(#loc9)
    %229 = torch.aten.add.Tensor %227, %228, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc16)
    %230 = torch.aten.embedding %199, %226, %int-1, %false, %false : !torch.vtensor<[512,768],f32>, !torch.vtensor<[1,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,?,768],f32> loc(#loc9)
    %231 = torch.aten.add.Tensor %229, %230, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[1,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc17)
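    // Embedding LayerNorm, decomposed: mean and variance over the hidden dim,
    // rsqrt(var + 1e-12), then elementwise scale and shift.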
    %232 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> loc(#loc1)
    %233 = torch.aten.sum.dim_IntList %231, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %234 = torch.aten.div.Scalar %233, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %235 = torch.aten.size.int %231, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %236 = torch.aten.size.int %231, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %237 = torch.prim.ListConstruct %235, %236, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
    %238 = torch.aten.broadcast_to %234, %237 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %239 = torch.aten.sub.Tensor %231, %238, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %240 = torch.aten.mul.Tensor %239, %239 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %241 = torch.aten.sum.dim_IntList %240, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %242 = torch.aten.div.Scalar %241, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %243 = torch.aten.add.Scalar %242, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %244 = torch.aten.rsqrt %243 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %245 = torch.aten.size.int %231, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %246 = torch.aten.size.int %231, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %247 = torch.prim.ListConstruct %245, %246, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
    %248 = torch.aten.broadcast_to %244, %247 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %249 = torch.aten.mul.Tensor %239, %248 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %250 = torch.aten.mul.Tensor %249, %197 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %251 = torch.aten.add.Tensor %250, %198, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
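    // Encoder layer 0 self-attention: query, key, and value linear projections
    // (weight transpose, matmul, bias add; each 768 -> 768).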
    %252 = torch.aten.transpose.int %195, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
    %253 = torch.aten.matmul %251, %252 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
    %254 = torch.aten.add.Tensor %253, %196, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
    %255 = torch.aten.transpose.int %193, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
    %256 = torch.aten.matmul %251, %255 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
    %257 = torch.aten.add.Tensor %256, %194, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
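    // Reshape each projection to [batch, seq, 12, 64] and permute to
    // [batch, 12, seq, 64] (12 heads, head_dim 64).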
    %258 = torch.aten.size.int %257, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
    %259 = torch.prim.NumToTensor.Scalar %258 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
    %260 = torch.aten.size.int %257, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
    %261 = torch.prim.NumToTensor.Scalar %260 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
    %262 = torch.prim.ListConstruct %258, %260, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
    %263 = torch.aten.view %257, %262 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
    %264 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc)
    %265 = torch.aten.permute %263, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
    %266 = torch.aten.transpose.int %191, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
    %267 = torch.aten.matmul %251, %266 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
    %268 = torch.aten.add.Tensor %267, %192, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
    %269 = torch.aten.size.int %268, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
    %270 = torch.prim.NumToTensor.Scalar %269 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
    %271 = torch.aten.size.int %268, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
    %272 = torch.prim.NumToTensor.Scalar %271 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
    %273 = torch.prim.ListConstruct %269, %271, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
    %274 = torch.aten.view %268, %273 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
    %275 = torch.aten.permute %274, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
    %276 = torch.aten.size.int %254, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
    %277 = torch.prim.NumToTensor.Scalar %276 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
    %278 = torch.aten.size.int %254, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
    %279 = torch.prim.NumToTensor.Scalar %278 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
    %280 = torch.prim.ListConstruct %276, %278, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
    %281 = torch.aten.view %254, %280 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
    %282 = torch.aten.permute %281, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
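    // Scaled dot-product attention: scores = Q @ K^T / 8, plus the additive
    // attention mask.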
    %283 = torch.aten.transpose.int %265, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11)
    %284 = torch.aten.matmul %282, %283 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11)
    %285 = torch.aten.div.Tensor %284, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6)
    %286 = torch.aten.add.Tensor %285, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23)
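    // Numerically stable softmax over the last dim: subtract the row max,
    // exponentiate, and normalize by the sum.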
    %values, %indices = torch.aten.max.dim %286, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24)
    %287 = torch.aten.sub.Tensor %286, %values, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
    %288 = torch.aten.exp %287 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
    %289 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24)
    %290 = torch.aten.sum.dim_IntList %288, %289, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24)
    %291 = torch.aten.div.Tensor %288, %290 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
    %292 = torch.aten.matmul %291, %275 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25)
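    // Merge heads: permute back to [batch, seq, 12, 64] and flatten to
    // [batch, seq, 768].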
    %293 = torch.aten.permute %292, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
    %294 = torch.aten.contiguous %293, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
    %295 = torch.aten.size.int %294, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
    %296 = torch.prim.NumToTensor.Scalar %295 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
    %297 = torch.aten.size.int %294, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
    %298 = torch.prim.NumToTensor.Scalar %297 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
    %299 = torch.prim.ListConstruct %295, %297, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35)
    %300 = torch.aten.view %294, %299 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28)
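    // Attention output projection, residual add with the layer input, and the
    // post-attention LayerNorm.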
    %301 = torch.aten.transpose.int %188, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
    %302 = torch.aten.matmul %300, %301 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
    %303 = torch.aten.add.Tensor %302, %189, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
    %304 = torch.aten.add.Tensor %303, %251, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29)
    %305 = torch.aten.sum.dim_IntList %304, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %306 = torch.aten.div.Scalar %305, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %307 = torch.aten.size.int %304, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %308 = torch.aten.size.int %304, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %309 = torch.prim.ListConstruct %307, %308, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
    %310 = torch.aten.broadcast_to %306, %309 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %311 = torch.aten.sub.Tensor %304, %310, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %312 = torch.aten.mul.Tensor %311, %311 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %313 = torch.aten.sum.dim_IntList %312, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %314 = torch.aten.div.Scalar %313, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %315 = torch.aten.add.Scalar %314, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %316 = torch.aten.rsqrt %315 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %317 = torch.aten.size.int %304, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %318 = torch.aten.size.int %304, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %319 = torch.prim.ListConstruct %317, %318, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
    %320 = torch.aten.broadcast_to %316, %319 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %321 = torch.aten.mul.Tensor %311, %320 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %322 = torch.aten.mul.Tensor %321, %186 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %323 = torch.aten.add.Tensor %322, %187, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
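    // Feed-forward block: 768 -> 3072 linear, GELU (exact, approximate="none"),
    // 3072 -> 768 linear.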
    %324 = torch.aten.transpose.int %184, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18)
    %325 = torch.aten.matmul %323, %324 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
    %326 = torch.aten.add.Tensor %325, %185, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
    %327 = torch.aten.gelu %326, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10)
    %328 = torch.aten.transpose.int %182, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18)
    %329 = torch.aten.matmul %327, %328 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
    %330 = torch.aten.add.Tensor %329, %183, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
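    // Residual add and the output LayerNorm; this completes encoder layer 0.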
    %331 = torch.aten.add.Tensor %330, %323, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30)
    %332 = torch.aten.sum.dim_IntList %331, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %333 = torch.aten.div.Scalar %332, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %334 = torch.aten.size.int %331, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %335 = torch.aten.size.int %331, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %336 = torch.prim.ListConstruct %334, %335, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
    %337 = torch.aten.broadcast_to %333, %336 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %338 = torch.aten.sub.Tensor %331, %337, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %339 = torch.aten.mul.Tensor %338, %338 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %340 = torch.aten.sum.dim_IntList %339, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %341 = torch.aten.div.Scalar %340, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %342 = torch.aten.add.Scalar %341, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %343 = torch.aten.rsqrt %342 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
    %344 = torch.aten.size.int %331, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %345 = torch.aten.size.int %331, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
    %346 = torch.prim.ListConstruct %344, %345, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
    %347 = torch.aten.broadcast_to %343, %346 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %348 = torch.aten.mul.Tensor %338, %347 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %349 = torch.aten.mul.Tensor %348, %180 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
    %350 = torch.aten.add.Tensor %349, %181, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
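    // Encoder layer 1: the same attention / feed-forward pattern with the next
    // group of weights; the listing repeats this way for all twelve layers.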
%351 = torch.aten.transpose.int %178, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%352 = torch.aten.matmul %350, %351 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%353 = torch.aten.add.Tensor %352, %179, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%354 = torch.aten.transpose.int %176, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%355 = torch.aten.matmul %350, %354 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%356 = torch.aten.add.Tensor %355, %177, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%357 = torch.aten.size.int %356, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%358 = torch.prim.NumToTensor.Scalar %357 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%359 = torch.aten.size.int %356, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%360 = torch.prim.NumToTensor.Scalar %359 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%361 = torch.prim.ListConstruct %357, %359, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%362 = torch.aten.view %356, %361 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%363 = torch.aten.permute %362, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%364 = torch.aten.transpose.int %174, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%365 = torch.aten.matmul %350, %364 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%366 = torch.aten.add.Tensor %365, %175, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%367 = torch.aten.size.int %366, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%368 = torch.prim.NumToTensor.Scalar %367 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%369 = torch.aten.size.int %366, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%370 = torch.prim.NumToTensor.Scalar %369 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%371 = torch.prim.ListConstruct %367, %369, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%372 = torch.aten.view %366, %371 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%373 = torch.aten.permute %372, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%374 = torch.aten.size.int %353, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%375 = torch.prim.NumToTensor.Scalar %374 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%376 = torch.aten.size.int %353, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%377 = torch.prim.NumToTensor.Scalar %376 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%378 = torch.prim.ListConstruct %374, %376, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%379 = torch.aten.view %353, %378 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%380 = torch.aten.permute %379, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%381 = torch.aten.transpose.int %363, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11) | |
%382 = torch.aten.matmul %380, %381 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11) | |
%383 = torch.aten.div.Tensor %382, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6) | |
%384 = torch.aten.add.Tensor %383, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23) | |
%values_0, %indices_1 = torch.aten.max.dim %384, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24) | |
%385 = torch.aten.sub.Tensor %384, %values_0, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%386 = torch.aten.exp %385 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%387 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24) | |
%388 = torch.aten.sum.dim_IntList %386, %387, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24) | |
%389 = torch.aten.div.Tensor %386, %388 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%390 = torch.aten.matmul %389, %373 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25) | |
%391 = torch.aten.permute %390, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%392 = torch.aten.contiguous %391, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%393 = torch.aten.size.int %392, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%394 = torch.prim.NumToTensor.Scalar %393 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%395 = torch.aten.size.int %392, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%396 = torch.prim.NumToTensor.Scalar %395 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%397 = torch.prim.ListConstruct %393, %395, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35) | |
%398 = torch.aten.view %392, %397 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28) | |
%399 = torch.aten.transpose.int %172, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%400 = torch.aten.matmul %398, %399 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%401 = torch.aten.add.Tensor %400, %173, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%402 = torch.aten.add.Tensor %401, %350, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29) | |
%403 = torch.aten.sum.dim_IntList %402, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%404 = torch.aten.div.Scalar %403, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%405 = torch.aten.size.int %402, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%406 = torch.aten.size.int %402, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%407 = torch.prim.ListConstruct %405, %406, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%408 = torch.aten.broadcast_to %404, %407 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%409 = torch.aten.sub.Tensor %402, %408, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%410 = torch.aten.mul.Tensor %409, %409 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%411 = torch.aten.sum.dim_IntList %410, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%412 = torch.aten.div.Scalar %411, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%413 = torch.aten.add.Scalar %412, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%414 = torch.aten.rsqrt %413 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%415 = torch.aten.size.int %402, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%416 = torch.aten.size.int %402, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%417 = torch.prim.ListConstruct %415, %416, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%418 = torch.aten.broadcast_to %414, %417 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%419 = torch.aten.mul.Tensor %409, %418 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%420 = torch.aten.mul.Tensor %419, %170 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%421 = torch.aten.add.Tensor %420, %171, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%422 = torch.aten.transpose.int %168, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18)
%423 = torch.aten.matmul %421, %422 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%424 = torch.aten.add.Tensor %423, %169, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%425 = torch.aten.gelu %424, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10)
%426 = torch.aten.transpose.int %166, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18)
%427 = torch.aten.matmul %425, %426 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%428 = torch.aten.add.Tensor %427, %167, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%429 = torch.aten.add.Tensor %428, %421, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30)
%430 = torch.aten.sum.dim_IntList %429, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%431 = torch.aten.div.Scalar %430, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%432 = torch.aten.size.int %429, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%433 = torch.aten.size.int %429, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%434 = torch.prim.ListConstruct %432, %433, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%435 = torch.aten.broadcast_to %431, %434 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%436 = torch.aten.sub.Tensor %429, %435, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%437 = torch.aten.mul.Tensor %436, %436 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%438 = torch.aten.sum.dim_IntList %437, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%439 = torch.aten.div.Scalar %438, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%440 = torch.aten.add.Scalar %439, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%441 = torch.aten.rsqrt %440 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%442 = torch.aten.size.int %429, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%443 = torch.aten.size.int %429, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%444 = torch.prim.ListConstruct %442, %443, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%445 = torch.aten.broadcast_to %441, %444 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%446 = torch.aten.mul.Tensor %436, %445 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%447 = torch.aten.mul.Tensor %446, %164 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%448 = torch.aten.add.Tensor %447, %165, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
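// Self-attention projections: three 768 -> 768 linears on the normalized input (%448) produce
// the query (%451), key (%454) and value (%464), each viewed as [batch, seq, 12, 64] and
// permuted to [batch, 12, seq, 64] for per-head matmuls.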
%449 = torch.aten.transpose.int %162, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%450 = torch.aten.matmul %448, %449 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%451 = torch.aten.add.Tensor %450, %163, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%452 = torch.aten.transpose.int %160, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%453 = torch.aten.matmul %448, %452 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%454 = torch.aten.add.Tensor %453, %161, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%455 = torch.aten.size.int %454, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%456 = torch.prim.NumToTensor.Scalar %455 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%457 = torch.aten.size.int %454, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%458 = torch.prim.NumToTensor.Scalar %457 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%459 = torch.prim.ListConstruct %455, %457, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%460 = torch.aten.view %454, %459 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%461 = torch.aten.permute %460, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%462 = torch.aten.transpose.int %158, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%463 = torch.aten.matmul %448, %462 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%464 = torch.aten.add.Tensor %463, %159, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%465 = torch.aten.size.int %464, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%466 = torch.prim.NumToTensor.Scalar %465 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%467 = torch.aten.size.int %464, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%468 = torch.prim.NumToTensor.Scalar %467 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%469 = torch.prim.ListConstruct %465, %467, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%470 = torch.aten.view %464, %469 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%471 = torch.aten.permute %470, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%472 = torch.aten.size.int %451, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%473 = torch.prim.NumToTensor.Scalar %472 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%474 = torch.aten.size.int %451, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%475 = torch.prim.NumToTensor.Scalar %474 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%476 = torch.prim.ListConstruct %472, %474, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%477 = torch.aten.view %451, %476 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%478 = torch.aten.permute %477, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
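// Attention scores: Q x K^T (key transposed on its last two dims), divided by the
// sqrt(head_dim) scalar (%190) and offset by the additive attention mask (%221).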
%479 = torch.aten.transpose.int %461, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11)
%480 = torch.aten.matmul %478, %479 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11)
%481 = torch.aten.div.Tensor %480, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6)
%482 = torch.aten.add.Tensor %481, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23)
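// Numerically stable softmax over the last dim: subtract the rowwise max, exp, normalize by the sum.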
%values_2, %indices_3 = torch.aten.max.dim %482, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24)
%483 = torch.aten.sub.Tensor %482, %values_2, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%484 = torch.aten.exp %483 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%485 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24)
%486 = torch.aten.sum.dim_IntList %484, %485, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24)
%487 = torch.aten.div.Tensor %484, %486 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
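// Context: attention probabilities x V, permuted back to [batch, seq, 12, 64], made
// contiguous, and flattened to [batch, seq, 768] to merge the heads.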
%488 = torch.aten.matmul %487, %471 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25)
%489 = torch.aten.permute %488, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
%490 = torch.aten.contiguous %489, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
%491 = torch.aten.size.int %490, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
%492 = torch.prim.NumToTensor.Scalar %491 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%493 = torch.aten.size.int %490, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
%494 = torch.prim.NumToTensor.Scalar %493 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%495 = torch.prim.ListConstruct %491, %493, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35)
%496 = torch.aten.view %490, %495 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28)
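// Attention output projection and residual connection, followed again by LayerNorm and feed-forward.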
%497 = torch.aten.transpose.int %156, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%498 = torch.aten.matmul %496, %497 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%499 = torch.aten.add.Tensor %498, %157, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%500 = torch.aten.add.Tensor %499, %448, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29)
%501 = torch.aten.sum.dim_IntList %500, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%502 = torch.aten.div.Scalar %501, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%503 = torch.aten.size.int %500, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%504 = torch.aten.size.int %500, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%505 = torch.prim.ListConstruct %503, %504, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%506 = torch.aten.broadcast_to %502, %505 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%507 = torch.aten.sub.Tensor %500, %506, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%508 = torch.aten.mul.Tensor %507, %507 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%509 = torch.aten.sum.dim_IntList %508, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%510 = torch.aten.div.Scalar %509, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%511 = torch.aten.add.Scalar %510, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%512 = torch.aten.rsqrt %511 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%513 = torch.aten.size.int %500, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%514 = torch.aten.size.int %500, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%515 = torch.prim.ListConstruct %513, %514, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%516 = torch.aten.broadcast_to %512, %515 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%517 = torch.aten.mul.Tensor %507, %516 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%518 = torch.aten.mul.Tensor %517, %154 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%519 = torch.aten.add.Tensor %518, %155, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%520 = torch.aten.transpose.int %152, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18)
%521 = torch.aten.matmul %519, %520 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%522 = torch.aten.add.Tensor %521, %153, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%523 = torch.aten.gelu %522, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10)
%524 = torch.aten.transpose.int %150, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18)
%525 = torch.aten.matmul %523, %524 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%526 = torch.aten.add.Tensor %525, %151, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%527 = torch.aten.add.Tensor %526, %519, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30)
%528 = torch.aten.sum.dim_IntList %527, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%529 = torch.aten.div.Scalar %528, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%530 = torch.aten.size.int %527, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%531 = torch.aten.size.int %527, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%532 = torch.prim.ListConstruct %530, %531, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%533 = torch.aten.broadcast_to %529, %532 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%534 = torch.aten.sub.Tensor %527, %533, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%535 = torch.aten.mul.Tensor %534, %534 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%536 = torch.aten.sum.dim_IntList %535, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%537 = torch.aten.div.Scalar %536, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%538 = torch.aten.add.Scalar %537, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%539 = torch.aten.rsqrt %538 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%540 = torch.aten.size.int %527, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%541 = torch.aten.size.int %527, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%542 = torch.prim.ListConstruct %540, %541, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%543 = torch.aten.broadcast_to %539, %542 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%544 = torch.aten.mul.Tensor %534, %543 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%545 = torch.aten.mul.Tensor %544, %148 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%546 = torch.aten.add.Tensor %545, %149, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
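// Next encoder layer: the same self-attention + feed-forward pattern with this layer's weights.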
%547 = torch.aten.transpose.int %146, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%548 = torch.aten.matmul %546, %547 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%549 = torch.aten.add.Tensor %548, %147, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%550 = torch.aten.transpose.int %144, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%551 = torch.aten.matmul %546, %550 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%552 = torch.aten.add.Tensor %551, %145, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%553 = torch.aten.size.int %552, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%554 = torch.prim.NumToTensor.Scalar %553 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%555 = torch.aten.size.int %552, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%556 = torch.prim.NumToTensor.Scalar %555 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%557 = torch.prim.ListConstruct %553, %555, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%558 = torch.aten.view %552, %557 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%559 = torch.aten.permute %558, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%560 = torch.aten.transpose.int %142, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%561 = torch.aten.matmul %546, %560 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%562 = torch.aten.add.Tensor %561, %143, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%563 = torch.aten.size.int %562, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%564 = torch.prim.NumToTensor.Scalar %563 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%565 = torch.aten.size.int %562, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%566 = torch.prim.NumToTensor.Scalar %565 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%567 = torch.prim.ListConstruct %563, %565, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%568 = torch.aten.view %562, %567 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%569 = torch.aten.permute %568, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%570 = torch.aten.size.int %549, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%571 = torch.prim.NumToTensor.Scalar %570 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%572 = torch.aten.size.int %549, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%573 = torch.prim.NumToTensor.Scalar %572 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%574 = torch.prim.ListConstruct %570, %572, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%575 = torch.aten.view %549, %574 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%576 = torch.aten.permute %575, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%577 = torch.aten.transpose.int %559, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11)
%578 = torch.aten.matmul %576, %577 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11)
%579 = torch.aten.div.Tensor %578, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6)
%580 = torch.aten.add.Tensor %579, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23)
%values_4, %indices_5 = torch.aten.max.dim %580, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24)
%581 = torch.aten.sub.Tensor %580, %values_4, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%582 = torch.aten.exp %581 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%583 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24)
%584 = torch.aten.sum.dim_IntList %582, %583, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24)
%585 = torch.aten.div.Tensor %582, %584 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%586 = torch.aten.matmul %585, %569 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25)
%587 = torch.aten.permute %586, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
%588 = torch.aten.contiguous %587, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
%589 = torch.aten.size.int %588, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
%590 = torch.prim.NumToTensor.Scalar %589 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%591 = torch.aten.size.int %588, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
%592 = torch.prim.NumToTensor.Scalar %591 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%593 = torch.prim.ListConstruct %589, %591, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35)
%594 = torch.aten.view %588, %593 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28)
%595 = torch.aten.transpose.int %140, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%596 = torch.aten.matmul %594, %595 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%597 = torch.aten.add.Tensor %596, %141, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%598 = torch.aten.add.Tensor %597, %546, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29)
%599 = torch.aten.sum.dim_IntList %598, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%600 = torch.aten.div.Scalar %599, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%601 = torch.aten.size.int %598, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%602 = torch.aten.size.int %598, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%603 = torch.prim.ListConstruct %601, %602, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%604 = torch.aten.broadcast_to %600, %603 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%605 = torch.aten.sub.Tensor %598, %604, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%606 = torch.aten.mul.Tensor %605, %605 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%607 = torch.aten.sum.dim_IntList %606, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%608 = torch.aten.div.Scalar %607, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%609 = torch.aten.add.Scalar %608, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%610 = torch.aten.rsqrt %609 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%611 = torch.aten.size.int %598, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%612 = torch.aten.size.int %598, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%613 = torch.prim.ListConstruct %611, %612, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%614 = torch.aten.broadcast_to %610, %613 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%615 = torch.aten.mul.Tensor %605, %614 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%616 = torch.aten.mul.Tensor %615, %138 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%617 = torch.aten.add.Tensor %616, %139, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%618 = torch.aten.transpose.int %136, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18)
%619 = torch.aten.matmul %617, %618 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%620 = torch.aten.add.Tensor %619, %137, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%621 = torch.aten.gelu %620, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10)
%622 = torch.aten.transpose.int %134, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18)
%623 = torch.aten.matmul %621, %622 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%624 = torch.aten.add.Tensor %623, %135, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%625 = torch.aten.add.Tensor %624, %617, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30)
%626 = torch.aten.sum.dim_IntList %625, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%627 = torch.aten.div.Scalar %626, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%628 = torch.aten.size.int %625, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%629 = torch.aten.size.int %625, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%630 = torch.prim.ListConstruct %628, %629, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%631 = torch.aten.broadcast_to %627, %630 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%632 = torch.aten.sub.Tensor %625, %631, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%633 = torch.aten.mul.Tensor %632, %632 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%634 = torch.aten.sum.dim_IntList %633, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%635 = torch.aten.div.Scalar %634, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%636 = torch.aten.add.Scalar %635, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%637 = torch.aten.rsqrt %636 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%638 = torch.aten.size.int %625, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%639 = torch.aten.size.int %625, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%640 = torch.prim.ListConstruct %638, %639, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%641 = torch.aten.broadcast_to %637, %640 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%642 = torch.aten.mul.Tensor %632, %641 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%643 = torch.aten.mul.Tensor %642, %132 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%644 = torch.aten.add.Tensor %643, %133, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
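// Next encoder layer (same pattern).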
%645 = torch.aten.transpose.int %130, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%646 = torch.aten.matmul %644, %645 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%647 = torch.aten.add.Tensor %646, %131, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%648 = torch.aten.transpose.int %128, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%649 = torch.aten.matmul %644, %648 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%650 = torch.aten.add.Tensor %649, %129, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%651 = torch.aten.size.int %650, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%652 = torch.prim.NumToTensor.Scalar %651 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%653 = torch.aten.size.int %650, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%654 = torch.prim.NumToTensor.Scalar %653 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%655 = torch.prim.ListConstruct %651, %653, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%656 = torch.aten.view %650, %655 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%657 = torch.aten.permute %656, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%658 = torch.aten.transpose.int %126, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%659 = torch.aten.matmul %644, %658 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%660 = torch.aten.add.Tensor %659, %127, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%661 = torch.aten.size.int %660, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%662 = torch.prim.NumToTensor.Scalar %661 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%663 = torch.aten.size.int %660, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%664 = torch.prim.NumToTensor.Scalar %663 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%665 = torch.prim.ListConstruct %661, %663, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%666 = torch.aten.view %660, %665 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%667 = torch.aten.permute %666, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%668 = torch.aten.size.int %647, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%669 = torch.prim.NumToTensor.Scalar %668 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%670 = torch.aten.size.int %647, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%671 = torch.prim.NumToTensor.Scalar %670 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%672 = torch.prim.ListConstruct %668, %670, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%673 = torch.aten.view %647, %672 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%674 = torch.aten.permute %673, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%675 = torch.aten.transpose.int %657, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11)
%676 = torch.aten.matmul %674, %675 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11)
%677 = torch.aten.div.Tensor %676, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6)
%678 = torch.aten.add.Tensor %677, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23)
%values_6, %indices_7 = torch.aten.max.dim %678, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24)
%679 = torch.aten.sub.Tensor %678, %values_6, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%680 = torch.aten.exp %679 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%681 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24)
%682 = torch.aten.sum.dim_IntList %680, %681, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24)
%683 = torch.aten.div.Tensor %680, %682 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%684 = torch.aten.matmul %683, %667 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25)
%685 = torch.aten.permute %684, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
%686 = torch.aten.contiguous %685, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
%687 = torch.aten.size.int %686, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
%688 = torch.prim.NumToTensor.Scalar %687 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%689 = torch.aten.size.int %686, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
%690 = torch.prim.NumToTensor.Scalar %689 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%691 = torch.prim.ListConstruct %687, %689, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35)
%692 = torch.aten.view %686, %691 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28)
%693 = torch.aten.transpose.int %124, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%694 = torch.aten.matmul %692, %693 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%695 = torch.aten.add.Tensor %694, %125, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%696 = torch.aten.add.Tensor %695, %644, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29)
%697 = torch.aten.sum.dim_IntList %696, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%698 = torch.aten.div.Scalar %697, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%699 = torch.aten.size.int %696, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%700 = torch.aten.size.int %696, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%701 = torch.prim.ListConstruct %699, %700, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%702 = torch.aten.broadcast_to %698, %701 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%703 = torch.aten.sub.Tensor %696, %702, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%704 = torch.aten.mul.Tensor %703, %703 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%705 = torch.aten.sum.dim_IntList %704, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%706 = torch.aten.div.Scalar %705, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%707 = torch.aten.add.Scalar %706, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%708 = torch.aten.rsqrt %707 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%709 = torch.aten.size.int %696, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%710 = torch.aten.size.int %696, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%711 = torch.prim.ListConstruct %709, %710, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%712 = torch.aten.broadcast_to %708, %711 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%713 = torch.aten.mul.Tensor %703, %712 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%714 = torch.aten.mul.Tensor %713, %122 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%715 = torch.aten.add.Tensor %714, %123, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%716 = torch.aten.transpose.int %120, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18)
%717 = torch.aten.matmul %715, %716 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%718 = torch.aten.add.Tensor %717, %121, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%719 = torch.aten.gelu %718, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10)
%720 = torch.aten.transpose.int %118, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18)
%721 = torch.aten.matmul %719, %720 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%722 = torch.aten.add.Tensor %721, %119, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%723 = torch.aten.add.Tensor %722, %715, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30)
%724 = torch.aten.sum.dim_IntList %723, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%725 = torch.aten.div.Scalar %724, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%726 = torch.aten.size.int %723, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%727 = torch.aten.size.int %723, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%728 = torch.prim.ListConstruct %726, %727, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%729 = torch.aten.broadcast_to %725, %728 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%730 = torch.aten.sub.Tensor %723, %729, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%731 = torch.aten.mul.Tensor %730, %730 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%732 = torch.aten.sum.dim_IntList %731, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%733 = torch.aten.div.Scalar %732, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%734 = torch.aten.add.Scalar %733, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%735 = torch.aten.rsqrt %734 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%736 = torch.aten.size.int %723, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%737 = torch.aten.size.int %723, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%738 = torch.prim.ListConstruct %736, %737, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%739 = torch.aten.broadcast_to %735, %738 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%740 = torch.aten.mul.Tensor %730, %739 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%741 = torch.aten.mul.Tensor %740, %116 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%742 = torch.aten.add.Tensor %741, %117, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
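// Next encoder layer (same pattern).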
%743 = torch.aten.transpose.int %114, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%744 = torch.aten.matmul %742, %743 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%745 = torch.aten.add.Tensor %744, %115, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%746 = torch.aten.transpose.int %112, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%747 = torch.aten.matmul %742, %746 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%748 = torch.aten.add.Tensor %747, %113, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%749 = torch.aten.size.int %748, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%750 = torch.prim.NumToTensor.Scalar %749 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%751 = torch.aten.size.int %748, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%752 = torch.prim.NumToTensor.Scalar %751 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%753 = torch.prim.ListConstruct %749, %751, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%754 = torch.aten.view %748, %753 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%755 = torch.aten.permute %754, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%756 = torch.aten.transpose.int %110, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%757 = torch.aten.matmul %742, %756 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%758 = torch.aten.add.Tensor %757, %111, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%759 = torch.aten.size.int %758, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%760 = torch.prim.NumToTensor.Scalar %759 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%761 = torch.aten.size.int %758, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%762 = torch.prim.NumToTensor.Scalar %761 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%763 = torch.prim.ListConstruct %759, %761, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%764 = torch.aten.view %758, %763 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%765 = torch.aten.permute %764, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%766 = torch.aten.size.int %745, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%767 = torch.prim.NumToTensor.Scalar %766 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%768 = torch.aten.size.int %745, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%769 = torch.prim.NumToTensor.Scalar %768 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%770 = torch.prim.ListConstruct %766, %768, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%771 = torch.aten.view %745, %770 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%772 = torch.aten.permute %771, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%773 = torch.aten.transpose.int %755, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11) | |
%774 = torch.aten.matmul %772, %773 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11) | |
%775 = torch.aten.div.Tensor %774, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6) | |
%776 = torch.aten.add.Tensor %775, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23) | |
%values_8, %indices_9 = torch.aten.max.dim %776, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24) | |
%777 = torch.aten.sub.Tensor %776, %values_8, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%778 = torch.aten.exp %777 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%779 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24) | |
%780 = torch.aten.sum.dim_IntList %778, %779, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24) | |
%781 = torch.aten.div.Tensor %778, %780 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%782 = torch.aten.matmul %781, %765 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25) | |
%783 = torch.aten.permute %782, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%784 = torch.aten.contiguous %783, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%785 = torch.aten.size.int %784, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%786 = torch.prim.NumToTensor.Scalar %785 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%787 = torch.aten.size.int %784, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%788 = torch.prim.NumToTensor.Scalar %787 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%789 = torch.prim.ListConstruct %785, %787, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35) | |
%790 = torch.aten.view %784, %789 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28) | |
%791 = torch.aten.transpose.int %108, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%792 = torch.aten.matmul %790, %791 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%793 = torch.aten.add.Tensor %792, %109, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%794 = torch.aten.add.Tensor %793, %742, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29) | |
%795 = torch.aten.sum.dim_IntList %794, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%796 = torch.aten.div.Scalar %795, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%797 = torch.aten.size.int %794, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%798 = torch.aten.size.int %794, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%799 = torch.prim.ListConstruct %797, %798, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%800 = torch.aten.broadcast_to %796, %799 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%801 = torch.aten.sub.Tensor %794, %800, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%802 = torch.aten.mul.Tensor %801, %801 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%803 = torch.aten.sum.dim_IntList %802, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%804 = torch.aten.div.Scalar %803, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%805 = torch.aten.add.Scalar %804, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%806 = torch.aten.rsqrt %805 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%807 = torch.aten.size.int %794, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%808 = torch.aten.size.int %794, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%809 = torch.prim.ListConstruct %807, %808, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%810 = torch.aten.broadcast_to %806, %809 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%811 = torch.aten.mul.Tensor %801, %810 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%812 = torch.aten.mul.Tensor %811, %106 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%813 = torch.aten.add.Tensor %812, %107, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%814 = torch.aten.transpose.int %104, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18) | |
%815 = torch.aten.matmul %813, %814 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18) | |
%816 = torch.aten.add.Tensor %815, %105, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18) | |
%817 = torch.aten.gelu %816, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10) | |
%818 = torch.aten.transpose.int %102, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18) | |
%819 = torch.aten.matmul %817, %818 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%820 = torch.aten.add.Tensor %819, %103, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%821 = torch.aten.add.Tensor %820, %813, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30) | |
%822 = torch.aten.sum.dim_IntList %821, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%823 = torch.aten.div.Scalar %822, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%824 = torch.aten.size.int %821, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%825 = torch.aten.size.int %821, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%826 = torch.prim.ListConstruct %824, %825, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%827 = torch.aten.broadcast_to %823, %826 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%828 = torch.aten.sub.Tensor %821, %827, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%829 = torch.aten.mul.Tensor %828, %828 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%830 = torch.aten.sum.dim_IntList %829, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%831 = torch.aten.div.Scalar %830, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%832 = torch.aten.add.Scalar %831, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%833 = torch.aten.rsqrt %832 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%834 = torch.aten.size.int %821, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%835 = torch.aten.size.int %821, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%836 = torch.prim.ListConstruct %834, %835, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%837 = torch.aten.broadcast_to %833, %836 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%838 = torch.aten.mul.Tensor %828, %837 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%839 = torch.aten.mul.Tensor %838, %100 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%840 = torch.aten.add.Tensor %839, %101, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
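// Multi-head self-attention for the next encoder layer. The three Linear(768 -> 768) projections below | |
// follow the HF BERT ordering: query (%98/%99), key (%96/%97), value (%94/%95). Each result is viewed | |
// as [batch, seq, 12, 64] and permuted (%264) to [batch, 12, seq, 64]. The NumToTensor.Scalar shape | |
// scalars are unused in this excerpt and look like tracing leftovers. | |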
%841 = torch.aten.transpose.int %98, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%842 = torch.aten.matmul %840, %841 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%843 = torch.aten.add.Tensor %842, %99, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%844 = torch.aten.transpose.int %96, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%845 = torch.aten.matmul %840, %844 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%846 = torch.aten.add.Tensor %845, %97, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%847 = torch.aten.size.int %846, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%848 = torch.prim.NumToTensor.Scalar %847 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%849 = torch.aten.size.int %846, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%850 = torch.prim.NumToTensor.Scalar %849 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%851 = torch.prim.ListConstruct %847, %849, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%852 = torch.aten.view %846, %851 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%853 = torch.aten.permute %852, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%854 = torch.aten.transpose.int %94, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%855 = torch.aten.matmul %840, %854 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%856 = torch.aten.add.Tensor %855, %95, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%857 = torch.aten.size.int %856, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%858 = torch.prim.NumToTensor.Scalar %857 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%859 = torch.aten.size.int %856, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%860 = torch.prim.NumToTensor.Scalar %859 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%861 = torch.prim.ListConstruct %857, %859, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%862 = torch.aten.view %856, %861 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%863 = torch.aten.permute %862, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%864 = torch.aten.size.int %843, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%865 = torch.prim.NumToTensor.Scalar %864 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%866 = torch.aten.size.int %843, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%867 = torch.prim.NumToTensor.Scalar %866 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%868 = torch.prim.ListConstruct %864, %866, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%869 = torch.aten.view %843, %868 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%870 = torch.aten.permute %869, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
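// Attention scores: Q @ K^T, divided by %190 (a 0-d f64 tensor defined earlier, presumably | |
// sqrt(head_dim) = sqrt(64) = 8), plus the additive attention mask %221 broadcast from [?,1,1,?]. | |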
%871 = torch.aten.transpose.int %853, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11) | |
%872 = torch.aten.matmul %870, %871 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11) | |
%873 = torch.aten.div.Tensor %872, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6) | |
%874 = torch.aten.add.Tensor %873, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23) | |
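// Numerically stable softmax along the last dim, expanded into primitives: | |
//   softmax(x) = exp(x - max(x)) / sum(exp(x - max(x))) | |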
%values_10, %indices_11 = torch.aten.max.dim %874, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24) | |
%875 = torch.aten.sub.Tensor %874, %values_10, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%876 = torch.aten.exp %875 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%877 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24) | |
%878 = torch.aten.sum.dim_IntList %876, %877, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24) | |
%879 = torch.aten.div.Tensor %876, %878 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
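// Context: attention probabilities @ V, permuted back to [batch, seq, 12, 64], made contiguous, and | |
// flattened to [batch, seq, 768]. | |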
%880 = torch.aten.matmul %879, %863 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25) | |
%881 = torch.aten.permute %880, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%882 = torch.aten.contiguous %881, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%883 = torch.aten.size.int %882, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%884 = torch.prim.NumToTensor.Scalar %883 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%885 = torch.aten.size.int %882, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%886 = torch.prim.NumToTensor.Scalar %885 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%887 = torch.prim.ListConstruct %883, %885, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35) | |
%888 = torch.aten.view %882, %887 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28) | |
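// Attention output projection (%92/%93) and the residual connection back to the attention input (%840). | |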
%889 = torch.aten.transpose.int %92, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%890 = torch.aten.matmul %888, %889 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%891 = torch.aten.add.Tensor %890, %93, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%892 = torch.aten.add.Tensor %891, %840, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29) | |
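// From here the layer epilogue repeats the pattern above: LayerNorm (%90/%91), feed-forward | |
// 768 -> 3072 -> 768 with GELU (%88/%89, %86/%87), residual, and a second LayerNorm (%84/%85). | |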
%893 = torch.aten.sum.dim_IntList %892, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%894 = torch.aten.div.Scalar %893, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%895 = torch.aten.size.int %892, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%896 = torch.aten.size.int %892, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%897 = torch.prim.ListConstruct %895, %896, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%898 = torch.aten.broadcast_to %894, %897 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%899 = torch.aten.sub.Tensor %892, %898, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%900 = torch.aten.mul.Tensor %899, %899 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%901 = torch.aten.sum.dim_IntList %900, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%902 = torch.aten.div.Scalar %901, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%903 = torch.aten.add.Scalar %902, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%904 = torch.aten.rsqrt %903 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%905 = torch.aten.size.int %892, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%906 = torch.aten.size.int %892, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%907 = torch.prim.ListConstruct %905, %906, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%908 = torch.aten.broadcast_to %904, %907 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%909 = torch.aten.mul.Tensor %899, %908 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%910 = torch.aten.mul.Tensor %909, %90 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%911 = torch.aten.add.Tensor %910, %91, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%912 = torch.aten.transpose.int %88, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18) | |
%913 = torch.aten.matmul %911, %912 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18) | |
%914 = torch.aten.add.Tensor %913, %89, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18) | |
%915 = torch.aten.gelu %914, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10) | |
%916 = torch.aten.transpose.int %86, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18) | |
%917 = torch.aten.matmul %915, %916 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%918 = torch.aten.add.Tensor %917, %87, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%919 = torch.aten.add.Tensor %918, %911, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30) | |
%920 = torch.aten.sum.dim_IntList %919, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%921 = torch.aten.div.Scalar %920, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%922 = torch.aten.size.int %919, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%923 = torch.aten.size.int %919, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%924 = torch.prim.ListConstruct %922, %923, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%925 = torch.aten.broadcast_to %921, %924 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%926 = torch.aten.sub.Tensor %919, %925, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%927 = torch.aten.mul.Tensor %926, %926 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%928 = torch.aten.sum.dim_IntList %927, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%929 = torch.aten.div.Scalar %928, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%930 = torch.aten.add.Scalar %929, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%931 = torch.aten.rsqrt %930 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%932 = torch.aten.size.int %919, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%933 = torch.aten.size.int %919, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%934 = torch.prim.ListConstruct %932, %933, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%935 = torch.aten.broadcast_to %931, %934 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%936 = torch.aten.mul.Tensor %926, %935 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%937 = torch.aten.mul.Tensor %936, %84 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%938 = torch.aten.add.Tensor %937, %85, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
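// Next encoder layer, identical structure: Q %82/%83, K %80/%81, V %78/%79, output projection %76/%77, | |
// LayerNorms %74/%75 and %68/%69, feed-forward %72/%73 and %70/%71. | |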
%939 = torch.aten.transpose.int %82, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%940 = torch.aten.matmul %938, %939 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%941 = torch.aten.add.Tensor %940, %83, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%942 = torch.aten.transpose.int %80, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%943 = torch.aten.matmul %938, %942 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%944 = torch.aten.add.Tensor %943, %81, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%945 = torch.aten.size.int %944, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%946 = torch.prim.NumToTensor.Scalar %945 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%947 = torch.aten.size.int %944, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%948 = torch.prim.NumToTensor.Scalar %947 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%949 = torch.prim.ListConstruct %945, %947, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%950 = torch.aten.view %944, %949 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%951 = torch.aten.permute %950, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%952 = torch.aten.transpose.int %78, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%953 = torch.aten.matmul %938, %952 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%954 = torch.aten.add.Tensor %953, %79, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%955 = torch.aten.size.int %954, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%956 = torch.prim.NumToTensor.Scalar %955 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%957 = torch.aten.size.int %954, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%958 = torch.prim.NumToTensor.Scalar %957 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%959 = torch.prim.ListConstruct %955, %957, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%960 = torch.aten.view %954, %959 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%961 = torch.aten.permute %960, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%962 = torch.aten.size.int %941, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%963 = torch.prim.NumToTensor.Scalar %962 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%964 = torch.aten.size.int %941, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%965 = torch.prim.NumToTensor.Scalar %964 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%966 = torch.prim.ListConstruct %962, %964, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%967 = torch.aten.view %941, %966 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%968 = torch.aten.permute %967, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%969 = torch.aten.transpose.int %951, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11) | |
%970 = torch.aten.matmul %968, %969 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11) | |
%971 = torch.aten.div.Tensor %970, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6) | |
%972 = torch.aten.add.Tensor %971, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23) | |
%values_12, %indices_13 = torch.aten.max.dim %972, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24) | |
%973 = torch.aten.sub.Tensor %972, %values_12, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%974 = torch.aten.exp %973 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%975 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24) | |
%976 = torch.aten.sum.dim_IntList %974, %975, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24) | |
%977 = torch.aten.div.Tensor %974, %976 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%978 = torch.aten.matmul %977, %961 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25) | |
%979 = torch.aten.permute %978, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%980 = torch.aten.contiguous %979, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%981 = torch.aten.size.int %980, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%982 = torch.prim.NumToTensor.Scalar %981 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%983 = torch.aten.size.int %980, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%984 = torch.prim.NumToTensor.Scalar %983 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%985 = torch.prim.ListConstruct %981, %983, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35) | |
%986 = torch.aten.view %980, %985 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28) | |
%987 = torch.aten.transpose.int %76, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%988 = torch.aten.matmul %986, %987 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%989 = torch.aten.add.Tensor %988, %77, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%990 = torch.aten.add.Tensor %989, %938, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29) | |
%991 = torch.aten.sum.dim_IntList %990, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%992 = torch.aten.div.Scalar %991, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%993 = torch.aten.size.int %990, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%994 = torch.aten.size.int %990, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%995 = torch.prim.ListConstruct %993, %994, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%996 = torch.aten.broadcast_to %992, %995 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%997 = torch.aten.sub.Tensor %990, %996, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%998 = torch.aten.mul.Tensor %997, %997 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%999 = torch.aten.sum.dim_IntList %998, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1000 = torch.aten.div.Scalar %999, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1001 = torch.aten.add.Scalar %1000, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1002 = torch.aten.rsqrt %1001 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1003 = torch.aten.size.int %990, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1004 = torch.aten.size.int %990, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1005 = torch.prim.ListConstruct %1003, %1004, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%1006 = torch.aten.broadcast_to %1002, %1005 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1007 = torch.aten.mul.Tensor %997, %1006 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1008 = torch.aten.mul.Tensor %1007, %74 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1009 = torch.aten.add.Tensor %1008, %75, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1010 = torch.aten.transpose.int %72, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18) | |
%1011 = torch.aten.matmul %1009, %1010 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18) | |
%1012 = torch.aten.add.Tensor %1011, %73, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18) | |
%1013 = torch.aten.gelu %1012, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10) | |
%1014 = torch.aten.transpose.int %70, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18) | |
%1015 = torch.aten.matmul %1013, %1014 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1016 = torch.aten.add.Tensor %1015, %71, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1017 = torch.aten.add.Tensor %1016, %1009, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30) | |
%1018 = torch.aten.sum.dim_IntList %1017, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1019 = torch.aten.div.Scalar %1018, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1020 = torch.aten.size.int %1017, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1021 = torch.aten.size.int %1017, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1022 = torch.prim.ListConstruct %1020, %1021, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%1023 = torch.aten.broadcast_to %1019, %1022 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1024 = torch.aten.sub.Tensor %1017, %1023, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1025 = torch.aten.mul.Tensor %1024, %1024 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1026 = torch.aten.sum.dim_IntList %1025, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1027 = torch.aten.div.Scalar %1026, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1028 = torch.aten.add.Scalar %1027, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1029 = torch.aten.rsqrt %1028 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1030 = torch.aten.size.int %1017, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1031 = torch.aten.size.int %1017, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1032 = torch.prim.ListConstruct %1030, %1031, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%1033 = torch.aten.broadcast_to %1029, %1032 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1034 = torch.aten.mul.Tensor %1024, %1033 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1035 = torch.aten.mul.Tensor %1034, %68 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1036 = torch.aten.add.Tensor %1035, %69, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
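// Next encoder layer: Q %66/%67, K %64/%65, V %62/%63, output projection %60/%61, LayerNorms %58/%59 | |
// and %52/%53, feed-forward %56/%57 and %54/%55. | |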
%1037 = torch.aten.transpose.int %66, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%1038 = torch.aten.matmul %1036, %1037 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1039 = torch.aten.add.Tensor %1038, %67, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1040 = torch.aten.transpose.int %64, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%1041 = torch.aten.matmul %1036, %1040 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1042 = torch.aten.add.Tensor %1041, %65, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1043 = torch.aten.size.int %1042, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1044 = torch.prim.NumToTensor.Scalar %1043 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1045 = torch.aten.size.int %1042, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1046 = torch.prim.NumToTensor.Scalar %1045 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1047 = torch.prim.ListConstruct %1043, %1045, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%1048 = torch.aten.view %1042, %1047 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%1049 = torch.aten.permute %1048, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%1050 = torch.aten.transpose.int %62, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%1051 = torch.aten.matmul %1036, %1050 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1052 = torch.aten.add.Tensor %1051, %63, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1053 = torch.aten.size.int %1052, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1054 = torch.prim.NumToTensor.Scalar %1053 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1055 = torch.aten.size.int %1052, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1056 = torch.prim.NumToTensor.Scalar %1055 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1057 = torch.prim.ListConstruct %1053, %1055, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%1058 = torch.aten.view %1052, %1057 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%1059 = torch.aten.permute %1058, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%1060 = torch.aten.size.int %1039, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1061 = torch.prim.NumToTensor.Scalar %1060 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1062 = torch.aten.size.int %1039, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1063 = torch.prim.NumToTensor.Scalar %1062 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1064 = torch.prim.ListConstruct %1060, %1062, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%1065 = torch.aten.view %1039, %1064 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%1066 = torch.aten.permute %1065, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%1067 = torch.aten.transpose.int %1049, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11) | |
%1068 = torch.aten.matmul %1066, %1067 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11) | |
%1069 = torch.aten.div.Tensor %1068, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6) | |
%1070 = torch.aten.add.Tensor %1069, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23) | |
%values_14, %indices_15 = torch.aten.max.dim %1070, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24) | |
%1071 = torch.aten.sub.Tensor %1070, %values_14, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%1072 = torch.aten.exp %1071 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%1073 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24) | |
%1074 = torch.aten.sum.dim_IntList %1072, %1073, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24) | |
%1075 = torch.aten.div.Tensor %1072, %1074 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%1076 = torch.aten.matmul %1075, %1059 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25) | |
%1077 = torch.aten.permute %1076, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%1078 = torch.aten.contiguous %1077, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%1079 = torch.aten.size.int %1078, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%1080 = torch.prim.NumToTensor.Scalar %1079 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1081 = torch.aten.size.int %1078, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%1082 = torch.prim.NumToTensor.Scalar %1081 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1083 = torch.prim.ListConstruct %1079, %1081, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35) | |
%1084 = torch.aten.view %1078, %1083 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28) | |
%1085 = torch.aten.transpose.int %60, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%1086 = torch.aten.matmul %1084, %1085 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1087 = torch.aten.add.Tensor %1086, %61, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1088 = torch.aten.add.Tensor %1087, %1036, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29) | |
%1089 = torch.aten.sum.dim_IntList %1088, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1090 = torch.aten.div.Scalar %1089, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1091 = torch.aten.size.int %1088, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1092 = torch.aten.size.int %1088, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1093 = torch.prim.ListConstruct %1091, %1092, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%1094 = torch.aten.broadcast_to %1090, %1093 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1095 = torch.aten.sub.Tensor %1088, %1094, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1096 = torch.aten.mul.Tensor %1095, %1095 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1097 = torch.aten.sum.dim_IntList %1096, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1098 = torch.aten.div.Scalar %1097, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1099 = torch.aten.add.Scalar %1098, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1100 = torch.aten.rsqrt %1099 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1101 = torch.aten.size.int %1088, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1102 = torch.aten.size.int %1088, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1103 = torch.prim.ListConstruct %1101, %1102, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%1104 = torch.aten.broadcast_to %1100, %1103 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1105 = torch.aten.mul.Tensor %1095, %1104 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1106 = torch.aten.mul.Tensor %1105, %58 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1107 = torch.aten.add.Tensor %1106, %59, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1108 = torch.aten.transpose.int %56, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18) | |
%1109 = torch.aten.matmul %1107, %1108 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18) | |
%1110 = torch.aten.add.Tensor %1109, %57, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18) | |
%1111 = torch.aten.gelu %1110, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10) | |
%1112 = torch.aten.transpose.int %54, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18) | |
%1113 = torch.aten.matmul %1111, %1112 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1114 = torch.aten.add.Tensor %1113, %55, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1115 = torch.aten.add.Tensor %1114, %1107, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30) | |
%1116 = torch.aten.sum.dim_IntList %1115, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1117 = torch.aten.div.Scalar %1116, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1118 = torch.aten.size.int %1115, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1119 = torch.aten.size.int %1115, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1120 = torch.prim.ListConstruct %1118, %1119, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%1121 = torch.aten.broadcast_to %1117, %1120 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1122 = torch.aten.sub.Tensor %1115, %1121, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1123 = torch.aten.mul.Tensor %1122, %1122 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1124 = torch.aten.sum.dim_IntList %1123, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1125 = torch.aten.div.Scalar %1124, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1126 = torch.aten.add.Scalar %1125, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1127 = torch.aten.rsqrt %1126 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1128 = torch.aten.size.int %1115, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1129 = torch.aten.size.int %1115, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1130 = torch.prim.ListConstruct %1128, %1129, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%1131 = torch.aten.broadcast_to %1127, %1130 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1132 = torch.aten.mul.Tensor %1122, %1131 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1133 = torch.aten.mul.Tensor %1132, %52 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1134 = torch.aten.add.Tensor %1133, %53, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
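// Next encoder layer: Q %50/%51, K %48/%49, V %46/%47, output projection %44/%45, LayerNorms %42/%43 | |
// and %36/%37, feed-forward %40/%41 and %38/%39. | |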
%1135 = torch.aten.transpose.int %50, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%1136 = torch.aten.matmul %1134, %1135 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1137 = torch.aten.add.Tensor %1136, %51, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1138 = torch.aten.transpose.int %48, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%1139 = torch.aten.matmul %1134, %1138 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1140 = torch.aten.add.Tensor %1139, %49, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1141 = torch.aten.size.int %1140, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1142 = torch.prim.NumToTensor.Scalar %1141 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1143 = torch.aten.size.int %1140, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1144 = torch.prim.NumToTensor.Scalar %1143 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1145 = torch.prim.ListConstruct %1141, %1143, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%1146 = torch.aten.view %1140, %1145 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%1147 = torch.aten.permute %1146, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%1148 = torch.aten.transpose.int %46, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%1149 = torch.aten.matmul %1134, %1148 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1150 = torch.aten.add.Tensor %1149, %47, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1151 = torch.aten.size.int %1150, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1152 = torch.prim.NumToTensor.Scalar %1151 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1153 = torch.aten.size.int %1150, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1154 = torch.prim.NumToTensor.Scalar %1153 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1155 = torch.prim.ListConstruct %1151, %1153, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%1156 = torch.aten.view %1150, %1155 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%1157 = torch.aten.permute %1156, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%1158 = torch.aten.size.int %1137, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1159 = torch.prim.NumToTensor.Scalar %1158 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1160 = torch.aten.size.int %1137, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19) | |
%1161 = torch.prim.NumToTensor.Scalar %1160 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1162 = torch.prim.ListConstruct %1158, %1160, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34) | |
%1163 = torch.aten.view %1137, %1162 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12) | |
%1164 = torch.aten.permute %1163, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22) | |
%1165 = torch.aten.transpose.int %1147, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11) | |
%1166 = torch.aten.matmul %1164, %1165 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11) | |
%1167 = torch.aten.div.Tensor %1166, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6) | |
%1168 = torch.aten.add.Tensor %1167, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23) | |
%values_16, %indices_17 = torch.aten.max.dim %1168, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24) | |
%1169 = torch.aten.sub.Tensor %1168, %values_16, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%1170 = torch.aten.exp %1169 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%1171 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24) | |
%1172 = torch.aten.sum.dim_IntList %1170, %1171, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24) | |
%1173 = torch.aten.div.Tensor %1170, %1172 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24) | |
%1174 = torch.aten.matmul %1173, %1157 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25) | |
%1175 = torch.aten.permute %1174, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%1176 = torch.aten.contiguous %1175, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26) | |
%1177 = torch.aten.size.int %1176, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%1178 = torch.prim.NumToTensor.Scalar %1177 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1179 = torch.aten.size.int %1176, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27) | |
%1180 = torch.prim.NumToTensor.Scalar %1179 : !torch.int -> !torch.vtensor<[],si64> loc(#loc) | |
%1181 = torch.prim.ListConstruct %1177, %1179, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35) | |
%1182 = torch.aten.view %1176, %1181 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28) | |
%1183 = torch.aten.transpose.int %44, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18) | |
%1184 = torch.aten.matmul %1182, %1183 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1185 = torch.aten.add.Tensor %1184, %45, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1186 = torch.aten.add.Tensor %1185, %1134, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29) | |
%1187 = torch.aten.sum.dim_IntList %1186, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1188 = torch.aten.div.Scalar %1187, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1189 = torch.aten.size.int %1186, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1190 = torch.aten.size.int %1186, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1191 = torch.prim.ListConstruct %1189, %1190, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%1192 = torch.aten.broadcast_to %1188, %1191 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1193 = torch.aten.sub.Tensor %1186, %1192, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1194 = torch.aten.mul.Tensor %1193, %1193 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1195 = torch.aten.sum.dim_IntList %1194, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1196 = torch.aten.div.Scalar %1195, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1197 = torch.aten.add.Scalar %1196, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1198 = torch.aten.rsqrt %1197 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1) | |
%1199 = torch.aten.size.int %1186, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1200 = torch.aten.size.int %1186, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1) | |
%1201 = torch.prim.ListConstruct %1199, %1200, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1) | |
%1202 = torch.aten.broadcast_to %1198, %1201 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1203 = torch.aten.mul.Tensor %1193, %1202 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1204 = torch.aten.mul.Tensor %1203, %42 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1205 = torch.aten.add.Tensor %1204, %43, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1) | |
%1206 = torch.aten.transpose.int %40, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18) | |
%1207 = torch.aten.matmul %1205, %1206 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18) | |
%1208 = torch.aten.add.Tensor %1207, %41, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18) | |
%1209 = torch.aten.gelu %1208, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10) | |
%1210 = torch.aten.transpose.int %38, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18) | |
%1211 = torch.aten.matmul %1209, %1210 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1212 = torch.aten.add.Tensor %1211, %39, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18) | |
%1213 = torch.aten.add.Tensor %1212, %1205, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30) | |
%1214 = torch.aten.sum.dim_IntList %1213, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1215 = torch.aten.div.Scalar %1214, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1216 = torch.aten.size.int %1213, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1217 = torch.aten.size.int %1213, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1218 = torch.prim.ListConstruct %1216, %1217, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1219 = torch.aten.broadcast_to %1215, %1218 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1220 = torch.aten.sub.Tensor %1213, %1219, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1221 = torch.aten.mul.Tensor %1220, %1220 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1222 = torch.aten.sum.dim_IntList %1221, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1223 = torch.aten.div.Scalar %1222, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1224 = torch.aten.add.Scalar %1223, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1225 = torch.aten.rsqrt %1224 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1226 = torch.aten.size.int %1213, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1227 = torch.aten.size.int %1213, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1228 = torch.prim.ListConstruct %1226, %1227, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1229 = torch.aten.broadcast_to %1225, %1228 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1230 = torch.aten.mul.Tensor %1220, %1229 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1231 = torch.aten.mul.Tensor %1230, %36 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1232 = torch.aten.add.Tensor %1231, %37, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
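// Self-attention: three 768 -> 768 projections of %1232 (query %34/%35, key %32/%33, value %30/%31), each reshaped to [?,?,12,64] and permuted to [?,12,?,64] for 12 heads of size 64; the raw scores Q x K^T are divided by %190 (sqrt of the head size) and offset by the additive attention mask %221.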
%1233 = torch.aten.transpose.int %34, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%1234 = torch.aten.matmul %1232, %1233 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1235 = torch.aten.add.Tensor %1234, %35, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1236 = torch.aten.transpose.int %32, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%1237 = torch.aten.matmul %1232, %1236 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1238 = torch.aten.add.Tensor %1237, %33, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1239 = torch.aten.size.int %1238, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1240 = torch.prim.NumToTensor.Scalar %1239 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1241 = torch.aten.size.int %1238, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1242 = torch.prim.NumToTensor.Scalar %1241 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1243 = torch.prim.ListConstruct %1239, %1241, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%1244 = torch.aten.view %1238, %1243 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%1245 = torch.aten.permute %1244, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%1246 = torch.aten.transpose.int %30, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%1247 = torch.aten.matmul %1232, %1246 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1248 = torch.aten.add.Tensor %1247, %31, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1249 = torch.aten.size.int %1248, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1250 = torch.prim.NumToTensor.Scalar %1249 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1251 = torch.aten.size.int %1248, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1252 = torch.prim.NumToTensor.Scalar %1251 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1253 = torch.prim.ListConstruct %1249, %1251, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%1254 = torch.aten.view %1248, %1253 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%1255 = torch.aten.permute %1254, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%1256 = torch.aten.size.int %1235, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1257 = torch.prim.NumToTensor.Scalar %1256 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1258 = torch.aten.size.int %1235, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1259 = torch.prim.NumToTensor.Scalar %1258 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1260 = torch.prim.ListConstruct %1256, %1258, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%1261 = torch.aten.view %1235, %1260 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%1262 = torch.aten.permute %1261, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%1263 = torch.aten.transpose.int %1245, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11)
%1264 = torch.aten.matmul %1262, %1263 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11)
%1265 = torch.aten.div.Tensor %1264, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6)
%1266 = torch.aten.add.Tensor %1265, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23)
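// Softmax over the last dim, decomposed for numerical stability: subtract the per-row max, exponentiate, sum, divide.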
%values_18, %indices_19 = torch.aten.max.dim %1266, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24)
%1267 = torch.aten.sub.Tensor %1266, %values_18, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%1268 = torch.aten.exp %1267 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%1269 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24)
%1270 = torch.aten.sum.dim_IntList %1268, %1269, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24)
%1271 = torch.aten.div.Tensor %1268, %1270 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%1272 = torch.aten.matmul %1271, %1255 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25)
%1273 = torch.aten.permute %1272, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
%1274 = torch.aten.contiguous %1273, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
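// Merge heads: the per-head context, already permuted back to [?,?,12,64] and made contiguous, is flattened to [?,?,768] and run through the attention output projection (%28/%29) with a residual add onto %1232.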
%1275 = torch.aten.size.int %1274, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
%1276 = torch.prim.NumToTensor.Scalar %1275 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1277 = torch.aten.size.int %1274, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
%1278 = torch.prim.NumToTensor.Scalar %1277 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1279 = torch.prim.ListConstruct %1275, %1277, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35)
%1280 = torch.aten.view %1274, %1279 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28)
%1281 = torch.aten.transpose.int %28, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%1282 = torch.aten.matmul %1280, %1281 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1283 = torch.aten.add.Tensor %1282, %29, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1284 = torch.aten.add.Tensor %1283, %1232, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29)
%1285 = torch.aten.sum.dim_IntList %1284, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1286 = torch.aten.div.Scalar %1285, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1287 = torch.aten.size.int %1284, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1288 = torch.aten.size.int %1284, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1289 = torch.prim.ListConstruct %1287, %1288, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1290 = torch.aten.broadcast_to %1286, %1289 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1291 = torch.aten.sub.Tensor %1284, %1290, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1292 = torch.aten.mul.Tensor %1291, %1291 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1293 = torch.aten.sum.dim_IntList %1292, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1294 = torch.aten.div.Scalar %1293, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1295 = torch.aten.add.Scalar %1294, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1296 = torch.aten.rsqrt %1295 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1297 = torch.aten.size.int %1284, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1298 = torch.aten.size.int %1284, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1299 = torch.prim.ListConstruct %1297, %1298, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1300 = torch.aten.broadcast_to %1296, %1299 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1301 = torch.aten.mul.Tensor %1291, %1300 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1302 = torch.aten.mul.Tensor %1301, %26 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1303 = torch.aten.add.Tensor %1302, %27, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1304 = torch.aten.transpose.int %24, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18)
%1305 = torch.aten.matmul %1303, %1304 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%1306 = torch.aten.add.Tensor %1305, %25, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%1307 = torch.aten.gelu %1306, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10)
%1308 = torch.aten.transpose.int %22, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18)
%1309 = torch.aten.matmul %1307, %1308 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1310 = torch.aten.add.Tensor %1309, %23, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1311 = torch.aten.add.Tensor %1310, %1303, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30)
%1312 = torch.aten.sum.dim_IntList %1311, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1313 = torch.aten.div.Scalar %1312, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1314 = torch.aten.size.int %1311, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1315 = torch.aten.size.int %1311, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1316 = torch.prim.ListConstruct %1314, %1315, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1317 = torch.aten.broadcast_to %1313, %1316 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1318 = torch.aten.sub.Tensor %1311, %1317, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1319 = torch.aten.mul.Tensor %1318, %1318 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1320 = torch.aten.sum.dim_IntList %1319, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1321 = torch.aten.div.Scalar %1320, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1322 = torch.aten.add.Scalar %1321, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1323 = torch.aten.rsqrt %1322 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1324 = torch.aten.size.int %1311, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1325 = torch.aten.size.int %1311, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1326 = torch.prim.ListConstruct %1324, %1325, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1327 = torch.aten.broadcast_to %1323, %1326 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1328 = torch.aten.mul.Tensor %1318, %1327 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1329 = torch.aten.mul.Tensor %1328, %20 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1330 = torch.aten.add.Tensor %1329, %21, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
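// Final encoder layer: the same attention / LayerNorm / feed-forward pattern repeats with the last weight set (%18 through %4).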
%1331 = torch.aten.transpose.int %18, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%1332 = torch.aten.matmul %1330, %1331 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1333 = torch.aten.add.Tensor %1332, %19, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1334 = torch.aten.transpose.int %16, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%1335 = torch.aten.matmul %1330, %1334 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1336 = torch.aten.add.Tensor %1335, %17, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1337 = torch.aten.size.int %1336, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1338 = torch.prim.NumToTensor.Scalar %1337 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1339 = torch.aten.size.int %1336, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1340 = torch.prim.NumToTensor.Scalar %1339 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1341 = torch.prim.ListConstruct %1337, %1339, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%1342 = torch.aten.view %1336, %1341 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%1343 = torch.aten.permute %1342, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%1344 = torch.aten.transpose.int %14, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%1345 = torch.aten.matmul %1330, %1344 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1346 = torch.aten.add.Tensor %1345, %15, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1347 = torch.aten.size.int %1346, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1348 = torch.prim.NumToTensor.Scalar %1347 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1349 = torch.aten.size.int %1346, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1350 = torch.prim.NumToTensor.Scalar %1349 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1351 = torch.prim.ListConstruct %1347, %1349, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%1352 = torch.aten.view %1346, %1351 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%1353 = torch.aten.permute %1352, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%1354 = torch.aten.size.int %1333, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1355 = torch.prim.NumToTensor.Scalar %1354 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1356 = torch.aten.size.int %1333, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc19)
%1357 = torch.prim.NumToTensor.Scalar %1356 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1358 = torch.prim.ListConstruct %1354, %1356, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc34)
%1359 = torch.aten.view %1333, %1358 : !torch.vtensor<[?,?,768],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc12)
%1360 = torch.aten.permute %1359, %264 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc22)
%1361 = torch.aten.transpose.int %1343, %int-1, %int-2 : !torch.vtensor<[?,12,?,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[?,12,64,?],f32> loc(#loc11)
%1362 = torch.aten.matmul %1360, %1361 : !torch.vtensor<[?,12,?,64],f32>, !torch.vtensor<[?,12,64,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc11)
%1363 = torch.aten.div.Tensor %1362, %190 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc6)
%1364 = torch.aten.add.Tensor %1363, %221, %int1 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,1,1,?],f32>, !torch.int -> !torch.vtensor<[?,12,?,?],f32> loc(#loc23)
%values_20, %indices_21 = torch.aten.max.dim %1364, %int-1, %true : !torch.vtensor<[?,12,?,?],f32>, !torch.int, !torch.bool -> !torch.vtensor<[?,12,?,1],f32>, !torch.vtensor<[?,12,?,1],si64> loc(#loc24)
%1365 = torch.aten.sub.Tensor %1364, %values_20, %float1.000000e00 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32>, !torch.float -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%1366 = torch.aten.exp %1365 : !torch.vtensor<[?,12,?,?],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%1367 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> loc(#loc24)
%1368 = torch.aten.sum.dim_IntList %1366, %1367, %true, %none : !torch.vtensor<[?,12,?,?],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,12,?,1],f32> loc(#loc24)
%1369 = torch.aten.div.Tensor %1366, %1368 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,1],f32> -> !torch.vtensor<[?,12,?,?],f32> loc(#loc24)
%1370 = torch.aten.matmul %1369, %1353 : !torch.vtensor<[?,12,?,?],f32>, !torch.vtensor<[?,12,?,64],f32> -> !torch.vtensor<[?,12,?,64],f32> loc(#loc25)
%1371 = torch.aten.permute %1370, %264 : !torch.vtensor<[?,12,?,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
%1372 = torch.aten.contiguous %1371, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.vtensor<[?,?,12,64],f32> loc(#loc26)
%1373 = torch.aten.size.int %1372, %int0 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
%1374 = torch.prim.NumToTensor.Scalar %1373 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1375 = torch.aten.size.int %1372, %int1 : !torch.vtensor<[?,?,12,64],f32>, !torch.int -> !torch.int loc(#loc27)
%1376 = torch.prim.NumToTensor.Scalar %1375 : !torch.int -> !torch.vtensor<[],si64> loc(#loc)
%1377 = torch.prim.ListConstruct %1373, %1375, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc35)
%1378 = torch.aten.view %1372, %1377 : !torch.vtensor<[?,?,12,64],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc28)
%1379 = torch.aten.transpose.int %12, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%1380 = torch.aten.matmul %1378, %1379 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1381 = torch.aten.add.Tensor %1380, %13, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1382 = torch.aten.add.Tensor %1381, %1330, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc29)
%1383 = torch.aten.sum.dim_IntList %1382, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1384 = torch.aten.div.Scalar %1383, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1385 = torch.aten.size.int %1382, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1386 = torch.aten.size.int %1382, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1387 = torch.prim.ListConstruct %1385, %1386, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1388 = torch.aten.broadcast_to %1384, %1387 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1389 = torch.aten.sub.Tensor %1382, %1388, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1390 = torch.aten.mul.Tensor %1389, %1389 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1391 = torch.aten.sum.dim_IntList %1390, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1392 = torch.aten.div.Scalar %1391, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1393 = torch.aten.add.Scalar %1392, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1394 = torch.aten.rsqrt %1393 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1395 = torch.aten.size.int %1382, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1396 = torch.aten.size.int %1382, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1397 = torch.prim.ListConstruct %1395, %1396, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1398 = torch.aten.broadcast_to %1394, %1397 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1399 = torch.aten.mul.Tensor %1389, %1398 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1400 = torch.aten.mul.Tensor %1399, %10 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1401 = torch.aten.add.Tensor %1400, %11, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1402 = torch.aten.transpose.int %8, %int0, %int1 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> loc(#loc18)
%1403 = torch.aten.matmul %1401, %1402 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%1404 = torch.aten.add.Tensor %1403, %9, %float1.000000e00 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072],f32>, !torch.float -> !torch.vtensor<[?,?,3072],f32> loc(#loc18)
%1405 = torch.aten.gelu %1404, %str : !torch.vtensor<[?,?,3072],f32>, !torch.str -> !torch.vtensor<[?,?,3072],f32> loc(#loc10)
%1406 = torch.aten.transpose.int %6, %int0, %int1 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> loc(#loc18)
%1407 = torch.aten.matmul %1405, %1406 : !torch.vtensor<[?,?,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1408 = torch.aten.add.Tensor %1407, %7, %float1.000000e00 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,?,768],f32> loc(#loc18)
%1409 = torch.aten.add.Tensor %1408, %1401, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc30)
%1410 = torch.aten.sum.dim_IntList %1409, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1411 = torch.aten.div.Scalar %1410, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1412 = torch.aten.size.int %1409, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1413 = torch.aten.size.int %1409, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1414 = torch.prim.ListConstruct %1412, %1413, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1415 = torch.aten.broadcast_to %1411, %1414 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1416 = torch.aten.sub.Tensor %1409, %1415, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1417 = torch.aten.mul.Tensor %1416, %1416 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1418 = torch.aten.sum.dim_IntList %1417, %232, %true, %none : !torch.vtensor<[?,?,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1419 = torch.aten.div.Scalar %1418, %int768 : !torch.vtensor<[?,?,1],f32>, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1420 = torch.aten.add.Scalar %1419, %float9.999990e-13, %int1 : !torch.vtensor<[?,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1421 = torch.aten.rsqrt %1420 : !torch.vtensor<[?,?,1],f32> -> !torch.vtensor<[?,?,1],f32> loc(#loc1)
%1422 = torch.aten.size.int %1409, %int0 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1423 = torch.aten.size.int %1409, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc1)
%1424 = torch.prim.ListConstruct %1422, %1423, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> loc(#loc1)
%1425 = torch.aten.broadcast_to %1421, %1424 : !torch.vtensor<[?,?,1],f32>, !torch.list<int> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1426 = torch.aten.mul.Tensor %1416, %1425 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[?,?,768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1427 = torch.aten.mul.Tensor %1426, %4 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
%1428 = torch.aten.add.Tensor %1427, %5, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.vtensor<[768],f32>, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc1)
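// Pooler: slice out the first sequence position (conventionally the [CLS] token in BERT), squeeze to [?,768], then a 768 -> 768 dense layer (%2/%3) followed by tanh.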
%1429 = torch.aten.slice.Tensor %1428, %int0, %int0, %int9223372036854775807, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,?,768],f32> loc(#loc31)
%1430 = torch.aten.size.int %1429, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int -> !torch.int loc(#loc31)
%1431 = torch.aten.slice.Tensor %1429, %int1, %int0, %int1, %int1 : !torch.vtensor<[?,?,768],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,1,768],f32> loc(#loc31)
%1432 = torch.aten.squeeze.dim %1431, %int1 : !torch.vtensor<[?,1,768],f32>, !torch.int -> !torch.vtensor<[?,768],f32> loc(#loc31)
%1433 = torch.aten.transpose.int %2, %int0, %int1 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> loc(#loc18)
%1434 = torch.aten.mm %1432, %1433 : !torch.vtensor<[?,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[?,768],f32> loc(#loc18)
%1435 = torch.aten.add.Tensor %1434, %3, %float1.000000e00 : !torch.vtensor<[?,768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[?,768],f32> loc(#loc18)
%1436 = torch.aten.tanh %1435 : !torch.vtensor<[?,768],f32> -> !torch.vtensor<[?,768],f32> loc(#loc32)
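// Classification head: a 768 -> 2 linear layer (%0/%1) over the pooled output; the resulting [?,2] logits are returned.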
%1437 = torch.aten.transpose.int %0, %int0, %int1 : !torch.vtensor<[2,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2],f32> loc(#loc18)
%1438 = torch.aten.mm %1436, %1437 : !torch.vtensor<[?,768],f32>, !torch.vtensor<[768,2],f32> -> !torch.vtensor<[?,2],f32> loc(#loc18)
%1439 = torch.aten.add.Tensor %1438, %1, %float1.000000e00 : !torch.vtensor<[?,2],f32>, !torch.vtensor<[2],f32>, !torch.float -> !torch.vtensor<[?,2],f32> loc(#loc18)
return %1439 : !torch.vtensor<[?,2],f32> loc(#loc)
} loc(#loc)
} loc(#loc)
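// Source-location table: each #locN maps the ops above back to the Python line that emitted them; #loc33 through #loc35 are fused call sites.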
#loc1 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/torch/nn/functional.py":2547:0)
#loc2 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/torch/_tensor.py":849:0)
#loc3 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":967:0)
#loc4 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":980:0)
#loc5 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/modeling_utils.py":826:0)
#loc6 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":347:0)
#loc7 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/modeling_utils.py":839:0)
#loc8 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":984:0)
#loc9 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/torch/nn/functional.py":2237:0)
#loc10 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/activations.py":57:0)
#loc11 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":323:0)
#loc12 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":271:0)
#loc13 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":985:0)
#loc14 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":209:0)
#loc15 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":216:0)
#loc16 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":233:0)
#loc17 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":236:0)
#loc18 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/torch/nn/modules/linear.py":114:0)
#loc19 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":270:0)
#loc20 = loc("-":4783:13)
#loc21 = loc("-":6691:10)
#loc22 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":272:0)
#loc23 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":350:0)
#loc24 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/torch/nn/functional.py":1860:0)
#loc25 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":363:0)
#loc26 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":365:0)
#loc27 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":366:0)
#loc28 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":367:0)
#loc29 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":386:0)
#loc30 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":464:0)
#loc31 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py":660:0)
#loc32 = loc("/home/chi/src/ubuntu20/shark/SHARK/shark.venv/lib/python3.10/site-packages/torch/nn/modules/activation.py":360:0)
#loc33 = loc(callsite(#loc20 at #loc21))
#loc34 = loc(callsite(#loc33 at #loc12))
#loc35 = loc(callsite(#loc33 at #loc28))