Skip to content

Instantly share code, notes, and snippets.

@AmosLewis
Created March 7, 2023 00:25
Show Gist options
  • Save AmosLewis/16c45d383c7b5db2ce94f8d0124e44d1 to your computer and use it in GitHub Desktop.
module attributes {torch.debug_module_name = "_lambda"} {
// Accessor for the FX GraphModule's "_code" attribute: given the module
// object, reads the attribute and returns it as a !torch.str.
// NOTE(review): by torch.fx convention "_code" presumably holds the
// generated Python source of the traced graph — confirm against the tracer.
func.func private @__torch__.torch.fx.graph_module._lambda.__code_getter(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda">) -> !torch.str {
// Read the "_code" attribute off the module object; result is a string value.
%133 = torch.prim.GetAttr %arg0["_code"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.str
return %133 : !torch.str
}
func.func private @__torch__.torch.fx.graph_module._lambda.forward(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda">, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[1,15],si64>}, %arg2: !torch.tensor {torch.type_bound = !torch.vtensor<[1,4],si64>}) -> !torch.tensor {
%int6 = torch.constant.int 6
%true_0 = torch.constant.bool true
%float-3.402820e38 = torch.constant.float -3.4028234663852886E+38
%int-100 = torch.constant.int -100
%none_1 = torch.constant.none
%int-1 = torch.constant.int -1
%false = torch.constant.bool false
%cpu = torch.constant.device "cpu"
%int1 = torch.constant.int 1
%int4 = torch.constant.int 4
%int0 = torch.constant.int 0
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int15 = torch.constant.int 15
%int2 = torch.constant.int 2
%int3 = torch.constant.int 3
%float1.000000e00 = torch.constant.float 1.000000e+00
%float9.999990e-07 = torch.constant.float 9.9999999999999995E-7
%int512 = torch.constant.int 512
%int8 = torch.constant.int 8
%int64 = torch.constant.int 64
%int16 = torch.constant.int 16
%float2.772590e00 = torch.constant.float 2.7725887222397811
%int2048 = torch.constant.int 2048
%float2.079440e00 = torch.constant.float 2.0794415416798357
%int31 = torch.constant.int 31
%float4.419420e-02 = torch.constant.float 0.044194173824159223
%int32128 = torch.constant.int 32128
%133 = torch.prim.ListConstruct %int1, %int4 : (!torch.int, !torch.int) -> !torch.list<int>
%134 = torch.aten.new_zeros %arg2, %133, %int4, %int0, %cpu, %false : !torch.tensor, !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool -> !torch.tensor
%135 = torch.aten.slice.Tensor %arg2, %int1, %int0, %int-1, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%136 = torch.aten.clone %135, %none_1 : !torch.tensor, !torch.none -> !torch.tensor
%137 = torch.aten.slice.Tensor %134, %int1, %int1, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%138 = torch.aten.copy_ %137, %136, %false : !torch.tensor, !torch.tensor, !torch.bool -> !torch.tensor
%139 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%140 = torch.aten.lift_fresh_copy %139 : !torch.tensor -> !torch.tensor
%141 = torch.aten.select.int %134, %int1, %int0 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%142 = torch.aten.fill_.Tensor %141, %140 : !torch.tensor, !torch.tensor -> !torch.tensor
%143 = torch.aten.eq.Scalar %134, %int-100 : !torch.tensor, !torch.int -> !torch.tensor
%144 = torch.aten.masked_fill_.Scalar %134, %143, %int0 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%145 = torch.prim.ListConstruct %int-1, %int15 : (!torch.int, !torch.int) -> !torch.list<int>
%146 = torch.aten.view %arg1, %145 : !torch.tensor, !torch.list<int> -> !torch.tensor
%147 = torch.prim.GetAttr %arg0["_param_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%148 = torch.aten.embedding %147, %146, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%149 = torch.prim.ListConstruct %int1, %int15 : (!torch.int, !torch.int) -> !torch.list<int>
%150 = torch.aten.ones %149, %none_1, %none_1, %cpu, %false : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%151 = torch.aten.slice.Tensor %150, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%152 = torch.aten.unsqueeze %151, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%153 = torch.aten.unsqueeze %152, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%154 = torch.aten.slice.Tensor %153, %int3, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%155 = torch.aten.rsub.Scalar %154, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%156 = torch.aten.mul.Scalar %155, %float-3.402820e38 : !torch.tensor, !torch.float -> !torch.tensor
%157 = torch.aten.pow.Tensor_Scalar %148, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%158 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%159 = torch.aten.mean.dim %157, %158, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%160 = torch.aten.add.Scalar %159, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%161 = torch.aten.rsqrt %160 : !torch.tensor -> !torch.tensor
%162 = torch.aten.mul.Tensor %148, %161 : !torch.tensor, !torch.tensor -> !torch.tensor
%163 = torch.prim.GetAttr %arg0["_param_constant1"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%164 = torch.aten.mul.Tensor %163, %162 : !torch.tensor, !torch.tensor -> !torch.tensor
%165 = torch.prim.GetAttr %arg0["_param_constant2"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%166 = torch.aten.t %165 : !torch.tensor -> !torch.tensor
%167 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%168 = torch.aten.view %164, %167 : !torch.tensor, !torch.list<int> -> !torch.tensor
%169 = torch.aten.mm %168, %166 : !torch.tensor, !torch.tensor -> !torch.tensor
%170 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%171 = torch.aten._unsafe_view %169, %170 : !torch.tensor, !torch.list<int> -> !torch.tensor
%172 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%173 = torch.aten.view %171, %172 : !torch.tensor, !torch.list<int> -> !torch.tensor
%174 = torch.aten.transpose.int %173, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%175 = torch.prim.GetAttr %arg0["_param_constant3"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%176 = torch.aten.t %175 : !torch.tensor -> !torch.tensor
%177 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%178 = torch.aten.view %164, %177 : !torch.tensor, !torch.list<int> -> !torch.tensor
%179 = torch.aten.mm %178, %176 : !torch.tensor, !torch.tensor -> !torch.tensor
%180 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%181 = torch.aten._unsafe_view %179, %180 : !torch.tensor, !torch.list<int> -> !torch.tensor
%182 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%183 = torch.aten.view %181, %182 : !torch.tensor, !torch.list<int> -> !torch.tensor
%184 = torch.aten.transpose.int %183, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%185 = torch.prim.GetAttr %arg0["_param_constant4"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%186 = torch.aten.t %185 : !torch.tensor -> !torch.tensor
%187 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%188 = torch.aten.view %164, %187 : !torch.tensor, !torch.list<int> -> !torch.tensor
%189 = torch.aten.mm %188, %186 : !torch.tensor, !torch.tensor -> !torch.tensor
%190 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%191 = torch.aten._unsafe_view %189, %190 : !torch.tensor, !torch.list<int> -> !torch.tensor
%192 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%193 = torch.aten.view %191, %192 : !torch.tensor, !torch.list<int> -> !torch.tensor
%194 = torch.aten.transpose.int %193, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%195 = torch.aten.transpose.int %184, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%196 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%197 = torch.aten.expand %174, %196, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%198 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%199 = torch.aten.view %197, %198 : !torch.tensor, !torch.list<int> -> !torch.tensor
%200 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%201 = torch.aten.expand %195, %200, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%202 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%203 = torch.aten.view %201, %202 : !torch.tensor, !torch.list<int> -> !torch.tensor
%204 = torch.aten.bmm %199, %203 : !torch.tensor, !torch.tensor -> !torch.tensor
%205 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%206 = torch.aten._unsafe_view %204, %205 : !torch.tensor, !torch.list<int> -> !torch.tensor
%207 = torch.aten.arange %int15, %int4, %none_1, %cpu, %false : !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%208 = torch.aten.slice.Tensor %207, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%209 = torch.aten.unsqueeze %208, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%210 = torch.aten.arange %int15, %int4, %none_1, %cpu, %false : !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%211 = torch.aten.unsqueeze %210, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%212 = torch.aten.slice.Tensor %211, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%213 = torch.aten.sub.Tensor %212, %209, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%214 = torch.aten.gt.Scalar %213, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%215 = torch.prims.convert_element_type %214, %int4 : !torch.tensor, !torch.int -> !torch.tensor
%216 = torch.aten.mul.Scalar %215, %int16 : !torch.tensor, !torch.int -> !torch.tensor
%217 = torch.aten.add.Scalar %216, %int0, %int1 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%218 = torch.aten.abs %213 : !torch.tensor -> !torch.tensor
%219 = torch.aten.lt.Scalar %218, %int8 : !torch.tensor, !torch.int -> !torch.tensor
%220 = torch.prims.convert_element_type %218, %int6 : !torch.tensor, !torch.int -> !torch.tensor
%221 = torch.aten.div.Scalar %220, %int8 : !torch.tensor, !torch.int -> !torch.tensor
%222 = torch.aten.log %221 : !torch.tensor -> !torch.tensor
%223 = torch.aten.div.Scalar %222, %float2.772590e00 : !torch.tensor, !torch.float -> !torch.tensor
%224 = torch.aten.mul.Scalar %223, %int8 : !torch.tensor, !torch.int -> !torch.tensor
%225 = torch.prims.convert_element_type %224, %int4 : !torch.tensor, !torch.int -> !torch.tensor
%226 = torch.aten.add.Scalar %225, %int8, %int1 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%227 = torch.aten.full_like %226, %int15, %int4, %int0, %cpu, %false, %none_1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.tensor
%228 = torch.aten.minimum %226, %227 : !torch.tensor, !torch.tensor -> !torch.tensor
%229 = torch.aten.where.self %219, %218, %228 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%230 = torch.aten.add_.Tensor %217, %229, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%231 = torch.prim.GetAttr %arg0["_param_constant5"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%232 = torch.aten.embedding %231, %230, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%233 = torch.prim.ListConstruct %int2, %int0, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%234 = torch.aten.permute %232, %233 : !torch.tensor, !torch.list<int> -> !torch.tensor
%235 = torch.aten.unsqueeze %234, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%236 = torch.aten.add.Tensor %235, %156, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%237 = torch.aten.add_.Tensor %206, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%238 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%239 = torch.aten.amax %237, %238, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%240 = torch.aten.sub.Tensor %237, %239, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%241 = torch.aten.exp %240 : !torch.tensor -> !torch.tensor
%242 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%243 = torch.aten.sum.dim_IntList %241, %242, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%244 = torch.aten.div.Tensor %241, %243 : !torch.tensor, !torch.tensor -> !torch.tensor
%245 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%246 = torch.aten.expand %244, %245, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%247 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%248 = torch.aten.view %246, %247 : !torch.tensor, !torch.list<int> -> !torch.tensor
%249 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%250 = torch.aten.expand %194, %249, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%251 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%252 = torch.aten.view %250, %251 : !torch.tensor, !torch.list<int> -> !torch.tensor
%253 = torch.aten.bmm %248, %252 : !torch.tensor, !torch.tensor -> !torch.tensor
%254 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%255 = torch.aten._unsafe_view %253, %254 : !torch.tensor, !torch.list<int> -> !torch.tensor
%256 = torch.aten.transpose.int %255, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%257 = torch.aten.clone %256, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%258 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%259 = torch.aten.view %257, %258 : !torch.tensor, !torch.list<int> -> !torch.tensor
%260 = torch.prim.GetAttr %arg0["_param_constant6"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%261 = torch.aten.t %260 : !torch.tensor -> !torch.tensor
%262 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%263 = torch.aten.view %259, %262 : !torch.tensor, !torch.list<int> -> !torch.tensor
%264 = torch.aten.mm %263, %261 : !torch.tensor, !torch.tensor -> !torch.tensor
%265 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%266 = torch.aten._unsafe_view %264, %265 : !torch.tensor, !torch.list<int> -> !torch.tensor
%267 = torch.aten.add.Tensor %148, %266, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%268 = torch.aten.pow.Tensor_Scalar %267, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%269 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%270 = torch.aten.mean.dim %268, %269, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%271 = torch.aten.add.Scalar %270, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%272 = torch.aten.rsqrt %271 : !torch.tensor -> !torch.tensor
%273 = torch.aten.mul.Tensor %267, %272 : !torch.tensor, !torch.tensor -> !torch.tensor
%274 = torch.prim.GetAttr %arg0["_param_constant7"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%275 = torch.aten.mul.Tensor %274, %273 : !torch.tensor, !torch.tensor -> !torch.tensor
%276 = torch.prim.GetAttr %arg0["_param_constant8"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%277 = torch.aten.t %276 : !torch.tensor -> !torch.tensor
%278 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%279 = torch.aten.view %275, %278 : !torch.tensor, !torch.list<int> -> !torch.tensor
%280 = torch.aten.mm %279, %277 : !torch.tensor, !torch.tensor -> !torch.tensor
%281 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%282 = torch.aten._unsafe_view %280, %281 : !torch.tensor, !torch.list<int> -> !torch.tensor
%283 = torch.aten.relu %282 : !torch.tensor -> !torch.tensor
%284 = torch.prim.GetAttr %arg0["_param_constant9"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%285 = torch.aten.t %284 : !torch.tensor -> !torch.tensor
%286 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%287 = torch.aten.view %283, %286 : !torch.tensor, !torch.list<int> -> !torch.tensor
%288 = torch.aten.mm %287, %285 : !torch.tensor, !torch.tensor -> !torch.tensor
%289 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%290 = torch.aten._unsafe_view %288, %289 : !torch.tensor, !torch.list<int> -> !torch.tensor
%291 = torch.aten.add.Tensor %267, %290, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%292 = torch.aten.pow.Tensor_Scalar %291, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%293 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%294 = torch.aten.mean.dim %292, %293, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%295 = torch.aten.add.Scalar %294, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%296 = torch.aten.rsqrt %295 : !torch.tensor -> !torch.tensor
%297 = torch.aten.mul.Tensor %291, %296 : !torch.tensor, !torch.tensor -> !torch.tensor
%298 = torch.prim.GetAttr %arg0["_param_constant10"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%299 = torch.aten.mul.Tensor %298, %297 : !torch.tensor, !torch.tensor -> !torch.tensor
%300 = torch.prim.GetAttr %arg0["_param_constant11"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%301 = torch.aten.t %300 : !torch.tensor -> !torch.tensor
%302 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%303 = torch.aten.view %299, %302 : !torch.tensor, !torch.list<int> -> !torch.tensor
%304 = torch.aten.mm %303, %301 : !torch.tensor, !torch.tensor -> !torch.tensor
%305 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%306 = torch.aten._unsafe_view %304, %305 : !torch.tensor, !torch.list<int> -> !torch.tensor
%307 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%308 = torch.aten.view %306, %307 : !torch.tensor, !torch.list<int> -> !torch.tensor
%309 = torch.aten.transpose.int %308, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%310 = torch.prim.GetAttr %arg0["_param_constant12"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%311 = torch.aten.t %310 : !torch.tensor -> !torch.tensor
%312 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%313 = torch.aten.view %299, %312 : !torch.tensor, !torch.list<int> -> !torch.tensor
%314 = torch.aten.mm %313, %311 : !torch.tensor, !torch.tensor -> !torch.tensor
%315 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%316 = torch.aten._unsafe_view %314, %315 : !torch.tensor, !torch.list<int> -> !torch.tensor
%317 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%318 = torch.aten.view %316, %317 : !torch.tensor, !torch.list<int> -> !torch.tensor
%319 = torch.aten.transpose.int %318, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%320 = torch.prim.GetAttr %arg0["_param_constant13"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%321 = torch.aten.t %320 : !torch.tensor -> !torch.tensor
%322 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%323 = torch.aten.view %299, %322 : !torch.tensor, !torch.list<int> -> !torch.tensor
%324 = torch.aten.mm %323, %321 : !torch.tensor, !torch.tensor -> !torch.tensor
%325 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%326 = torch.aten._unsafe_view %324, %325 : !torch.tensor, !torch.list<int> -> !torch.tensor
%327 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%328 = torch.aten.view %326, %327 : !torch.tensor, !torch.list<int> -> !torch.tensor
%329 = torch.aten.transpose.int %328, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%330 = torch.aten.transpose.int %319, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%331 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%332 = torch.aten.expand %309, %331, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%333 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%334 = torch.aten.view %332, %333 : !torch.tensor, !torch.list<int> -> !torch.tensor
%335 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%336 = torch.aten.expand %330, %335, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%337 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%338 = torch.aten.view %336, %337 : !torch.tensor, !torch.list<int> -> !torch.tensor
%339 = torch.aten.bmm %334, %338 : !torch.tensor, !torch.tensor -> !torch.tensor
%340 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%341 = torch.aten._unsafe_view %339, %340 : !torch.tensor, !torch.list<int> -> !torch.tensor
%342 = torch.aten.add_.Tensor %341, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%343 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%344 = torch.aten.amax %342, %343, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%345 = torch.aten.sub.Tensor %342, %344, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%346 = torch.aten.exp %345 : !torch.tensor -> !torch.tensor
%347 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%348 = torch.aten.sum.dim_IntList %346, %347, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%349 = torch.aten.div.Tensor %346, %348 : !torch.tensor, !torch.tensor -> !torch.tensor
%350 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%351 = torch.aten.expand %349, %350, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%352 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%353 = torch.aten.view %351, %352 : !torch.tensor, !torch.list<int> -> !torch.tensor
%354 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%355 = torch.aten.expand %329, %354, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%356 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%357 = torch.aten.view %355, %356 : !torch.tensor, !torch.list<int> -> !torch.tensor
%358 = torch.aten.bmm %353, %357 : !torch.tensor, !torch.tensor -> !torch.tensor
%359 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%360 = torch.aten._unsafe_view %358, %359 : !torch.tensor, !torch.list<int> -> !torch.tensor
%361 = torch.aten.transpose.int %360, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%362 = torch.aten.clone %361, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%363 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%364 = torch.aten.view %362, %363 : !torch.tensor, !torch.list<int> -> !torch.tensor
%365 = torch.prim.GetAttr %arg0["_param_constant14"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%366 = torch.aten.t %365 : !torch.tensor -> !torch.tensor
%367 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%368 = torch.aten.view %364, %367 : !torch.tensor, !torch.list<int> -> !torch.tensor
%369 = torch.aten.mm %368, %366 : !torch.tensor, !torch.tensor -> !torch.tensor
%370 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%371 = torch.aten._unsafe_view %369, %370 : !torch.tensor, !torch.list<int> -> !torch.tensor
%372 = torch.aten.add.Tensor %291, %371, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%373 = torch.aten.pow.Tensor_Scalar %372, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%374 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%375 = torch.aten.mean.dim %373, %374, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%376 = torch.aten.add.Scalar %375, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%377 = torch.aten.rsqrt %376 : !torch.tensor -> !torch.tensor
%378 = torch.aten.mul.Tensor %372, %377 : !torch.tensor, !torch.tensor -> !torch.tensor
%379 = torch.prim.GetAttr %arg0["_param_constant15"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%380 = torch.aten.mul.Tensor %379, %378 : !torch.tensor, !torch.tensor -> !torch.tensor
%381 = torch.prim.GetAttr %arg0["_param_constant16"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%382 = torch.aten.t %381 : !torch.tensor -> !torch.tensor
%383 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%384 = torch.aten.view %380, %383 : !torch.tensor, !torch.list<int> -> !torch.tensor
%385 = torch.aten.mm %384, %382 : !torch.tensor, !torch.tensor -> !torch.tensor
%386 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%387 = torch.aten._unsafe_view %385, %386 : !torch.tensor, !torch.list<int> -> !torch.tensor
%388 = torch.aten.relu %387 : !torch.tensor -> !torch.tensor
%389 = torch.prim.GetAttr %arg0["_param_constant17"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%390 = torch.aten.t %389 : !torch.tensor -> !torch.tensor
%391 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%392 = torch.aten.view %388, %391 : !torch.tensor, !torch.list<int> -> !torch.tensor
%393 = torch.aten.mm %392, %390 : !torch.tensor, !torch.tensor -> !torch.tensor
%394 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%395 = torch.aten._unsafe_view %393, %394 : !torch.tensor, !torch.list<int> -> !torch.tensor
%396 = torch.aten.add.Tensor %372, %395, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%397 = torch.aten.pow.Tensor_Scalar %396, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%398 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%399 = torch.aten.mean.dim %397, %398, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%400 = torch.aten.add.Scalar %399, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%401 = torch.aten.rsqrt %400 : !torch.tensor -> !torch.tensor
%402 = torch.aten.mul.Tensor %396, %401 : !torch.tensor, !torch.tensor -> !torch.tensor
%403 = torch.prim.GetAttr %arg0["_param_constant18"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%404 = torch.aten.mul.Tensor %403, %402 : !torch.tensor, !torch.tensor -> !torch.tensor
%405 = torch.prim.GetAttr %arg0["_param_constant19"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%406 = torch.aten.t %405 : !torch.tensor -> !torch.tensor
%407 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%408 = torch.aten.view %404, %407 : !torch.tensor, !torch.list<int> -> !torch.tensor
%409 = torch.aten.mm %408, %406 : !torch.tensor, !torch.tensor -> !torch.tensor
%410 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%411 = torch.aten._unsafe_view %409, %410 : !torch.tensor, !torch.list<int> -> !torch.tensor
%412 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%413 = torch.aten.view %411, %412 : !torch.tensor, !torch.list<int> -> !torch.tensor
%414 = torch.aten.transpose.int %413, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%415 = torch.prim.GetAttr %arg0["_param_constant20"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%416 = torch.aten.t %415 : !torch.tensor -> !torch.tensor
%417 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%418 = torch.aten.view %404, %417 : !torch.tensor, !torch.list<int> -> !torch.tensor
%419 = torch.aten.mm %418, %416 : !torch.tensor, !torch.tensor -> !torch.tensor
%420 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%421 = torch.aten._unsafe_view %419, %420 : !torch.tensor, !torch.list<int> -> !torch.tensor
%422 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%423 = torch.aten.view %421, %422 : !torch.tensor, !torch.list<int> -> !torch.tensor
%424 = torch.aten.transpose.int %423, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%425 = torch.prim.GetAttr %arg0["_param_constant21"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%426 = torch.aten.t %425 : !torch.tensor -> !torch.tensor
%427 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%428 = torch.aten.view %404, %427 : !torch.tensor, !torch.list<int> -> !torch.tensor
%429 = torch.aten.mm %428, %426 : !torch.tensor, !torch.tensor -> !torch.tensor
%430 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%431 = torch.aten._unsafe_view %429, %430 : !torch.tensor, !torch.list<int> -> !torch.tensor
%432 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%433 = torch.aten.view %431, %432 : !torch.tensor, !torch.list<int> -> !torch.tensor
%434 = torch.aten.transpose.int %433, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%435 = torch.aten.transpose.int %424, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%436 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%437 = torch.aten.expand %414, %436, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%438 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%439 = torch.aten.view %437, %438 : !torch.tensor, !torch.list<int> -> !torch.tensor
%440 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%441 = torch.aten.expand %435, %440, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%442 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%443 = torch.aten.view %441, %442 : !torch.tensor, !torch.list<int> -> !torch.tensor
%444 = torch.aten.bmm %439, %443 : !torch.tensor, !torch.tensor -> !torch.tensor
%445 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%446 = torch.aten._unsafe_view %444, %445 : !torch.tensor, !torch.list<int> -> !torch.tensor
%447 = torch.aten.add_.Tensor %446, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%448 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%449 = torch.aten.amax %447, %448, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%450 = torch.aten.sub.Tensor %447, %449, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%451 = torch.aten.exp %450 : !torch.tensor -> !torch.tensor
%452 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%453 = torch.aten.sum.dim_IntList %451, %452, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%454 = torch.aten.div.Tensor %451, %453 : !torch.tensor, !torch.tensor -> !torch.tensor
%455 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%456 = torch.aten.expand %454, %455, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%457 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%458 = torch.aten.view %456, %457 : !torch.tensor, !torch.list<int> -> !torch.tensor
%459 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%460 = torch.aten.expand %434, %459, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%461 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%462 = torch.aten.view %460, %461 : !torch.tensor, !torch.list<int> -> !torch.tensor
%463 = torch.aten.bmm %458, %462 : !torch.tensor, !torch.tensor -> !torch.tensor
%464 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%465 = torch.aten._unsafe_view %463, %464 : !torch.tensor, !torch.list<int> -> !torch.tensor
%466 = torch.aten.transpose.int %465, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%467 = torch.aten.clone %466, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%468 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%469 = torch.aten.view %467, %468 : !torch.tensor, !torch.list<int> -> !torch.tensor
%470 = torch.prim.GetAttr %arg0["_param_constant22"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%471 = torch.aten.t %470 : !torch.tensor -> !torch.tensor
%472 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%473 = torch.aten.view %469, %472 : !torch.tensor, !torch.list<int> -> !torch.tensor
%474 = torch.aten.mm %473, %471 : !torch.tensor, !torch.tensor -> !torch.tensor
%475 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%476 = torch.aten._unsafe_view %474, %475 : !torch.tensor, !torch.list<int> -> !torch.tensor
%477 = torch.aten.add.Tensor %396, %476, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%478 = torch.aten.pow.Tensor_Scalar %477, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%479 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%480 = torch.aten.mean.dim %478, %479, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%481 = torch.aten.add.Scalar %480, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%482 = torch.aten.rsqrt %481 : !torch.tensor -> !torch.tensor
%483 = torch.aten.mul.Tensor %477, %482 : !torch.tensor, !torch.tensor -> !torch.tensor
%484 = torch.prim.GetAttr %arg0["_param_constant23"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%485 = torch.aten.mul.Tensor %484, %483 : !torch.tensor, !torch.tensor -> !torch.tensor
%486 = torch.prim.GetAttr %arg0["_param_constant24"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%487 = torch.aten.t %486 : !torch.tensor -> !torch.tensor
%488 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%489 = torch.aten.view %485, %488 : !torch.tensor, !torch.list<int> -> !torch.tensor
%490 = torch.aten.mm %489, %487 : !torch.tensor, !torch.tensor -> !torch.tensor
%491 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%492 = torch.aten._unsafe_view %490, %491 : !torch.tensor, !torch.list<int> -> !torch.tensor
%493 = torch.aten.relu %492 : !torch.tensor -> !torch.tensor
%494 = torch.prim.GetAttr %arg0["_param_constant25"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%495 = torch.aten.t %494 : !torch.tensor -> !torch.tensor
%496 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%497 = torch.aten.view %493, %496 : !torch.tensor, !torch.list<int> -> !torch.tensor
%498 = torch.aten.mm %497, %495 : !torch.tensor, !torch.tensor -> !torch.tensor
%499 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%500 = torch.aten._unsafe_view %498, %499 : !torch.tensor, !torch.list<int> -> !torch.tensor
%501 = torch.aten.add.Tensor %477, %500, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%502 = torch.aten.pow.Tensor_Scalar %501, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%503 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%504 = torch.aten.mean.dim %502, %503, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%505 = torch.aten.add.Scalar %504, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%506 = torch.aten.rsqrt %505 : !torch.tensor -> !torch.tensor
%507 = torch.aten.mul.Tensor %501, %506 : !torch.tensor, !torch.tensor -> !torch.tensor
%508 = torch.prim.GetAttr %arg0["_param_constant26"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%509 = torch.aten.mul.Tensor %508, %507 : !torch.tensor, !torch.tensor -> !torch.tensor
%510 = torch.prim.GetAttr %arg0["_param_constant27"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%511 = torch.aten.t %510 : !torch.tensor -> !torch.tensor
%512 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%513 = torch.aten.view %509, %512 : !torch.tensor, !torch.list<int> -> !torch.tensor
%514 = torch.aten.mm %513, %511 : !torch.tensor, !torch.tensor -> !torch.tensor
%515 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%516 = torch.aten._unsafe_view %514, %515 : !torch.tensor, !torch.list<int> -> !torch.tensor
%517 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%518 = torch.aten.view %516, %517 : !torch.tensor, !torch.list<int> -> !torch.tensor
%519 = torch.aten.transpose.int %518, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%520 = torch.prim.GetAttr %arg0["_param_constant28"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%521 = torch.aten.t %520 : !torch.tensor -> !torch.tensor
%522 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%523 = torch.aten.view %509, %522 : !torch.tensor, !torch.list<int> -> !torch.tensor
%524 = torch.aten.mm %523, %521 : !torch.tensor, !torch.tensor -> !torch.tensor
%525 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%526 = torch.aten._unsafe_view %524, %525 : !torch.tensor, !torch.list<int> -> !torch.tensor
%527 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%528 = torch.aten.view %526, %527 : !torch.tensor, !torch.list<int> -> !torch.tensor
%529 = torch.aten.transpose.int %528, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%530 = torch.prim.GetAttr %arg0["_param_constant29"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%531 = torch.aten.t %530 : !torch.tensor -> !torch.tensor
%532 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%533 = torch.aten.view %509, %532 : !torch.tensor, !torch.list<int> -> !torch.tensor
%534 = torch.aten.mm %533, %531 : !torch.tensor, !torch.tensor -> !torch.tensor
%535 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%536 = torch.aten._unsafe_view %534, %535 : !torch.tensor, !torch.list<int> -> !torch.tensor
%537 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%538 = torch.aten.view %536, %537 : !torch.tensor, !torch.list<int> -> !torch.tensor
%539 = torch.aten.transpose.int %538, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%540 = torch.aten.transpose.int %529, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%541 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%542 = torch.aten.expand %519, %541, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%543 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%544 = torch.aten.view %542, %543 : !torch.tensor, !torch.list<int> -> !torch.tensor
%545 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%546 = torch.aten.expand %540, %545, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%547 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%548 = torch.aten.view %546, %547 : !torch.tensor, !torch.list<int> -> !torch.tensor
%549 = torch.aten.bmm %544, %548 : !torch.tensor, !torch.tensor -> !torch.tensor
%550 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%551 = torch.aten._unsafe_view %549, %550 : !torch.tensor, !torch.list<int> -> !torch.tensor
%552 = torch.aten.add_.Tensor %551, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%553 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%554 = torch.aten.amax %552, %553, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%555 = torch.aten.sub.Tensor %552, %554, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%556 = torch.aten.exp %555 : !torch.tensor -> !torch.tensor
%557 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%558 = torch.aten.sum.dim_IntList %556, %557, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%559 = torch.aten.div.Tensor %556, %558 : !torch.tensor, !torch.tensor -> !torch.tensor
%560 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%561 = torch.aten.expand %559, %560, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%562 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%563 = torch.aten.view %561, %562 : !torch.tensor, !torch.list<int> -> !torch.tensor
%564 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%565 = torch.aten.expand %539, %564, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%566 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%567 = torch.aten.view %565, %566 : !torch.tensor, !torch.list<int> -> !torch.tensor
%568 = torch.aten.bmm %563, %567 : !torch.tensor, !torch.tensor -> !torch.tensor
%569 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%570 = torch.aten._unsafe_view %568, %569 : !torch.tensor, !torch.list<int> -> !torch.tensor
%571 = torch.aten.transpose.int %570, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%572 = torch.aten.clone %571, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%573 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%574 = torch.aten.view %572, %573 : !torch.tensor, !torch.list<int> -> !torch.tensor
%575 = torch.prim.GetAttr %arg0["_param_constant30"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%576 = torch.aten.t %575 : !torch.tensor -> !torch.tensor
%577 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%578 = torch.aten.view %574, %577 : !torch.tensor, !torch.list<int> -> !torch.tensor
%579 = torch.aten.mm %578, %576 : !torch.tensor, !torch.tensor -> !torch.tensor
%580 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%581 = torch.aten._unsafe_view %579, %580 : !torch.tensor, !torch.list<int> -> !torch.tensor
%582 = torch.aten.add.Tensor %501, %581, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%583 = torch.aten.pow.Tensor_Scalar %582, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%584 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%585 = torch.aten.mean.dim %583, %584, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%586 = torch.aten.add.Scalar %585, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%587 = torch.aten.rsqrt %586 : !torch.tensor -> !torch.tensor
%588 = torch.aten.mul.Tensor %582, %587 : !torch.tensor, !torch.tensor -> !torch.tensor
%589 = torch.prim.GetAttr %arg0["_param_constant31"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%590 = torch.aten.mul.Tensor %589, %588 : !torch.tensor, !torch.tensor -> !torch.tensor
%591 = torch.prim.GetAttr %arg0["_param_constant32"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%592 = torch.aten.t %591 : !torch.tensor -> !torch.tensor
%593 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%594 = torch.aten.view %590, %593 : !torch.tensor, !torch.list<int> -> !torch.tensor
%595 = torch.aten.mm %594, %592 : !torch.tensor, !torch.tensor -> !torch.tensor
%596 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%597 = torch.aten._unsafe_view %595, %596 : !torch.tensor, !torch.list<int> -> !torch.tensor
%598 = torch.aten.relu %597 : !torch.tensor -> !torch.tensor
%599 = torch.prim.GetAttr %arg0["_param_constant33"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%600 = torch.aten.t %599 : !torch.tensor -> !torch.tensor
%601 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%602 = torch.aten.view %598, %601 : !torch.tensor, !torch.list<int> -> !torch.tensor
%603 = torch.aten.mm %602, %600 : !torch.tensor, !torch.tensor -> !torch.tensor
%604 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%605 = torch.aten._unsafe_view %603, %604 : !torch.tensor, !torch.list<int> -> !torch.tensor
%606 = torch.aten.add.Tensor %582, %605, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%607 = torch.aten.pow.Tensor_Scalar %606, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%608 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%609 = torch.aten.mean.dim %607, %608, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%610 = torch.aten.add.Scalar %609, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%611 = torch.aten.rsqrt %610 : !torch.tensor -> !torch.tensor
%612 = torch.aten.mul.Tensor %606, %611 : !torch.tensor, !torch.tensor -> !torch.tensor
%613 = torch.prim.GetAttr %arg0["_param_constant34"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%614 = torch.aten.mul.Tensor %613, %612 : !torch.tensor, !torch.tensor -> !torch.tensor
%615 = torch.prim.GetAttr %arg0["_param_constant35"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%616 = torch.aten.t %615 : !torch.tensor -> !torch.tensor
%617 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%618 = torch.aten.view %614, %617 : !torch.tensor, !torch.list<int> -> !torch.tensor
%619 = torch.aten.mm %618, %616 : !torch.tensor, !torch.tensor -> !torch.tensor
%620 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%621 = torch.aten._unsafe_view %619, %620 : !torch.tensor, !torch.list<int> -> !torch.tensor
%622 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%623 = torch.aten.view %621, %622 : !torch.tensor, !torch.list<int> -> !torch.tensor
%624 = torch.aten.transpose.int %623, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%625 = torch.prim.GetAttr %arg0["_param_constant36"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%626 = torch.aten.t %625 : !torch.tensor -> !torch.tensor
%627 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%628 = torch.aten.view %614, %627 : !torch.tensor, !torch.list<int> -> !torch.tensor
%629 = torch.aten.mm %628, %626 : !torch.tensor, !torch.tensor -> !torch.tensor
%630 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%631 = torch.aten._unsafe_view %629, %630 : !torch.tensor, !torch.list<int> -> !torch.tensor
%632 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%633 = torch.aten.view %631, %632 : !torch.tensor, !torch.list<int> -> !torch.tensor
%634 = torch.aten.transpose.int %633, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%635 = torch.prim.GetAttr %arg0["_param_constant37"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%636 = torch.aten.t %635 : !torch.tensor -> !torch.tensor
%637 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%638 = torch.aten.view %614, %637 : !torch.tensor, !torch.list<int> -> !torch.tensor
%639 = torch.aten.mm %638, %636 : !torch.tensor, !torch.tensor -> !torch.tensor
%640 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%641 = torch.aten._unsafe_view %639, %640 : !torch.tensor, !torch.list<int> -> !torch.tensor
%642 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%643 = torch.aten.view %641, %642 : !torch.tensor, !torch.list<int> -> !torch.tensor
%644 = torch.aten.transpose.int %643, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%645 = torch.aten.transpose.int %634, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%646 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%647 = torch.aten.expand %624, %646, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%648 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%649 = torch.aten.view %647, %648 : !torch.tensor, !torch.list<int> -> !torch.tensor
%650 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%651 = torch.aten.expand %645, %650, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%652 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%653 = torch.aten.view %651, %652 : !torch.tensor, !torch.list<int> -> !torch.tensor
%654 = torch.aten.bmm %649, %653 : !torch.tensor, !torch.tensor -> !torch.tensor
%655 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%656 = torch.aten._unsafe_view %654, %655 : !torch.tensor, !torch.list<int> -> !torch.tensor
%657 = torch.aten.add_.Tensor %656, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%658 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%659 = torch.aten.amax %657, %658, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%660 = torch.aten.sub.Tensor %657, %659, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%661 = torch.aten.exp %660 : !torch.tensor -> !torch.tensor
%662 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%663 = torch.aten.sum.dim_IntList %661, %662, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%664 = torch.aten.div.Tensor %661, %663 : !torch.tensor, !torch.tensor -> !torch.tensor
%665 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%666 = torch.aten.expand %664, %665, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%667 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%668 = torch.aten.view %666, %667 : !torch.tensor, !torch.list<int> -> !torch.tensor
%669 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%670 = torch.aten.expand %644, %669, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%671 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%672 = torch.aten.view %670, %671 : !torch.tensor, !torch.list<int> -> !torch.tensor
%673 = torch.aten.bmm %668, %672 : !torch.tensor, !torch.tensor -> !torch.tensor
%674 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%675 = torch.aten._unsafe_view %673, %674 : !torch.tensor, !torch.list<int> -> !torch.tensor
%676 = torch.aten.transpose.int %675, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%677 = torch.aten.clone %676, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%678 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%679 = torch.aten.view %677, %678 : !torch.tensor, !torch.list<int> -> !torch.tensor
%680 = torch.prim.GetAttr %arg0["_param_constant38"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%681 = torch.aten.t %680 : !torch.tensor -> !torch.tensor
%682 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%683 = torch.aten.view %679, %682 : !torch.tensor, !torch.list<int> -> !torch.tensor
%684 = torch.aten.mm %683, %681 : !torch.tensor, !torch.tensor -> !torch.tensor
%685 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%686 = torch.aten._unsafe_view %684, %685 : !torch.tensor, !torch.list<int> -> !torch.tensor
%687 = torch.aten.add.Tensor %606, %686, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%688 = torch.aten.pow.Tensor_Scalar %687, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%689 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%690 = torch.aten.mean.dim %688, %689, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%691 = torch.aten.add.Scalar %690, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%692 = torch.aten.rsqrt %691 : !torch.tensor -> !torch.tensor
%693 = torch.aten.mul.Tensor %687, %692 : !torch.tensor, !torch.tensor -> !torch.tensor
%694 = torch.prim.GetAttr %arg0["_param_constant39"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%695 = torch.aten.mul.Tensor %694, %693 : !torch.tensor, !torch.tensor -> !torch.tensor
%696 = torch.prim.GetAttr %arg0["_param_constant40"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%697 = torch.aten.t %696 : !torch.tensor -> !torch.tensor
%698 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%699 = torch.aten.view %695, %698 : !torch.tensor, !torch.list<int> -> !torch.tensor
%700 = torch.aten.mm %699, %697 : !torch.tensor, !torch.tensor -> !torch.tensor
%701 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%702 = torch.aten._unsafe_view %700, %701 : !torch.tensor, !torch.list<int> -> !torch.tensor
%703 = torch.aten.relu %702 : !torch.tensor -> !torch.tensor
%704 = torch.prim.GetAttr %arg0["_param_constant41"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%705 = torch.aten.t %704 : !torch.tensor -> !torch.tensor
%706 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%707 = torch.aten.view %703, %706 : !torch.tensor, !torch.list<int> -> !torch.tensor
%708 = torch.aten.mm %707, %705 : !torch.tensor, !torch.tensor -> !torch.tensor
%709 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%710 = torch.aten._unsafe_view %708, %709 : !torch.tensor, !torch.list<int> -> !torch.tensor
%711 = torch.aten.add.Tensor %687, %710, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%712 = torch.aten.pow.Tensor_Scalar %711, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%713 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%714 = torch.aten.mean.dim %712, %713, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%715 = torch.aten.add.Scalar %714, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%716 = torch.aten.rsqrt %715 : !torch.tensor -> !torch.tensor
%717 = torch.aten.mul.Tensor %711, %716 : !torch.tensor, !torch.tensor -> !torch.tensor
%718 = torch.prim.GetAttr %arg0["_param_constant42"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%719 = torch.aten.mul.Tensor %718, %717 : !torch.tensor, !torch.tensor -> !torch.tensor
%720 = torch.prim.GetAttr %arg0["_param_constant43"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%721 = torch.aten.t %720 : !torch.tensor -> !torch.tensor
%722 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%723 = torch.aten.view %719, %722 : !torch.tensor, !torch.list<int> -> !torch.tensor
%724 = torch.aten.mm %723, %721 : !torch.tensor, !torch.tensor -> !torch.tensor
%725 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%726 = torch.aten._unsafe_view %724, %725 : !torch.tensor, !torch.list<int> -> !torch.tensor
%727 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%728 = torch.aten.view %726, %727 : !torch.tensor, !torch.list<int> -> !torch.tensor
%729 = torch.aten.transpose.int %728, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%730 = torch.prim.GetAttr %arg0["_param_constant44"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%731 = torch.aten.t %730 : !torch.tensor -> !torch.tensor
%732 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%733 = torch.aten.view %719, %732 : !torch.tensor, !torch.list<int> -> !torch.tensor
%734 = torch.aten.mm %733, %731 : !torch.tensor, !torch.tensor -> !torch.tensor
%735 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%736 = torch.aten._unsafe_view %734, %735 : !torch.tensor, !torch.list<int> -> !torch.tensor
%737 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%738 = torch.aten.view %736, %737 : !torch.tensor, !torch.list<int> -> !torch.tensor
%739 = torch.aten.transpose.int %738, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%740 = torch.prim.GetAttr %arg0["_param_constant45"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%741 = torch.aten.t %740 : !torch.tensor -> !torch.tensor
%742 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%743 = torch.aten.view %719, %742 : !torch.tensor, !torch.list<int> -> !torch.tensor
%744 = torch.aten.mm %743, %741 : !torch.tensor, !torch.tensor -> !torch.tensor
%745 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%746 = torch.aten._unsafe_view %744, %745 : !torch.tensor, !torch.list<int> -> !torch.tensor
%747 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%748 = torch.aten.view %746, %747 : !torch.tensor, !torch.list<int> -> !torch.tensor
%749 = torch.aten.transpose.int %748, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%750 = torch.aten.transpose.int %739, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%751 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%752 = torch.aten.expand %729, %751, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%753 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%754 = torch.aten.view %752, %753 : !torch.tensor, !torch.list<int> -> !torch.tensor
%755 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%756 = torch.aten.expand %750, %755, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%757 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%758 = torch.aten.view %756, %757 : !torch.tensor, !torch.list<int> -> !torch.tensor
%759 = torch.aten.bmm %754, %758 : !torch.tensor, !torch.tensor -> !torch.tensor
%760 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%761 = torch.aten._unsafe_view %759, %760 : !torch.tensor, !torch.list<int> -> !torch.tensor
%762 = torch.aten.add_.Tensor %761, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%763 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%764 = torch.aten.amax %762, %763, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%765 = torch.aten.sub.Tensor %762, %764, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%766 = torch.aten.exp %765 : !torch.tensor -> !torch.tensor
%767 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%768 = torch.aten.sum.dim_IntList %766, %767, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%769 = torch.aten.div.Tensor %766, %768 : !torch.tensor, !torch.tensor -> !torch.tensor
%770 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%771 = torch.aten.expand %769, %770, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%772 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%773 = torch.aten.view %771, %772 : !torch.tensor, !torch.list<int> -> !torch.tensor
%774 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%775 = torch.aten.expand %749, %774, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%776 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%777 = torch.aten.view %775, %776 : !torch.tensor, !torch.list<int> -> !torch.tensor
%778 = torch.aten.bmm %773, %777 : !torch.tensor, !torch.tensor -> !torch.tensor
%779 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%780 = torch.aten._unsafe_view %778, %779 : !torch.tensor, !torch.list<int> -> !torch.tensor
%781 = torch.aten.transpose.int %780, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%782 = torch.aten.clone %781, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%783 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%784 = torch.aten.view %782, %783 : !torch.tensor, !torch.list<int> -> !torch.tensor
%785 = torch.prim.GetAttr %arg0["_param_constant46"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%786 = torch.aten.t %785 : !torch.tensor -> !torch.tensor
%787 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%788 = torch.aten.view %784, %787 : !torch.tensor, !torch.list<int> -> !torch.tensor
%789 = torch.aten.mm %788, %786 : !torch.tensor, !torch.tensor -> !torch.tensor
%790 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%791 = torch.aten._unsafe_view %789, %790 : !torch.tensor, !torch.list<int> -> !torch.tensor
%792 = torch.aten.add.Tensor %711, %791, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%793 = torch.aten.pow.Tensor_Scalar %792, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%794 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%795 = torch.aten.mean.dim %793, %794, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%796 = torch.aten.add.Scalar %795, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%797 = torch.aten.rsqrt %796 : !torch.tensor -> !torch.tensor
%798 = torch.aten.mul.Tensor %792, %797 : !torch.tensor, !torch.tensor -> !torch.tensor
%799 = torch.prim.GetAttr %arg0["_param_constant47"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%800 = torch.aten.mul.Tensor %799, %798 : !torch.tensor, !torch.tensor -> !torch.tensor
%801 = torch.prim.GetAttr %arg0["_param_constant48"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%802 = torch.aten.t %801 : !torch.tensor -> !torch.tensor
%803 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%804 = torch.aten.view %800, %803 : !torch.tensor, !torch.list<int> -> !torch.tensor
%805 = torch.aten.mm %804, %802 : !torch.tensor, !torch.tensor -> !torch.tensor
%806 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%807 = torch.aten._unsafe_view %805, %806 : !torch.tensor, !torch.list<int> -> !torch.tensor
%808 = torch.aten.relu %807 : !torch.tensor -> !torch.tensor
%809 = torch.prim.GetAttr %arg0["_param_constant49"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%810 = torch.aten.t %809 : !torch.tensor -> !torch.tensor
%811 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%812 = torch.aten.view %808, %811 : !torch.tensor, !torch.list<int> -> !torch.tensor
%813 = torch.aten.mm %812, %810 : !torch.tensor, !torch.tensor -> !torch.tensor
%814 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%815 = torch.aten._unsafe_view %813, %814 : !torch.tensor, !torch.list<int> -> !torch.tensor
%816 = torch.aten.add.Tensor %792, %815, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%817 = torch.aten.pow.Tensor_Scalar %816, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%818 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%819 = torch.aten.mean.dim %817, %818, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%820 = torch.aten.add.Scalar %819, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%821 = torch.aten.rsqrt %820 : !torch.tensor -> !torch.tensor
%822 = torch.aten.mul.Tensor %816, %821 : !torch.tensor, !torch.tensor -> !torch.tensor
%823 = torch.prim.GetAttr %arg0["_param_constant50"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%824 = torch.aten.mul.Tensor %823, %822 : !torch.tensor, !torch.tensor -> !torch.tensor
%825 = torch.prim.ListConstruct %int-1, %int4 : (!torch.int, !torch.int) -> !torch.list<int>
%826 = torch.aten.view %144, %825 : !torch.tensor, !torch.list<int> -> !torch.tensor
%827 = torch.prim.GetAttr %arg0["_param_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%828 = torch.aten.embedding %827, %826, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%829 = torch.prim.ListConstruct %int1, %int4 : (!torch.int, !torch.int) -> !torch.list<int>
%830 = torch.aten.ones %829, %none_1, %none_1, %cpu, %false : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%831 = torch.prim.ListConstruct %int1, %int15 : (!torch.int, !torch.int) -> !torch.list<int>
%832 = torch.aten.ones %831, %int4, %none_1, %cpu, %false : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%833 = torch.aten.arange %int4, %none_1, %none_1, %cpu, %false : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%834 = torch.aten.unsqueeze %833, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%835 = torch.aten.unsqueeze %834, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%836 = torch.aten.slice.Tensor %835, %int2, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%837 = torch.prim.ListConstruct %int1, %int4, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%838 = torch.aten.repeat %836, %837 : !torch.tensor, !torch.list<int> -> !torch.tensor
%839 = torch.aten.unsqueeze %833, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%840 = torch.aten.slice.Tensor %839, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%841 = torch.aten.unsqueeze %840, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%842 = torch.aten.le.Tensor %838, %841 : !torch.tensor, !torch.tensor -> !torch.tensor
%843 = torch.prims.convert_element_type %842, %int6 : !torch.tensor, !torch.int -> !torch.tensor
%844 = torch.aten.slice.Tensor %843, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%845 = torch.aten.unsqueeze %844, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%846 = torch.aten.slice.Tensor %845, %int2, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%847 = torch.aten.slice.Tensor %846, %int3, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%848 = torch.aten.slice.Tensor %830, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%849 = torch.aten.unsqueeze %848, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%850 = torch.aten.unsqueeze %849, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%851 = torch.aten.slice.Tensor %850, %int3, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%852 = torch.aten.mul.Tensor %847, %851 : !torch.tensor, !torch.tensor -> !torch.tensor
%853 = torch.aten.rsub.Scalar %852, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%854 = torch.aten.mul.Scalar %853, %float-3.402820e38 : !torch.tensor, !torch.float -> !torch.tensor
%855 = torch.aten.slice.Tensor %832, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%856 = torch.aten.unsqueeze %855, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%857 = torch.aten.unsqueeze %856, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%858 = torch.aten.slice.Tensor %857, %int3, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%859 = torch.prims.convert_element_type %858, %int6 : !torch.tensor, !torch.int -> !torch.tensor
%860 = torch.aten.rsub.Scalar %859, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%861 = torch.aten.mul.Scalar %860, %float-3.402820e38 : !torch.tensor, !torch.float -> !torch.tensor
%862 = torch.aten.pow.Tensor_Scalar %828, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%863 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%864 = torch.aten.mean.dim %862, %863, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%865 = torch.aten.add.Scalar %864, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%866 = torch.aten.rsqrt %865 : !torch.tensor -> !torch.tensor
%867 = torch.aten.mul.Tensor %828, %866 : !torch.tensor, !torch.tensor -> !torch.tensor
%868 = torch.prim.GetAttr %arg0["_param_constant51"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%869 = torch.aten.mul.Tensor %868, %867 : !torch.tensor, !torch.tensor -> !torch.tensor
%870 = torch.prim.GetAttr %arg0["_param_constant52"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%871 = torch.aten.t %870 : !torch.tensor -> !torch.tensor
%872 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%873 = torch.aten.view %869, %872 : !torch.tensor, !torch.list<int> -> !torch.tensor
%874 = torch.aten.mm %873, %871 : !torch.tensor, !torch.tensor -> !torch.tensor
%875 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%876 = torch.aten._unsafe_view %874, %875 : !torch.tensor, !torch.list<int> -> !torch.tensor
%877 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%878 = torch.aten.view %876, %877 : !torch.tensor, !torch.list<int> -> !torch.tensor
%879 = torch.aten.transpose.int %878, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%880 = torch.prim.GetAttr %arg0["_param_constant53"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%881 = torch.aten.t %880 : !torch.tensor -> !torch.tensor
%882 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%883 = torch.aten.view %869, %882 : !torch.tensor, !torch.list<int> -> !torch.tensor
%884 = torch.aten.mm %883, %881 : !torch.tensor, !torch.tensor -> !torch.tensor
%885 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%886 = torch.aten._unsafe_view %884, %885 : !torch.tensor, !torch.list<int> -> !torch.tensor
%887 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%888 = torch.aten.view %886, %887 : !torch.tensor, !torch.list<int> -> !torch.tensor
%889 = torch.aten.transpose.int %888, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%890 = torch.prim.GetAttr %arg0["_param_constant54"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%891 = torch.aten.t %890 : !torch.tensor -> !torch.tensor
%892 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%893 = torch.aten.view %869, %892 : !torch.tensor, !torch.list<int> -> !torch.tensor
%894 = torch.aten.mm %893, %891 : !torch.tensor, !torch.tensor -> !torch.tensor
%895 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%896 = torch.aten._unsafe_view %894, %895 : !torch.tensor, !torch.list<int> -> !torch.tensor
%897 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%898 = torch.aten.view %896, %897 : !torch.tensor, !torch.list<int> -> !torch.tensor
%899 = torch.aten.transpose.int %898, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%900 = torch.aten.transpose.int %889, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%901 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%902 = torch.aten.expand %879, %901, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%903 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%904 = torch.aten.view %902, %903 : !torch.tensor, !torch.list<int> -> !torch.tensor
%905 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%906 = torch.aten.expand %900, %905, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%907 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%908 = torch.aten.view %906, %907 : !torch.tensor, !torch.list<int> -> !torch.tensor
%909 = torch.aten.bmm %904, %908 : !torch.tensor, !torch.tensor -> !torch.tensor
%910 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%911 = torch.aten._unsafe_view %909, %910 : !torch.tensor, !torch.list<int> -> !torch.tensor
%912 = torch.aten.arange %int4, %int4, %none_1, %cpu, %false : !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%913 = torch.aten.slice.Tensor %912, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%914 = torch.aten.unsqueeze %913, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%915 = torch.aten.arange %int4, %int4, %none_1, %cpu, %false : !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%916 = torch.aten.unsqueeze %915, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%917 = torch.aten.slice.Tensor %916, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%918 = torch.aten.sub.Tensor %917, %914, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%919 = torch.aten.zeros_like %918, %int4, %int0, %cpu, %false, %none_1 : !torch.tensor, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.tensor
%920 = torch.aten.minimum %918, %919 : !torch.tensor, !torch.tensor -> !torch.tensor
%921 = torch.aten.neg %920 : !torch.tensor -> !torch.tensor
%922 = torch.aten.lt.Scalar %921, %int16 : !torch.tensor, !torch.int -> !torch.tensor
%923 = torch.prims.convert_element_type %921, %int6 : !torch.tensor, !torch.int -> !torch.tensor
%924 = torch.aten.div.Scalar %923, %int16 : !torch.tensor, !torch.int -> !torch.tensor
%925 = torch.aten.log %924 : !torch.tensor -> !torch.tensor
%926 = torch.aten.div.Scalar %925, %float2.079440e00 : !torch.tensor, !torch.float -> !torch.tensor
%927 = torch.aten.mul.Scalar %926, %int16 : !torch.tensor, !torch.int -> !torch.tensor
%928 = torch.prims.convert_element_type %927, %int4 : !torch.tensor, !torch.int -> !torch.tensor
%929 = torch.aten.add.Scalar %928, %int16, %int1 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%930 = torch.aten.full_like %929, %int31, %int4, %int0, %cpu, %false, %none_1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.tensor
%931 = torch.aten.minimum %929, %930 : !torch.tensor, !torch.tensor -> !torch.tensor
%932 = torch.aten.where.self %922, %921, %931 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%933 = torch.aten.add.Scalar %932, %int0, %int1 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%934 = torch.prim.GetAttr %arg0["_param_constant55"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%935 = torch.aten.embedding %934, %933, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%936 = torch.prim.ListConstruct %int2, %int0, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%937 = torch.aten.permute %935, %936 : !torch.tensor, !torch.list<int> -> !torch.tensor
%938 = torch.aten.unsqueeze %937, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%939 = torch.aten.add.Tensor %938, %854, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%940 = torch.aten.add_.Tensor %911, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%941 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%942 = torch.aten.amax %940, %941, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%943 = torch.aten.sub.Tensor %940, %942, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%944 = torch.aten.exp %943 : !torch.tensor -> !torch.tensor
%945 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%946 = torch.aten.sum.dim_IntList %944, %945, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%947 = torch.aten.div.Tensor %944, %946 : !torch.tensor, !torch.tensor -> !torch.tensor
%948 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%949 = torch.aten.expand %947, %948, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%950 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%951 = torch.aten.view %949, %950 : !torch.tensor, !torch.list<int> -> !torch.tensor
%952 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%953 = torch.aten.expand %899, %952, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%954 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%955 = torch.aten.view %953, %954 : !torch.tensor, !torch.list<int> -> !torch.tensor
%956 = torch.aten.bmm %951, %955 : !torch.tensor, !torch.tensor -> !torch.tensor
%957 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%958 = torch.aten._unsafe_view %956, %957 : !torch.tensor, !torch.list<int> -> !torch.tensor
%959 = torch.aten.transpose.int %958, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%960 = torch.aten.clone %959, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%961 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%962 = torch.aten.view %960, %961 : !torch.tensor, !torch.list<int> -> !torch.tensor
%963 = torch.prim.GetAttr %arg0["_param_constant56"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%964 = torch.aten.t %963 : !torch.tensor -> !torch.tensor
%965 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%966 = torch.aten.view %962, %965 : !torch.tensor, !torch.list<int> -> !torch.tensor
%967 = torch.aten.mm %966, %964 : !torch.tensor, !torch.tensor -> !torch.tensor
%968 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%969 = torch.aten._unsafe_view %967, %968 : !torch.tensor, !torch.list<int> -> !torch.tensor
%970 = torch.aten.add.Tensor %828, %969, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%971 = torch.aten.pow.Tensor_Scalar %970, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%972 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%973 = torch.aten.mean.dim %971, %972, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%974 = torch.aten.add.Scalar %973, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%975 = torch.aten.rsqrt %974 : !torch.tensor -> !torch.tensor
%976 = torch.aten.mul.Tensor %970, %975 : !torch.tensor, !torch.tensor -> !torch.tensor
%977 = torch.prim.GetAttr %arg0["_param_constant57"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%978 = torch.aten.mul.Tensor %977, %976 : !torch.tensor, !torch.tensor -> !torch.tensor
%979 = torch.prim.GetAttr %arg0["_param_constant58"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%980 = torch.aten.t %979 : !torch.tensor -> !torch.tensor
%981 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%982 = torch.aten.view %978, %981 : !torch.tensor, !torch.list<int> -> !torch.tensor
%983 = torch.aten.mm %982, %980 : !torch.tensor, !torch.tensor -> !torch.tensor
%984 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%985 = torch.aten._unsafe_view %983, %984 : !torch.tensor, !torch.list<int> -> !torch.tensor
%986 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%987 = torch.aten.view %985, %986 : !torch.tensor, !torch.list<int> -> !torch.tensor
%988 = torch.aten.transpose.int %987, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%989 = torch.prim.GetAttr %arg0["_param_constant59"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%990 = torch.aten.t %989 : !torch.tensor -> !torch.tensor
%991 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%992 = torch.aten.view %824, %991 : !torch.tensor, !torch.list<int> -> !torch.tensor
%993 = torch.aten.mm %992, %990 : !torch.tensor, !torch.tensor -> !torch.tensor
%994 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%995 = torch.aten._unsafe_view %993, %994 : !torch.tensor, !torch.list<int> -> !torch.tensor
%996 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%997 = torch.aten.view %995, %996 : !torch.tensor, !torch.list<int> -> !torch.tensor
%998 = torch.aten.transpose.int %997, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%999 = torch.prim.GetAttr %arg0["_param_constant60"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1000 = torch.aten.t %999 : !torch.tensor -> !torch.tensor
%1001 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1002 = torch.aten.view %824, %1001 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1003 = torch.aten.mm %1002, %1000 : !torch.tensor, !torch.tensor -> !torch.tensor
%1004 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1005 = torch.aten._unsafe_view %1003, %1004 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1006 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1007 = torch.aten.view %1005, %1006 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1008 = torch.aten.transpose.int %1007, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1009 = torch.aten.transpose.int %998, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1010 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1011 = torch.aten.expand %988, %1010, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1012 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1013 = torch.aten.view %1011, %1012 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1014 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1015 = torch.aten.expand %1009, %1014, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1016 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1017 = torch.aten.view %1015, %1016 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1018 = torch.aten.bmm %1013, %1017 : !torch.tensor, !torch.tensor -> !torch.tensor
%1019 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1020 = torch.aten._unsafe_view %1018, %1019 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1021 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1022 = torch.aten.zeros %1021, %int6, %none_1, %cpu, %false : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%1023 = torch.aten.add.Tensor %1022, %861, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1024 = torch.aten.add_.Tensor %1020, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1025 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1026 = torch.aten.amax %1024, %1025, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1027 = torch.aten.sub.Tensor %1024, %1026, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1028 = torch.aten.exp %1027 : !torch.tensor -> !torch.tensor
%1029 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1030 = torch.aten.sum.dim_IntList %1028, %1029, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1031 = torch.aten.div.Tensor %1028, %1030 : !torch.tensor, !torch.tensor -> !torch.tensor
%1032 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1033 = torch.aten.expand %1031, %1032, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1034 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1035 = torch.aten.view %1033, %1034 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1036 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1037 = torch.aten.expand %1008, %1036, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1038 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1039 = torch.aten.view %1037, %1038 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1040 = torch.aten.bmm %1035, %1039 : !torch.tensor, !torch.tensor -> !torch.tensor
%1041 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1042 = torch.aten._unsafe_view %1040, %1041 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1043 = torch.aten.transpose.int %1042, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// NOTE(review): interior of @forward — the function's header and terminator lie outside
// this chunk. The op sequence matches a T5-style decoder stack lowered via torch.fx:
// repeating [RMS norm -> self-attention -> residual], [RMS norm -> cross-attention over
// encoder states -> residual], [RMS norm -> ReLU feed-forward -> residual] blocks
// — presumably T5; TODO confirm against the exporting model.
// Shape facts visible below: d_model = 512, 8 heads x 64 = 512, FFN inner dim = 2048,
// decoder seq len = 4, encoder seq len = 15, batch = 1.
// %824 (defined earlier) is reused as encoder hidden states for every cross-attention
// K/V projection; %939 and %1023 (defined earlier) are additive attention biases/masks
// for self- and cross-attention respectively — assumed precomputed position bias + mask.
//
// --- Cross-attention epilogue: merge heads (transpose+clone made contiguous, then
// view to [1,-1,512]) and apply output projection _param_constant61 ---
%1044 = torch.aten.clone %1043, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1045 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1046 = torch.aten.view %1044, %1045 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1047 = torch.prim.GetAttr %arg0["_param_constant61"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1048 = torch.aten.t %1047 : !torch.tensor -> !torch.tensor
%1049 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1050 = torch.aten.view %1046, %1049 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1051 = torch.aten.mm %1050, %1048 : !torch.tensor, !torch.tensor -> !torch.tensor
%1052 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1053 = torch.aten._unsafe_view %1051, %1052 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection: add attention output back onto the stream (%970, defined earlier).
%1054 = torch.aten.add.Tensor %970, %1053, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// RMS-style layer norm: mean(x^2) over last dim, + eps (~1e-6), rsqrt, then scale by
// learned weight _param_constant62. No mean subtraction and no bias term.
%1055 = torch.aten.pow.Tensor_Scalar %1054, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1056 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1057 = torch.aten.mean.dim %1055, %1056, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1058 = torch.aten.add.Scalar %1057, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1059 = torch.aten.rsqrt %1058 : !torch.tensor -> !torch.tensor
%1060 = torch.aten.mul.Tensor %1054, %1059 : !torch.tensor, !torch.tensor -> !torch.tensor
%1061 = torch.prim.GetAttr %arg0["_param_constant62"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1062 = torch.aten.mul.Tensor %1061, %1060 : !torch.tensor, !torch.tensor -> !torch.tensor
// Feed-forward: wi (_param_constant63) 512 -> 2048, ReLU, wo (_param_constant64) 2048 -> 512.
%1063 = torch.prim.GetAttr %arg0["_param_constant63"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1064 = torch.aten.t %1063 : !torch.tensor -> !torch.tensor
%1065 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1066 = torch.aten.view %1062, %1065 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1067 = torch.aten.mm %1066, %1064 : !torch.tensor, !torch.tensor -> !torch.tensor
%1068 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1069 = torch.aten._unsafe_view %1067, %1068 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1070 = torch.aten.relu %1069 : !torch.tensor -> !torch.tensor
%1071 = torch.prim.GetAttr %arg0["_param_constant64"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1072 = torch.aten.t %1071 : !torch.tensor -> !torch.tensor
%1073 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%1074 = torch.aten.view %1070, %1073 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1075 = torch.aten.mm %1074, %1072 : !torch.tensor, !torch.tensor -> !torch.tensor
%1076 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1077 = torch.aten._unsafe_view %1075, %1076 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection around the feed-forward block.
%1078 = torch.aten.add.Tensor %1054, %1077, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// RMS norm (weight _param_constant65) before the next block's self-attention.
%1079 = torch.aten.pow.Tensor_Scalar %1078, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1080 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1081 = torch.aten.mean.dim %1079, %1080, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1082 = torch.aten.add.Scalar %1081, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1083 = torch.aten.rsqrt %1082 : !torch.tensor -> !torch.tensor
%1084 = torch.aten.mul.Tensor %1078, %1083 : !torch.tensor, !torch.tensor -> !torch.tensor
%1085 = torch.prim.GetAttr %arg0["_param_constant65"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1086 = torch.aten.mul.Tensor %1085, %1084 : !torch.tensor, !torch.tensor -> !torch.tensor
// Self-attention Q projection (_param_constant66): [1,4,512] -> per-head [1,8,4,64].
%1087 = torch.prim.GetAttr %arg0["_param_constant66"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1088 = torch.aten.t %1087 : !torch.tensor -> !torch.tensor
%1089 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1090 = torch.aten.view %1086, %1089 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1091 = torch.aten.mm %1090, %1088 : !torch.tensor, !torch.tensor -> !torch.tensor
%1092 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1093 = torch.aten._unsafe_view %1091, %1092 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1094 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1095 = torch.aten.view %1093, %1094 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1096 = torch.aten.transpose.int %1095, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// K projection (_param_constant67), same head split/transpose.
%1097 = torch.prim.GetAttr %arg0["_param_constant67"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1098 = torch.aten.t %1097 : !torch.tensor -> !torch.tensor
%1099 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1100 = torch.aten.view %1086, %1099 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1101 = torch.aten.mm %1100, %1098 : !torch.tensor, !torch.tensor -> !torch.tensor
%1102 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1103 = torch.aten._unsafe_view %1101, %1102 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1104 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1105 = torch.aten.view %1103, %1104 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1106 = torch.aten.transpose.int %1105, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// V projection (_param_constant68), same head split/transpose.
%1107 = torch.prim.GetAttr %arg0["_param_constant68"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1108 = torch.aten.t %1107 : !torch.tensor -> !torch.tensor
%1109 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1110 = torch.aten.view %1086, %1109 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1111 = torch.aten.mm %1110, %1108 : !torch.tensor, !torch.tensor -> !torch.tensor
%1112 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1113 = torch.aten._unsafe_view %1111, %1112 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1114 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1115 = torch.aten.view %1113, %1114 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1116 = torch.aten.transpose.int %1115, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Attention scores: Q @ K^T via flattened batched matmul -> [1,8,4,4]. There is no
// explicit 1/sqrt(d) scaling in this chunk — NOTE(review): consistent with T5, which
// folds scaling into the weights; confirm against the source model.
%1117 = torch.aten.transpose.int %1106, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1118 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1119 = torch.aten.expand %1096, %1118, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1120 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1121 = torch.aten.view %1119, %1120 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1122 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1123 = torch.aten.expand %1117, %1122, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1124 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1125 = torch.aten.view %1123, %1124 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1126 = torch.aten.bmm %1121, %1125 : !torch.tensor, !torch.tensor -> !torch.tensor
%1127 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1128 = torch.aten._unsafe_view %1126, %1127 : !torch.tensor, !torch.list<int> -> !torch.tensor
// In-place add of the precomputed self-attention bias/mask %939 (defined earlier).
%1129 = torch.aten.add_.Tensor %1128, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Numerically-stable softmax over the last dim: amax, subtract, exp, sum, divide.
%1130 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1131 = torch.aten.amax %1129, %1130, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1132 = torch.aten.sub.Tensor %1129, %1131, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1133 = torch.aten.exp %1132 : !torch.tensor -> !torch.tensor
%1134 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1135 = torch.aten.sum.dim_IntList %1133, %1134, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1136 = torch.aten.div.Tensor %1133, %1135 : !torch.tensor, !torch.tensor -> !torch.tensor
// Attention context: softmax(scores) @ V -> [1,8,4,64].
%1137 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1138 = torch.aten.expand %1136, %1137, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1139 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1140 = torch.aten.view %1138, %1139 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1141 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1142 = torch.aten.expand %1116, %1141, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1143 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1144 = torch.aten.view %1142, %1143 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1145 = torch.aten.bmm %1140, %1144 : !torch.tensor, !torch.tensor -> !torch.tensor
%1146 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1147 = torch.aten._unsafe_view %1145, %1146 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Merge heads back to [1,-1,512] (clone makes the transposed tensor contiguous for view).
%1148 = torch.aten.transpose.int %1147, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1149 = torch.aten.clone %1148, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1150 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1151 = torch.aten.view %1149, %1150 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Self-attention output projection (_param_constant69).
%1152 = torch.prim.GetAttr %arg0["_param_constant69"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1153 = torch.aten.t %1152 : !torch.tensor -> !torch.tensor
%1154 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1155 = torch.aten.view %1151, %1154 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1156 = torch.aten.mm %1155, %1153 : !torch.tensor, !torch.tensor -> !torch.tensor
%1157 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1158 = torch.aten._unsafe_view %1156, %1157 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection around self-attention.
%1159 = torch.aten.add.Tensor %1078, %1158, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// RMS norm (weight _param_constant70) before cross-attention.
%1160 = torch.aten.pow.Tensor_Scalar %1159, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1161 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1162 = torch.aten.mean.dim %1160, %1161, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1163 = torch.aten.add.Scalar %1162, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1164 = torch.aten.rsqrt %1163 : !torch.tensor -> !torch.tensor
%1165 = torch.aten.mul.Tensor %1159, %1164 : !torch.tensor, !torch.tensor -> !torch.tensor
%1166 = torch.prim.GetAttr %arg0["_param_constant70"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1167 = torch.aten.mul.Tensor %1166, %1165 : !torch.tensor, !torch.tensor -> !torch.tensor
// Cross-attention Q from the decoder stream (_param_constant71) -> [1,8,4,64].
%1168 = torch.prim.GetAttr %arg0["_param_constant71"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1169 = torch.aten.t %1168 : !torch.tensor -> !torch.tensor
%1170 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1171 = torch.aten.view %1167, %1170 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1172 = torch.aten.mm %1171, %1169 : !torch.tensor, !torch.tensor -> !torch.tensor
%1173 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1174 = torch.aten._unsafe_view %1172, %1173 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1175 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1176 = torch.aten.view %1174, %1175 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1177 = torch.aten.transpose.int %1176, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention K (_param_constant72) from encoder states %824 (seq len 15) -> [1,8,15,64].
%1178 = torch.prim.GetAttr %arg0["_param_constant72"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1179 = torch.aten.t %1178 : !torch.tensor -> !torch.tensor
%1180 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1181 = torch.aten.view %824, %1180 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1182 = torch.aten.mm %1181, %1179 : !torch.tensor, !torch.tensor -> !torch.tensor
%1183 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1184 = torch.aten._unsafe_view %1182, %1183 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1185 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1186 = torch.aten.view %1184, %1185 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1187 = torch.aten.transpose.int %1186, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention V (_param_constant73) from encoder states %824.
%1188 = torch.prim.GetAttr %arg0["_param_constant73"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1189 = torch.aten.t %1188 : !torch.tensor -> !torch.tensor
%1190 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1191 = torch.aten.view %824, %1190 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1192 = torch.aten.mm %1191, %1189 : !torch.tensor, !torch.tensor -> !torch.tensor
%1193 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1194 = torch.aten._unsafe_view %1192, %1193 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1195 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1196 = torch.aten.view %1194, %1195 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1197 = torch.aten.transpose.int %1196, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention scores: Q @ K^T -> [1,8,4,15].
%1198 = torch.aten.transpose.int %1187, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1199 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1200 = torch.aten.expand %1177, %1199, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1201 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1202 = torch.aten.view %1200, %1201 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1203 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1204 = torch.aten.expand %1198, %1203, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1205 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1206 = torch.aten.view %1204, %1205 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1207 = torch.aten.bmm %1202, %1206 : !torch.tensor, !torch.tensor -> !torch.tensor
%1208 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1209 = torch.aten._unsafe_view %1207, %1208 : !torch.tensor, !torch.list<int> -> !torch.tensor
// In-place add of the encoder-decoder attention bias/mask %1023 (defined earlier).
%1210 = torch.aten.add_.Tensor %1209, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Stable softmax over the encoder dimension.
%1211 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1212 = torch.aten.amax %1210, %1211, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1213 = torch.aten.sub.Tensor %1210, %1212, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1214 = torch.aten.exp %1213 : !torch.tensor -> !torch.tensor
%1215 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1216 = torch.aten.sum.dim_IntList %1214, %1215, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1217 = torch.aten.div.Tensor %1214, %1216 : !torch.tensor, !torch.tensor -> !torch.tensor
// Cross-attention context: weights @ V -> [1,8,4,64], then merge heads to [1,-1,512].
%1218 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1219 = torch.aten.expand %1217, %1218, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1220 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1221 = torch.aten.view %1219, %1220 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1222 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1223 = torch.aten.expand %1197, %1222, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1224 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1225 = torch.aten.view %1223, %1224 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1226 = torch.aten.bmm %1221, %1225 : !torch.tensor, !torch.tensor -> !torch.tensor
%1227 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1228 = torch.aten._unsafe_view %1226, %1227 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1229 = torch.aten.transpose.int %1228, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1230 = torch.aten.clone %1229, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1231 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1232 = torch.aten.view %1230, %1231 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Cross-attention output projection (_param_constant74).
%1233 = torch.prim.GetAttr %arg0["_param_constant74"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1234 = torch.aten.t %1233 : !torch.tensor -> !torch.tensor
%1235 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1236 = torch.aten.view %1232, %1235 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1237 = torch.aten.mm %1236, %1234 : !torch.tensor, !torch.tensor -> !torch.tensor
%1238 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1239 = torch.aten._unsafe_view %1237, %1238 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection around cross-attention.
%1240 = torch.aten.add.Tensor %1159, %1239, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// RMS norm (weight _param_constant75) before the feed-forward sub-layer.
%1241 = torch.aten.pow.Tensor_Scalar %1240, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1242 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1243 = torch.aten.mean.dim %1241, %1242, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1244 = torch.aten.add.Scalar %1243, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1245 = torch.aten.rsqrt %1244 : !torch.tensor -> !torch.tensor
%1246 = torch.aten.mul.Tensor %1240, %1245 : !torch.tensor, !torch.tensor -> !torch.tensor
%1247 = torch.prim.GetAttr %arg0["_param_constant75"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1248 = torch.aten.mul.Tensor %1247, %1246 : !torch.tensor, !torch.tensor -> !torch.tensor
// Feed-forward: wi (_param_constant76) 512 -> 2048, ReLU, wo (_param_constant77) 2048 -> 512.
%1249 = torch.prim.GetAttr %arg0["_param_constant76"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1250 = torch.aten.t %1249 : !torch.tensor -> !torch.tensor
%1251 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1252 = torch.aten.view %1248, %1251 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1253 = torch.aten.mm %1252, %1250 : !torch.tensor, !torch.tensor -> !torch.tensor
%1254 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1255 = torch.aten._unsafe_view %1253, %1254 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1256 = torch.aten.relu %1255 : !torch.tensor -> !torch.tensor
%1257 = torch.prim.GetAttr %arg0["_param_constant77"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1258 = torch.aten.t %1257 : !torch.tensor -> !torch.tensor
%1259 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%1260 = torch.aten.view %1256, %1259 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1261 = torch.aten.mm %1260, %1258 : !torch.tensor, !torch.tensor -> !torch.tensor
%1262 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1263 = torch.aten._unsafe_view %1261, %1262 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection around the feed-forward block.
%1264 = torch.aten.add.Tensor %1240, %1263, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// --- Next decoder block: RMS norm (_param_constant78) before self-attention ---
%1265 = torch.aten.pow.Tensor_Scalar %1264, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1266 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1267 = torch.aten.mean.dim %1265, %1266, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1268 = torch.aten.add.Scalar %1267, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1269 = torch.aten.rsqrt %1268 : !torch.tensor -> !torch.tensor
%1270 = torch.aten.mul.Tensor %1264, %1269 : !torch.tensor, !torch.tensor -> !torch.tensor
%1271 = torch.prim.GetAttr %arg0["_param_constant78"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1272 = torch.aten.mul.Tensor %1271, %1270 : !torch.tensor, !torch.tensor -> !torch.tensor
// Self-attention Q projection (_param_constant79).
%1273 = torch.prim.GetAttr %arg0["_param_constant79"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1274 = torch.aten.t %1273 : !torch.tensor -> !torch.tensor
%1275 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1276 = torch.aten.view %1272, %1275 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1277 = torch.aten.mm %1276, %1274 : !torch.tensor, !torch.tensor -> !torch.tensor
%1278 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1279 = torch.aten._unsafe_view %1277, %1278 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1280 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1281 = torch.aten.view %1279, %1280 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1282 = torch.aten.transpose.int %1281, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// K projection (_param_constant80).
%1283 = torch.prim.GetAttr %arg0["_param_constant80"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1284 = torch.aten.t %1283 : !torch.tensor -> !torch.tensor
%1285 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1286 = torch.aten.view %1272, %1285 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1287 = torch.aten.mm %1286, %1284 : !torch.tensor, !torch.tensor -> !torch.tensor
%1288 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1289 = torch.aten._unsafe_view %1287, %1288 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1290 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1291 = torch.aten.view %1289, %1290 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1292 = torch.aten.transpose.int %1291, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// V projection (_param_constant81).
%1293 = torch.prim.GetAttr %arg0["_param_constant81"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1294 = torch.aten.t %1293 : !torch.tensor -> !torch.tensor
%1295 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1296 = torch.aten.view %1272, %1295 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1297 = torch.aten.mm %1296, %1294 : !torch.tensor, !torch.tensor -> !torch.tensor
%1298 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1299 = torch.aten._unsafe_view %1297, %1298 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1300 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1301 = torch.aten.view %1299, %1300 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1302 = torch.aten.transpose.int %1301, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Self-attention scores: Q @ K^T -> [1,8,4,4].
%1303 = torch.aten.transpose.int %1292, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1304 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1305 = torch.aten.expand %1282, %1304, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1306 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1307 = torch.aten.view %1305, %1306 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1308 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1309 = torch.aten.expand %1303, %1308, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1310 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1311 = torch.aten.view %1309, %1310 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1312 = torch.aten.bmm %1307, %1311 : !torch.tensor, !torch.tensor -> !torch.tensor
%1313 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1314 = torch.aten._unsafe_view %1312, %1313 : !torch.tensor, !torch.list<int> -> !torch.tensor
// In-place add of the same self-attention bias/mask %939 (shared across blocks).
%1315 = torch.aten.add_.Tensor %1314, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Stable softmax.
%1316 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1317 = torch.aten.amax %1315, %1316, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1318 = torch.aten.sub.Tensor %1315, %1317, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1319 = torch.aten.exp %1318 : !torch.tensor -> !torch.tensor
%1320 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1321 = torch.aten.sum.dim_IntList %1319, %1320, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1322 = torch.aten.div.Tensor %1319, %1321 : !torch.tensor, !torch.tensor -> !torch.tensor
// Context: weights @ V, merge heads, output projection (_param_constant82).
%1323 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1324 = torch.aten.expand %1322, %1323, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1325 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1326 = torch.aten.view %1324, %1325 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1327 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1328 = torch.aten.expand %1302, %1327, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1329 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1330 = torch.aten.view %1328, %1329 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1331 = torch.aten.bmm %1326, %1330 : !torch.tensor, !torch.tensor -> !torch.tensor
%1332 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1333 = torch.aten._unsafe_view %1331, %1332 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1334 = torch.aten.transpose.int %1333, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1335 = torch.aten.clone %1334, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1336 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1337 = torch.aten.view %1335, %1336 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1338 = torch.prim.GetAttr %arg0["_param_constant82"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1339 = torch.aten.t %1338 : !torch.tensor -> !torch.tensor
%1340 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1341 = torch.aten.view %1337, %1340 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1342 = torch.aten.mm %1341, %1339 : !torch.tensor, !torch.tensor -> !torch.tensor
%1343 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1344 = torch.aten._unsafe_view %1342, %1343 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection around self-attention.
%1345 = torch.aten.add.Tensor %1264, %1344, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// RMS norm (weight _param_constant83) before cross-attention.
%1346 = torch.aten.pow.Tensor_Scalar %1345, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1347 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1348 = torch.aten.mean.dim %1346, %1347, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1349 = torch.aten.add.Scalar %1348, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1350 = torch.aten.rsqrt %1349 : !torch.tensor -> !torch.tensor
%1351 = torch.aten.mul.Tensor %1345, %1350 : !torch.tensor, !torch.tensor -> !torch.tensor
%1352 = torch.prim.GetAttr %arg0["_param_constant83"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1353 = torch.aten.mul.Tensor %1352, %1351 : !torch.tensor, !torch.tensor -> !torch.tensor
// Cross-attention Q from the decoder stream (_param_constant84).
%1354 = torch.prim.GetAttr %arg0["_param_constant84"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1355 = torch.aten.t %1354 : !torch.tensor -> !torch.tensor
%1356 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1357 = torch.aten.view %1353, %1356 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1358 = torch.aten.mm %1357, %1355 : !torch.tensor, !torch.tensor -> !torch.tensor
%1359 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1360 = torch.aten._unsafe_view %1358, %1359 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1361 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1362 = torch.aten.view %1360, %1361 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1363 = torch.aten.transpose.int %1362, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention K (_param_constant85) from encoder states %824.
%1364 = torch.prim.GetAttr %arg0["_param_constant85"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1365 = torch.aten.t %1364 : !torch.tensor -> !torch.tensor
%1366 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1367 = torch.aten.view %824, %1366 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1368 = torch.aten.mm %1367, %1365 : !torch.tensor, !torch.tensor -> !torch.tensor
%1369 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1370 = torch.aten._unsafe_view %1368, %1369 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1371 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1372 = torch.aten.view %1370, %1371 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1373 = torch.aten.transpose.int %1372, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention V (_param_constant86) from encoder states %824.
%1374 = torch.prim.GetAttr %arg0["_param_constant86"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1375 = torch.aten.t %1374 : !torch.tensor -> !torch.tensor
%1376 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1377 = torch.aten.view %824, %1376 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1378 = torch.aten.mm %1377, %1375 : !torch.tensor, !torch.tensor -> !torch.tensor
%1379 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1380 = torch.aten._unsafe_view %1378, %1379 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1381 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1382 = torch.aten.view %1380, %1381 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1383 = torch.aten.transpose.int %1382, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention scores: Q @ K^T -> [1,8,4,15].
%1384 = torch.aten.transpose.int %1373, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1385 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1386 = torch.aten.expand %1363, %1385, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1387 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1388 = torch.aten.view %1386, %1387 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1389 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1390 = torch.aten.expand %1384, %1389, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1391 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1392 = torch.aten.view %1390, %1391 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1393 = torch.aten.bmm %1388, %1392 : !torch.tensor, !torch.tensor -> !torch.tensor
%1394 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1395 = torch.aten._unsafe_view %1393, %1394 : !torch.tensor, !torch.list<int> -> !torch.tensor
// In-place add of the shared encoder-decoder bias/mask %1023.
%1396 = torch.aten.add_.Tensor %1395, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Softmax begins here; the remaining ops continue past the end of this view.
%1397 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1398 = torch.aten.amax %1396, %1397, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1399 = torch.aten.sub.Tensor %1396, %1398, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1400 = torch.aten.exp %1399 : !torch.tensor -> !torch.tensor
%1401 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1402 = torch.aten.sum.dim_IntList %1400, %1401, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1403 = torch.aten.div.Tensor %1400, %1402 : !torch.tensor, !torch.tensor -> !torch.tensor
%1404 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1405 = torch.aten.expand %1403, %1404, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1406 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1407 = torch.aten.view %1405, %1406 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1408 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1409 = torch.aten.expand %1383, %1408, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1410 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1411 = torch.aten.view %1409, %1410 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1412 = torch.aten.bmm %1407, %1411 : !torch.tensor, !torch.tensor -> !torch.tensor
%1413 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1414 = torch.aten._unsafe_view %1412, %1413 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1415 = torch.aten.transpose.int %1414, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1416 = torch.aten.clone %1415, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1417 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1418 = torch.aten.view %1416, %1417 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1419 = torch.prim.GetAttr %arg0["_param_constant87"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1420 = torch.aten.t %1419 : !torch.tensor -> !torch.tensor
%1421 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1422 = torch.aten.view %1418, %1421 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1423 = torch.aten.mm %1422, %1420 : !torch.tensor, !torch.tensor -> !torch.tensor
%1424 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1425 = torch.aten._unsafe_view %1423, %1424 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1426 = torch.aten.add.Tensor %1345, %1425, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1427 = torch.aten.pow.Tensor_Scalar %1426, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1428 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1429 = torch.aten.mean.dim %1427, %1428, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1430 = torch.aten.add.Scalar %1429, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1431 = torch.aten.rsqrt %1430 : !torch.tensor -> !torch.tensor
%1432 = torch.aten.mul.Tensor %1426, %1431 : !torch.tensor, !torch.tensor -> !torch.tensor
%1433 = torch.prim.GetAttr %arg0["_param_constant88"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1434 = torch.aten.mul.Tensor %1433, %1432 : !torch.tensor, !torch.tensor -> !torch.tensor
%1435 = torch.prim.GetAttr %arg0["_param_constant89"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1436 = torch.aten.t %1435 : !torch.tensor -> !torch.tensor
%1437 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1438 = torch.aten.view %1434, %1437 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1439 = torch.aten.mm %1438, %1436 : !torch.tensor, !torch.tensor -> !torch.tensor
%1440 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1441 = torch.aten._unsafe_view %1439, %1440 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1442 = torch.aten.relu %1441 : !torch.tensor -> !torch.tensor
%1443 = torch.prim.GetAttr %arg0["_param_constant90"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1444 = torch.aten.t %1443 : !torch.tensor -> !torch.tensor
%1445 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%1446 = torch.aten.view %1442, %1445 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1447 = torch.aten.mm %1446, %1444 : !torch.tensor, !torch.tensor -> !torch.tensor
%1448 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1449 = torch.aten._unsafe_view %1447, %1448 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1450 = torch.aten.add.Tensor %1426, %1449, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1451 = torch.aten.pow.Tensor_Scalar %1450, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1452 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1453 = torch.aten.mean.dim %1451, %1452, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1454 = torch.aten.add.Scalar %1453, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1455 = torch.aten.rsqrt %1454 : !torch.tensor -> !torch.tensor
%1456 = torch.aten.mul.Tensor %1450, %1455 : !torch.tensor, !torch.tensor -> !torch.tensor
%1457 = torch.prim.GetAttr %arg0["_param_constant91"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1458 = torch.aten.mul.Tensor %1457, %1456 : !torch.tensor, !torch.tensor -> !torch.tensor
%1459 = torch.prim.GetAttr %arg0["_param_constant92"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1460 = torch.aten.t %1459 : !torch.tensor -> !torch.tensor
%1461 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1462 = torch.aten.view %1458, %1461 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1463 = torch.aten.mm %1462, %1460 : !torch.tensor, !torch.tensor -> !torch.tensor
%1464 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1465 = torch.aten._unsafe_view %1463, %1464 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1466 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1467 = torch.aten.view %1465, %1466 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1468 = torch.aten.transpose.int %1467, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1469 = torch.prim.GetAttr %arg0["_param_constant93"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1470 = torch.aten.t %1469 : !torch.tensor -> !torch.tensor
%1471 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1472 = torch.aten.view %1458, %1471 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1473 = torch.aten.mm %1472, %1470 : !torch.tensor, !torch.tensor -> !torch.tensor
%1474 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1475 = torch.aten._unsafe_view %1473, %1474 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1476 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1477 = torch.aten.view %1475, %1476 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1478 = torch.aten.transpose.int %1477, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1479 = torch.prim.GetAttr %arg0["_param_constant94"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1480 = torch.aten.t %1479 : !torch.tensor -> !torch.tensor
%1481 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1482 = torch.aten.view %1458, %1481 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1483 = torch.aten.mm %1482, %1480 : !torch.tensor, !torch.tensor -> !torch.tensor
%1484 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1485 = torch.aten._unsafe_view %1483, %1484 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1486 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1487 = torch.aten.view %1485, %1486 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1488 = torch.aten.transpose.int %1487, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1489 = torch.aten.transpose.int %1478, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1490 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1491 = torch.aten.expand %1468, %1490, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1492 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1493 = torch.aten.view %1491, %1492 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1494 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1495 = torch.aten.expand %1489, %1494, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1496 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1497 = torch.aten.view %1495, %1496 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1498 = torch.aten.bmm %1493, %1497 : !torch.tensor, !torch.tensor -> !torch.tensor
%1499 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1500 = torch.aten._unsafe_view %1498, %1499 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1501 = torch.aten.add_.Tensor %1500, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1502 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1503 = torch.aten.amax %1501, %1502, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1504 = torch.aten.sub.Tensor %1501, %1503, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1505 = torch.aten.exp %1504 : !torch.tensor -> !torch.tensor
%1506 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1507 = torch.aten.sum.dim_IntList %1505, %1506, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1508 = torch.aten.div.Tensor %1505, %1507 : !torch.tensor, !torch.tensor -> !torch.tensor
%1509 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1510 = torch.aten.expand %1508, %1509, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1511 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1512 = torch.aten.view %1510, %1511 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1513 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1514 = torch.aten.expand %1488, %1513, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1515 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1516 = torch.aten.view %1514, %1515 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1517 = torch.aten.bmm %1512, %1516 : !torch.tensor, !torch.tensor -> !torch.tensor
%1518 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1519 = torch.aten._unsafe_view %1517, %1518 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1520 = torch.aten.transpose.int %1519, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1521 = torch.aten.clone %1520, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1522 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1523 = torch.aten.view %1521, %1522 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1524 = torch.prim.GetAttr %arg0["_param_constant95"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1525 = torch.aten.t %1524 : !torch.tensor -> !torch.tensor
%1526 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1527 = torch.aten.view %1523, %1526 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1528 = torch.aten.mm %1527, %1525 : !torch.tensor, !torch.tensor -> !torch.tensor
%1529 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1530 = torch.aten._unsafe_view %1528, %1529 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1531 = torch.aten.add.Tensor %1450, %1530, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1532 = torch.aten.pow.Tensor_Scalar %1531, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1533 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1534 = torch.aten.mean.dim %1532, %1533, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1535 = torch.aten.add.Scalar %1534, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1536 = torch.aten.rsqrt %1535 : !torch.tensor -> !torch.tensor
%1537 = torch.aten.mul.Tensor %1531, %1536 : !torch.tensor, !torch.tensor -> !torch.tensor
%1538 = torch.prim.GetAttr %arg0["_param_constant96"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1539 = torch.aten.mul.Tensor %1538, %1537 : !torch.tensor, !torch.tensor -> !torch.tensor
%1540 = torch.prim.GetAttr %arg0["_param_constant97"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1541 = torch.aten.t %1540 : !torch.tensor -> !torch.tensor
%1542 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1543 = torch.aten.view %1539, %1542 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1544 = torch.aten.mm %1543, %1541 : !torch.tensor, !torch.tensor -> !torch.tensor
%1545 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1546 = torch.aten._unsafe_view %1544, %1545 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1547 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1548 = torch.aten.view %1546, %1547 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1549 = torch.aten.transpose.int %1548, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1550 = torch.prim.GetAttr %arg0["_param_constant98"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1551 = torch.aten.t %1550 : !torch.tensor -> !torch.tensor
%1552 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1553 = torch.aten.view %824, %1552 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1554 = torch.aten.mm %1553, %1551 : !torch.tensor, !torch.tensor -> !torch.tensor
%1555 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1556 = torch.aten._unsafe_view %1554, %1555 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1557 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1558 = torch.aten.view %1556, %1557 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1559 = torch.aten.transpose.int %1558, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1560 = torch.prim.GetAttr %arg0["_param_constant99"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1561 = torch.aten.t %1560 : !torch.tensor -> !torch.tensor
%1562 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1563 = torch.aten.view %824, %1562 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1564 = torch.aten.mm %1563, %1561 : !torch.tensor, !torch.tensor -> !torch.tensor
%1565 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1566 = torch.aten._unsafe_view %1564, %1565 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1567 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1568 = torch.aten.view %1566, %1567 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1569 = torch.aten.transpose.int %1568, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1570 = torch.aten.transpose.int %1559, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1571 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1572 = torch.aten.expand %1549, %1571, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1573 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1574 = torch.aten.view %1572, %1573 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1575 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1576 = torch.aten.expand %1570, %1575, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1577 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1578 = torch.aten.view %1576, %1577 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1579 = torch.aten.bmm %1574, %1578 : !torch.tensor, !torch.tensor -> !torch.tensor
%1580 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1581 = torch.aten._unsafe_view %1579, %1580 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1582 = torch.aten.add_.Tensor %1581, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1583 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1584 = torch.aten.amax %1582, %1583, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1585 = torch.aten.sub.Tensor %1582, %1584, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1586 = torch.aten.exp %1585 : !torch.tensor -> !torch.tensor
%1587 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1588 = torch.aten.sum.dim_IntList %1586, %1587, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1589 = torch.aten.div.Tensor %1586, %1588 : !torch.tensor, !torch.tensor -> !torch.tensor
%1590 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1591 = torch.aten.expand %1589, %1590, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1592 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1593 = torch.aten.view %1591, %1592 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1594 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1595 = torch.aten.expand %1569, %1594, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1596 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1597 = torch.aten.view %1595, %1596 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1598 = torch.aten.bmm %1593, %1597 : !torch.tensor, !torch.tensor -> !torch.tensor
%1599 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1600 = torch.aten._unsafe_view %1598, %1599 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1601 = torch.aten.transpose.int %1600, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1602 = torch.aten.clone %1601, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1603 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1604 = torch.aten.view %1602, %1603 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1605 = torch.prim.GetAttr %arg0["_param_constant100"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1606 = torch.aten.t %1605 : !torch.tensor -> !torch.tensor
%1607 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1608 = torch.aten.view %1604, %1607 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1609 = torch.aten.mm %1608, %1606 : !torch.tensor, !torch.tensor -> !torch.tensor
%1610 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1611 = torch.aten._unsafe_view %1609, %1610 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1612 = torch.aten.add.Tensor %1531, %1611, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1613 = torch.aten.pow.Tensor_Scalar %1612, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1614 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1615 = torch.aten.mean.dim %1613, %1614, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1616 = torch.aten.add.Scalar %1615, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1617 = torch.aten.rsqrt %1616 : !torch.tensor -> !torch.tensor
%1618 = torch.aten.mul.Tensor %1612, %1617 : !torch.tensor, !torch.tensor -> !torch.tensor
%1619 = torch.prim.GetAttr %arg0["_param_constant101"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1620 = torch.aten.mul.Tensor %1619, %1618 : !torch.tensor, !torch.tensor -> !torch.tensor
%1621 = torch.prim.GetAttr %arg0["_param_constant102"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1622 = torch.aten.t %1621 : !torch.tensor -> !torch.tensor
%1623 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1624 = torch.aten.view %1620, %1623 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1625 = torch.aten.mm %1624, %1622 : !torch.tensor, !torch.tensor -> !torch.tensor
%1626 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1627 = torch.aten._unsafe_view %1625, %1626 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1628 = torch.aten.relu %1627 : !torch.tensor -> !torch.tensor
%1629 = torch.prim.GetAttr %arg0["_param_constant103"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1630 = torch.aten.t %1629 : !torch.tensor -> !torch.tensor
%1631 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%1632 = torch.aten.view %1628, %1631 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1633 = torch.aten.mm %1632, %1630 : !torch.tensor, !torch.tensor -> !torch.tensor
%1634 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1635 = torch.aten._unsafe_view %1633, %1634 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1636 = torch.aten.add.Tensor %1612, %1635, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1637 = torch.aten.pow.Tensor_Scalar %1636, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1638 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1639 = torch.aten.mean.dim %1637, %1638, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1640 = torch.aten.add.Scalar %1639, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1641 = torch.aten.rsqrt %1640 : !torch.tensor -> !torch.tensor
%1642 = torch.aten.mul.Tensor %1636, %1641 : !torch.tensor, !torch.tensor -> !torch.tensor
%1643 = torch.prim.GetAttr %arg0["_param_constant104"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1644 = torch.aten.mul.Tensor %1643, %1642 : !torch.tensor, !torch.tensor -> !torch.tensor
%1645 = torch.prim.GetAttr %arg0["_param_constant105"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1646 = torch.aten.t %1645 : !torch.tensor -> !torch.tensor
%1647 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1648 = torch.aten.view %1644, %1647 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1649 = torch.aten.mm %1648, %1646 : !torch.tensor, !torch.tensor -> !torch.tensor
%1650 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1651 = torch.aten._unsafe_view %1649, %1650 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1652 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1653 = torch.aten.view %1651, %1652 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1654 = torch.aten.transpose.int %1653, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1655 = torch.prim.GetAttr %arg0["_param_constant106"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1656 = torch.aten.t %1655 : !torch.tensor -> !torch.tensor
%1657 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1658 = torch.aten.view %1644, %1657 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1659 = torch.aten.mm %1658, %1656 : !torch.tensor, !torch.tensor -> !torch.tensor
%1660 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1661 = torch.aten._unsafe_view %1659, %1660 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1662 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1663 = torch.aten.view %1661, %1662 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1664 = torch.aten.transpose.int %1663, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1665 = torch.prim.GetAttr %arg0["_param_constant107"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1666 = torch.aten.t %1665 : !torch.tensor -> !torch.tensor
%1667 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1668 = torch.aten.view %1644, %1667 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1669 = torch.aten.mm %1668, %1666 : !torch.tensor, !torch.tensor -> !torch.tensor
%1670 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1671 = torch.aten._unsafe_view %1669, %1670 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1672 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1673 = torch.aten.view %1671, %1672 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1674 = torch.aten.transpose.int %1673, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1675 = torch.aten.transpose.int %1664, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1676 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1677 = torch.aten.expand %1654, %1676, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1678 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1679 = torch.aten.view %1677, %1678 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1680 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1681 = torch.aten.expand %1675, %1680, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1682 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1683 = torch.aten.view %1681, %1682 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1684 = torch.aten.bmm %1679, %1683 : !torch.tensor, !torch.tensor -> !torch.tensor
%1685 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1686 = torch.aten._unsafe_view %1684, %1685 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1687 = torch.aten.add_.Tensor %1686, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1688 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1689 = torch.aten.amax %1687, %1688, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1690 = torch.aten.sub.Tensor %1687, %1689, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1691 = torch.aten.exp %1690 : !torch.tensor -> !torch.tensor
%1692 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1693 = torch.aten.sum.dim_IntList %1691, %1692, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1694 = torch.aten.div.Tensor %1691, %1693 : !torch.tensor, !torch.tensor -> !torch.tensor
%1695 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1696 = torch.aten.expand %1694, %1695, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1697 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1698 = torch.aten.view %1696, %1697 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1699 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1700 = torch.aten.expand %1674, %1699, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1701 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1702 = torch.aten.view %1700, %1701 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1703 = torch.aten.bmm %1698, %1702 : !torch.tensor, !torch.tensor -> !torch.tensor
%1704 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1705 = torch.aten._unsafe_view %1703, %1704 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1706 = torch.aten.transpose.int %1705, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1707 = torch.aten.clone %1706, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1708 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1709 = torch.aten.view %1707, %1708 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1710 = torch.prim.GetAttr %arg0["_param_constant108"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1711 = torch.aten.t %1710 : !torch.tensor -> !torch.tensor
%1712 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1713 = torch.aten.view %1709, %1712 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1714 = torch.aten.mm %1713, %1711 : !torch.tensor, !torch.tensor -> !torch.tensor
%1715 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1716 = torch.aten._unsafe_view %1714, %1715 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1717 = torch.aten.add.Tensor %1636, %1716, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1718 = torch.aten.pow.Tensor_Scalar %1717, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1719 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1720 = torch.aten.mean.dim %1718, %1719, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1721 = torch.aten.add.Scalar %1720, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1722 = torch.aten.rsqrt %1721 : !torch.tensor -> !torch.tensor
%1723 = torch.aten.mul.Tensor %1717, %1722 : !torch.tensor, !torch.tensor -> !torch.tensor
%1724 = torch.prim.GetAttr %arg0["_param_constant109"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1725 = torch.aten.mul.Tensor %1724, %1723 : !torch.tensor, !torch.tensor -> !torch.tensor
%1726 = torch.prim.GetAttr %arg0["_param_constant110"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1727 = torch.aten.t %1726 : !torch.tensor -> !torch.tensor
%1728 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1729 = torch.aten.view %1725, %1728 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1730 = torch.aten.mm %1729, %1727 : !torch.tensor, !torch.tensor -> !torch.tensor
%1731 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1732 = torch.aten._unsafe_view %1730, %1731 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1733 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1734 = torch.aten.view %1732, %1733 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1735 = torch.aten.transpose.int %1734, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1736 = torch.prim.GetAttr %arg0["_param_constant111"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1737 = torch.aten.t %1736 : !torch.tensor -> !torch.tensor
%1738 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1739 = torch.aten.view %824, %1738 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1740 = torch.aten.mm %1739, %1737 : !torch.tensor, !torch.tensor -> !torch.tensor
%1741 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1742 = torch.aten._unsafe_view %1740, %1741 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1743 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1744 = torch.aten.view %1742, %1743 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1745 = torch.aten.transpose.int %1744, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1746 = torch.prim.GetAttr %arg0["_param_constant112"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1747 = torch.aten.t %1746 : !torch.tensor -> !torch.tensor
%1748 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1749 = torch.aten.view %824, %1748 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1750 = torch.aten.mm %1749, %1747 : !torch.tensor, !torch.tensor -> !torch.tensor
%1751 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1752 = torch.aten._unsafe_view %1750, %1751 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1753 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1754 = torch.aten.view %1752, %1753 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1755 = torch.aten.transpose.int %1754, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1756 = torch.aten.transpose.int %1745, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1757 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1758 = torch.aten.expand %1735, %1757, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1759 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1760 = torch.aten.view %1758, %1759 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1761 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1762 = torch.aten.expand %1756, %1761, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1763 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1764 = torch.aten.view %1762, %1763 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1765 = torch.aten.bmm %1760, %1764 : !torch.tensor, !torch.tensor -> !torch.tensor
%1766 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1767 = torch.aten._unsafe_view %1765, %1766 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1768 = torch.aten.add_.Tensor %1767, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1769 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1770 = torch.aten.amax %1768, %1769, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1771 = torch.aten.sub.Tensor %1768, %1770, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1772 = torch.aten.exp %1771 : !torch.tensor -> !torch.tensor
%1773 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1774 = torch.aten.sum.dim_IntList %1772, %1773, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1775 = torch.aten.div.Tensor %1772, %1774 : !torch.tensor, !torch.tensor -> !torch.tensor
%1776 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1777 = torch.aten.expand %1775, %1776, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1778 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1779 = torch.aten.view %1777, %1778 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1780 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1781 = torch.aten.expand %1755, %1780, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1782 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1783 = torch.aten.view %1781, %1782 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1784 = torch.aten.bmm %1779, %1783 : !torch.tensor, !torch.tensor -> !torch.tensor
%1785 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1786 = torch.aten._unsafe_view %1784, %1785 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1787 = torch.aten.transpose.int %1786, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1788 = torch.aten.clone %1787, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1789 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1790 = torch.aten.view %1788, %1789 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1791 = torch.prim.GetAttr %arg0["_param_constant113"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1792 = torch.aten.t %1791 : !torch.tensor -> !torch.tensor
%1793 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1794 = torch.aten.view %1790, %1793 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1795 = torch.aten.mm %1794, %1792 : !torch.tensor, !torch.tensor -> !torch.tensor
%1796 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1797 = torch.aten._unsafe_view %1795, %1796 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1798 = torch.aten.add.Tensor %1717, %1797, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1799 = torch.aten.pow.Tensor_Scalar %1798, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1800 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1801 = torch.aten.mean.dim %1799, %1800, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1802 = torch.aten.add.Scalar %1801, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1803 = torch.aten.rsqrt %1802 : !torch.tensor -> !torch.tensor
%1804 = torch.aten.mul.Tensor %1798, %1803 : !torch.tensor, !torch.tensor -> !torch.tensor
%1805 = torch.prim.GetAttr %arg0["_param_constant114"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1806 = torch.aten.mul.Tensor %1805, %1804 : !torch.tensor, !torch.tensor -> !torch.tensor
%1807 = torch.prim.GetAttr %arg0["_param_constant115"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1808 = torch.aten.t %1807 : !torch.tensor -> !torch.tensor
%1809 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1810 = torch.aten.view %1806, %1809 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1811 = torch.aten.mm %1810, %1808 : !torch.tensor, !torch.tensor -> !torch.tensor
%1812 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1813 = torch.aten._unsafe_view %1811, %1812 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1814 = torch.aten.relu %1813 : !torch.tensor -> !torch.tensor
%1815 = torch.prim.GetAttr %arg0["_param_constant116"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1816 = torch.aten.t %1815 : !torch.tensor -> !torch.tensor
%1817 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%1818 = torch.aten.view %1814, %1817 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1819 = torch.aten.mm %1818, %1816 : !torch.tensor, !torch.tensor -> !torch.tensor
%1820 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1821 = torch.aten._unsafe_view %1819, %1820 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1822 = torch.aten.add.Tensor %1798, %1821, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1823 = torch.aten.pow.Tensor_Scalar %1822, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1824 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1825 = torch.aten.mean.dim %1823, %1824, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1826 = torch.aten.add.Scalar %1825, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1827 = torch.aten.rsqrt %1826 : !torch.tensor -> !torch.tensor
%1828 = torch.aten.mul.Tensor %1822, %1827 : !torch.tensor, !torch.tensor -> !torch.tensor
%1829 = torch.prim.GetAttr %arg0["_param_constant117"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1830 = torch.aten.mul.Tensor %1829, %1828 : !torch.tensor, !torch.tensor -> !torch.tensor
%1831 = torch.prim.GetAttr %arg0["_param_constant118"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1832 = torch.aten.t %1831 : !torch.tensor -> !torch.tensor
%1833 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1834 = torch.aten.view %1830, %1833 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1835 = torch.aten.mm %1834, %1832 : !torch.tensor, !torch.tensor -> !torch.tensor
%1836 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1837 = torch.aten._unsafe_view %1835, %1836 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1838 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1839 = torch.aten.view %1837, %1838 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1840 = torch.aten.transpose.int %1839, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1841 = torch.prim.GetAttr %arg0["_param_constant119"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1842 = torch.aten.t %1841 : !torch.tensor -> !torch.tensor
%1843 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1844 = torch.aten.view %1830, %1843 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1845 = torch.aten.mm %1844, %1842 : !torch.tensor, !torch.tensor -> !torch.tensor
%1846 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1847 = torch.aten._unsafe_view %1845, %1846 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1848 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1849 = torch.aten.view %1847, %1848 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1850 = torch.aten.transpose.int %1849, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1851 = torch.prim.GetAttr %arg0["_param_constant120"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1852 = torch.aten.t %1851 : !torch.tensor -> !torch.tensor
%1853 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1854 = torch.aten.view %1830, %1853 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1855 = torch.aten.mm %1854, %1852 : !torch.tensor, !torch.tensor -> !torch.tensor
%1856 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1857 = torch.aten._unsafe_view %1855, %1856 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1858 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1859 = torch.aten.view %1857, %1858 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1860 = torch.aten.transpose.int %1859, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1861 = torch.aten.transpose.int %1850, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1862 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1863 = torch.aten.expand %1840, %1862, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1864 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1865 = torch.aten.view %1863, %1864 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1866 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1867 = torch.aten.expand %1861, %1866, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1868 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1869 = torch.aten.view %1867, %1868 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1870 = torch.aten.bmm %1865, %1869 : !torch.tensor, !torch.tensor -> !torch.tensor
%1871 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1872 = torch.aten._unsafe_view %1870, %1871 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1873 = torch.aten.add_.Tensor %1872, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1874 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1875 = torch.aten.amax %1873, %1874, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1876 = torch.aten.sub.Tensor %1873, %1875, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1877 = torch.aten.exp %1876 : !torch.tensor -> !torch.tensor
%1878 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1879 = torch.aten.sum.dim_IntList %1877, %1878, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1880 = torch.aten.div.Tensor %1877, %1879 : !torch.tensor, !torch.tensor -> !torch.tensor
%1881 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1882 = torch.aten.expand %1880, %1881, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1883 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1884 = torch.aten.view %1882, %1883 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1885 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1886 = torch.aten.expand %1860, %1885, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1887 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1888 = torch.aten.view %1886, %1887 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1889 = torch.aten.bmm %1884, %1888 : !torch.tensor, !torch.tensor -> !torch.tensor
%1890 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1891 = torch.aten._unsafe_view %1889, %1890 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1892 = torch.aten.transpose.int %1891, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1893 = torch.aten.clone %1892, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1894 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1895 = torch.aten.view %1893, %1894 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1896 = torch.prim.GetAttr %arg0["_param_constant121"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1897 = torch.aten.t %1896 : !torch.tensor -> !torch.tensor
%1898 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1899 = torch.aten.view %1895, %1898 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1900 = torch.aten.mm %1899, %1897 : !torch.tensor, !torch.tensor -> !torch.tensor
%1901 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1902 = torch.aten._unsafe_view %1900, %1901 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1903 = torch.aten.add.Tensor %1822, %1902, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1904 = torch.aten.pow.Tensor_Scalar %1903, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1905 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1906 = torch.aten.mean.dim %1904, %1905, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1907 = torch.aten.add.Scalar %1906, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1908 = torch.aten.rsqrt %1907 : !torch.tensor -> !torch.tensor
%1909 = torch.aten.mul.Tensor %1903, %1908 : !torch.tensor, !torch.tensor -> !torch.tensor
%1910 = torch.prim.GetAttr %arg0["_param_constant122"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1911 = torch.aten.mul.Tensor %1910, %1909 : !torch.tensor, !torch.tensor -> !torch.tensor
%1912 = torch.prim.GetAttr %arg0["_param_constant123"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1913 = torch.aten.t %1912 : !torch.tensor -> !torch.tensor
%1914 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1915 = torch.aten.view %1911, %1914 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1916 = torch.aten.mm %1915, %1913 : !torch.tensor, !torch.tensor -> !torch.tensor
%1917 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1918 = torch.aten._unsafe_view %1916, %1917 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1919 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1920 = torch.aten.view %1918, %1919 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1921 = torch.aten.transpose.int %1920, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1922 = torch.prim.GetAttr %arg0["_param_constant124"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1923 = torch.aten.t %1922 : !torch.tensor -> !torch.tensor
%1924 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1925 = torch.aten.view %824, %1924 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1926 = torch.aten.mm %1925, %1923 : !torch.tensor, !torch.tensor -> !torch.tensor
%1927 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1928 = torch.aten._unsafe_view %1926, %1927 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1929 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1930 = torch.aten.view %1928, %1929 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1931 = torch.aten.transpose.int %1930, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1932 = torch.prim.GetAttr %arg0["_param_constant125"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1933 = torch.aten.t %1932 : !torch.tensor -> !torch.tensor
%1934 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1935 = torch.aten.view %824, %1934 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1936 = torch.aten.mm %1935, %1933 : !torch.tensor, !torch.tensor -> !torch.tensor
%1937 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1938 = torch.aten._unsafe_view %1936, %1937 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1939 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1940 = torch.aten.view %1938, %1939 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1941 = torch.aten.transpose.int %1940, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1942 = torch.aten.transpose.int %1931, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1943 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1944 = torch.aten.expand %1921, %1943, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1945 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1946 = torch.aten.view %1944, %1945 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1947 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1948 = torch.aten.expand %1942, %1947, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1949 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1950 = torch.aten.view %1948, %1949 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1951 = torch.aten.bmm %1946, %1950 : !torch.tensor, !torch.tensor -> !torch.tensor
%1952 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1953 = torch.aten._unsafe_view %1951, %1952 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1954 = torch.aten.add_.Tensor %1953, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1955 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1956 = torch.aten.amax %1954, %1955, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1957 = torch.aten.sub.Tensor %1954, %1956, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1958 = torch.aten.exp %1957 : !torch.tensor -> !torch.tensor
%1959 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1960 = torch.aten.sum.dim_IntList %1958, %1959, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1961 = torch.aten.div.Tensor %1958, %1960 : !torch.tensor, !torch.tensor -> !torch.tensor
%1962 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1963 = torch.aten.expand %1961, %1962, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1964 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1965 = torch.aten.view %1963, %1964 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1966 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1967 = torch.aten.expand %1941, %1966, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1968 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1969 = torch.aten.view %1967, %1968 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1970 = torch.aten.bmm %1965, %1969 : !torch.tensor, !torch.tensor -> !torch.tensor
%1971 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1972 = torch.aten._unsafe_view %1970, %1971 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1973 = torch.aten.transpose.int %1972, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1974 = torch.aten.clone %1973, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1975 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1976 = torch.aten.view %1974, %1975 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1977 = torch.prim.GetAttr %arg0["_param_constant126"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1978 = torch.aten.t %1977 : !torch.tensor -> !torch.tensor
%1979 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1980 = torch.aten.view %1976, %1979 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1981 = torch.aten.mm %1980, %1978 : !torch.tensor, !torch.tensor -> !torch.tensor
%1982 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1983 = torch.aten._unsafe_view %1981, %1982 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1984 = torch.aten.add.Tensor %1903, %1983, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1985 = torch.aten.pow.Tensor_Scalar %1984, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1986 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1987 = torch.aten.mean.dim %1985, %1986, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1988 = torch.aten.add.Scalar %1987, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1989 = torch.aten.rsqrt %1988 : !torch.tensor -> !torch.tensor
%1990 = torch.aten.mul.Tensor %1984, %1989 : !torch.tensor, !torch.tensor -> !torch.tensor
%1991 = torch.prim.GetAttr %arg0["_param_constant127"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1992 = torch.aten.mul.Tensor %1991, %1990 : !torch.tensor, !torch.tensor -> !torch.tensor
%1993 = torch.prim.GetAttr %arg0["_param_constant128"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1994 = torch.aten.t %1993 : !torch.tensor -> !torch.tensor
%1995 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1996 = torch.aten.view %1992, %1995 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1997 = torch.aten.mm %1996, %1994 : !torch.tensor, !torch.tensor -> !torch.tensor
%1998 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1999 = torch.aten._unsafe_view %1997, %1998 : !torch.tensor, !torch.list<int> -> !torch.tensor
%2000 = torch.aten.relu %1999 : !torch.tensor -> !torch.tensor
%2001 = torch.prim.GetAttr %arg0["_param_constant129"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%2002 = torch.aten.t %2001 : !torch.tensor -> !torch.tensor
%2003 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%2004 = torch.aten.view %2000, %2003 : !torch.tensor, !torch.list<int> -> !torch.tensor
%2005 = torch.aten.mm %2004, %2002 : !torch.tensor, !torch.tensor -> !torch.tensor
%2006 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2007 = torch.aten._unsafe_view %2005, %2006 : !torch.tensor, !torch.list<int> -> !torch.tensor
%2008 = torch.aten.add.Tensor %1984, %2007, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%2009 = torch.aten.pow.Tensor_Scalar %2008, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%2010 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2011 = torch.aten.mean.dim %2009, %2010, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%2012 = torch.aten.add.Scalar %2011, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%2013 = torch.aten.rsqrt %2012 : !torch.tensor -> !torch.tensor
%2014 = torch.aten.mul.Tensor %2008, %2013 : !torch.tensor, !torch.tensor -> !torch.tensor
%2015 = torch.prim.GetAttr %arg0["_param_constant130"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%2016 = torch.aten.mul.Tensor %2015, %2014 : !torch.tensor, !torch.tensor -> !torch.tensor
%2017 = torch.aten.mul.Scalar %2016, %float4.419420e-02 : !torch.tensor, !torch.float -> !torch.tensor
%2018 = torch.prim.GetAttr %arg0["_param_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%2019 = torch.aten.t %2018 : !torch.tensor -> !torch.tensor
%2020 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%2021 = torch.aten.view %2017, %2020 : !torch.tensor, !torch.list<int> -> !torch.tensor
%2022 = torch.aten.mm %2021, %2019 : !torch.tensor, !torch.tensor -> !torch.tensor
%2023 = torch.prim.ListConstruct %int1, %int4, %int32128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2024 = torch.aten._unsafe_view %2022, %2023 : !torch.tensor, !torch.list<int> -> !torch.tensor
return %2024 : !torch.tensor
}
  // TorchScript class type for the FX-traced graph module `_lambda`
  // (the enclosing module carries torch.debug_module_name = "_lambda").
  // It declares 131 parameter attributes (_param_constant0 .. _param_constant130),
  // one captured tensor constant, the standard nn.Module bookkeeping fields,
  // and binds the two functions defined above as methods.
  torch.class_type @__torch__.torch.fx.graph_module._lambda {
    // Model weights lifted to module attributes by torch.fx tracing.
    // Shapes are carried by the torch.tensor.literal ops following this block;
    // NOTE(review): the attribute->literal pairing is fixed by the
    // torch.nn_module initializer later in the file — confirm there.
    // _param_constant0 is consumed in `forward` (GetAttr + aten.t + aten.mm)
    // as the final [*, 32128] vocabulary projection.
    torch.attr private "_param_constant0" : !torch.tensor
    torch.attr private "_param_constant1" : !torch.tensor
    torch.attr private "_param_constant2" : !torch.tensor
    torch.attr private "_param_constant3" : !torch.tensor
    torch.attr private "_param_constant4" : !torch.tensor
    torch.attr private "_param_constant5" : !torch.tensor
    torch.attr private "_param_constant6" : !torch.tensor
    torch.attr private "_param_constant7" : !torch.tensor
    torch.attr private "_param_constant8" : !torch.tensor
    torch.attr private "_param_constant9" : !torch.tensor
    torch.attr private "_param_constant10" : !torch.tensor
    torch.attr private "_param_constant11" : !torch.tensor
    torch.attr private "_param_constant12" : !torch.tensor
    torch.attr private "_param_constant13" : !torch.tensor
    torch.attr private "_param_constant14" : !torch.tensor
    torch.attr private "_param_constant15" : !torch.tensor
    torch.attr private "_param_constant16" : !torch.tensor
    torch.attr private "_param_constant17" : !torch.tensor
    torch.attr private "_param_constant18" : !torch.tensor
    torch.attr private "_param_constant19" : !torch.tensor
    torch.attr private "_param_constant20" : !torch.tensor
    torch.attr private "_param_constant21" : !torch.tensor
    torch.attr private "_param_constant22" : !torch.tensor
    torch.attr private "_param_constant23" : !torch.tensor
    torch.attr private "_param_constant24" : !torch.tensor
    torch.attr private "_param_constant25" : !torch.tensor
    torch.attr private "_param_constant26" : !torch.tensor
    torch.attr private "_param_constant27" : !torch.tensor
    torch.attr private "_param_constant28" : !torch.tensor
    torch.attr private "_param_constant29" : !torch.tensor
    torch.attr private "_param_constant30" : !torch.tensor
    torch.attr private "_param_constant31" : !torch.tensor
    torch.attr private "_param_constant32" : !torch.tensor
    torch.attr private "_param_constant33" : !torch.tensor
    torch.attr private "_param_constant34" : !torch.tensor
    torch.attr private "_param_constant35" : !torch.tensor
    torch.attr private "_param_constant36" : !torch.tensor
    torch.attr private "_param_constant37" : !torch.tensor
    torch.attr private "_param_constant38" : !torch.tensor
    torch.attr private "_param_constant39" : !torch.tensor
    torch.attr private "_param_constant40" : !torch.tensor
    torch.attr private "_param_constant41" : !torch.tensor
    torch.attr private "_param_constant42" : !torch.tensor
    torch.attr private "_param_constant43" : !torch.tensor
    torch.attr private "_param_constant44" : !torch.tensor
    torch.attr private "_param_constant45" : !torch.tensor
    torch.attr private "_param_constant46" : !torch.tensor
    torch.attr private "_param_constant47" : !torch.tensor
    torch.attr private "_param_constant48" : !torch.tensor
    torch.attr private "_param_constant49" : !torch.tensor
    torch.attr private "_param_constant50" : !torch.tensor
    torch.attr private "_param_constant51" : !torch.tensor
    torch.attr private "_param_constant52" : !torch.tensor
    torch.attr private "_param_constant53" : !torch.tensor
    torch.attr private "_param_constant54" : !torch.tensor
    torch.attr private "_param_constant55" : !torch.tensor
    torch.attr private "_param_constant56" : !torch.tensor
    torch.attr private "_param_constant57" : !torch.tensor
    torch.attr private "_param_constant58" : !torch.tensor
    torch.attr private "_param_constant59" : !torch.tensor
    torch.attr private "_param_constant60" : !torch.tensor
    torch.attr private "_param_constant61" : !torch.tensor
    torch.attr private "_param_constant62" : !torch.tensor
    torch.attr private "_param_constant63" : !torch.tensor
    torch.attr private "_param_constant64" : !torch.tensor
    torch.attr private "_param_constant65" : !torch.tensor
    torch.attr private "_param_constant66" : !torch.tensor
    torch.attr private "_param_constant67" : !torch.tensor
    torch.attr private "_param_constant68" : !torch.tensor
    torch.attr private "_param_constant69" : !torch.tensor
    torch.attr private "_param_constant70" : !torch.tensor
    torch.attr private "_param_constant71" : !torch.tensor
    torch.attr private "_param_constant72" : !torch.tensor
    torch.attr private "_param_constant73" : !torch.tensor
    torch.attr private "_param_constant74" : !torch.tensor
    torch.attr private "_param_constant75" : !torch.tensor
    torch.attr private "_param_constant76" : !torch.tensor
    torch.attr private "_param_constant77" : !torch.tensor
    torch.attr private "_param_constant78" : !torch.tensor
    torch.attr private "_param_constant79" : !torch.tensor
    torch.attr private "_param_constant80" : !torch.tensor
    torch.attr private "_param_constant81" : !torch.tensor
    torch.attr private "_param_constant82" : !torch.tensor
    torch.attr private "_param_constant83" : !torch.tensor
    torch.attr private "_param_constant84" : !torch.tensor
    torch.attr private "_param_constant85" : !torch.tensor
    torch.attr private "_param_constant86" : !torch.tensor
    torch.attr private "_param_constant87" : !torch.tensor
    torch.attr private "_param_constant88" : !torch.tensor
    torch.attr private "_param_constant89" : !torch.tensor
    torch.attr private "_param_constant90" : !torch.tensor
    torch.attr private "_param_constant91" : !torch.tensor
    torch.attr private "_param_constant92" : !torch.tensor
    torch.attr private "_param_constant93" : !torch.tensor
    torch.attr private "_param_constant94" : !torch.tensor
    torch.attr private "_param_constant95" : !torch.tensor
    torch.attr private "_param_constant96" : !torch.tensor
    torch.attr private "_param_constant97" : !torch.tensor
    torch.attr private "_param_constant98" : !torch.tensor
    torch.attr private "_param_constant99" : !torch.tensor
    torch.attr private "_param_constant100" : !torch.tensor
    torch.attr private "_param_constant101" : !torch.tensor
    torch.attr private "_param_constant102" : !torch.tensor
    torch.attr private "_param_constant103" : !torch.tensor
    torch.attr private "_param_constant104" : !torch.tensor
    torch.attr private "_param_constant105" : !torch.tensor
    torch.attr private "_param_constant106" : !torch.tensor
    torch.attr private "_param_constant107" : !torch.tensor
    torch.attr private "_param_constant108" : !torch.tensor
    torch.attr private "_param_constant109" : !torch.tensor
    torch.attr private "_param_constant110" : !torch.tensor
    torch.attr private "_param_constant111" : !torch.tensor
    torch.attr private "_param_constant112" : !torch.tensor
    torch.attr private "_param_constant113" : !torch.tensor
    torch.attr private "_param_constant114" : !torch.tensor
    torch.attr private "_param_constant115" : !torch.tensor
    torch.attr private "_param_constant116" : !torch.tensor
    torch.attr private "_param_constant117" : !torch.tensor
    torch.attr private "_param_constant118" : !torch.tensor
    torch.attr private "_param_constant119" : !torch.tensor
    torch.attr private "_param_constant120" : !torch.tensor
    torch.attr private "_param_constant121" : !torch.tensor
    torch.attr private "_param_constant122" : !torch.tensor
    torch.attr private "_param_constant123" : !torch.tensor
    torch.attr private "_param_constant124" : !torch.tensor
    torch.attr private "_param_constant125" : !torch.tensor
    torch.attr private "_param_constant126" : !torch.tensor
    torch.attr private "_param_constant127" : !torch.tensor
    torch.attr private "_param_constant128" : !torch.tensor
    torch.attr private "_param_constant129" : !torch.tensor
    torch.attr private "_param_constant130" : !torch.tensor
    // Non-parameter tensor captured by the trace (used via GetAttr in the
    // traced code; see the `_tensor_constant0` reference in the `_code` string).
    torch.attr private "_tensor_constant0" : !torch.tensor
    // Standard nn.Module bookkeeping attributes preserved by torch.fx.
    torch.attr private "training" : !torch.bool
    torch.attr private "_is_full_backward_hook" : !torch.optional<bool>
    // Python source of the traced forward, exposed through `__code_getter`.
    torch.attr private "_code" : !torch.str
    torch.method private "__code_getter", @__torch__.torch.fx.graph_module._lambda.__code_getter
    torch.method "forward", @__torch__.torch.fx.graph_module._lambda.forward
  }
  // Backing values for the class-type attributes declared above. The numeric
  // payloads are elided (dense_resource<__elided__>); only shapes remain.
  // The shapes are consistent with a T5-small-style encoder/decoder:
  // 512 hidden size, 2048 FFN size, [32,8] relative-position-bias tables
  // (32 buckets x 8 heads), and a [32128,512] vocabulary embedding.
  // NOTE(review): which SSA value feeds which "_param_constantN" attribute is
  // determined by the torch.nn_module initializer later in the file (outside
  // this chunk) — verify the pairing there before relying on it.
  %0 = torch.tensor.literal(dense_resource<__elided__> : tensor<32128x512xf32>) : !torch.tensor<[32128,512],f32>
  %1 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %2 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %3 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %4 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  // [32,8] relative-position bias table (encoder self-attention, layer 0).
  %5 = torch.tensor.literal(dense_resource<__elided__> : tensor<32x8xf32>) : !torch.tensor<[32,8],f32>
  %6 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %7 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %8 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %9 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %10 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %11 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %12 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %13 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %14 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %15 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %16 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %17 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %18 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %19 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %20 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %21 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %22 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %23 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %24 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %25 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %26 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %27 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %28 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %29 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %30 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %31 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %32 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %33 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %34 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %35 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %36 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %37 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %38 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %39 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %40 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %41 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %42 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %43 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %44 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %45 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %46 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %47 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %48 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %49 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %50 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %51 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %52 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %53 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %54 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  // Second [32,8] relative-position bias table (presumably decoder
  // self-attention, layer 0 — confirm against the initializer).
  %55 = torch.tensor.literal(dense_resource<__elided__> : tensor<32x8xf32>) : !torch.tensor<[32,8],f32>
  %56 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %57 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %58 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %59 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %60 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %61 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %62 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %63 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %64 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %65 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %66 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %67 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %68 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %69 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %70 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %71 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %72 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %73 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %74 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %75 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %76 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %77 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %78 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %79 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %80 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %81 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %82 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %83 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %84 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %85 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %86 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %87 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %88 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %89 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %90 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %91 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %92 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %93 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %94 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %95 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %96 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %97 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %98 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %99 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %100 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %101 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %102 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %103 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %104 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %105 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %106 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %107 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %108 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %109 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %110 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %111 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %112 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %113 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %114 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %115 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %116 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %117 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %118 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %119 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %120 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %121 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %122 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %123 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %124 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %125 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %126 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %127 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %128 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %129 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %130 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  // Scalar si64 zero — matches the fill of decoder-input position 0 via
  // `_tensor_constant0` in the traced `_code` string (aten.fill_ on select 0).
  %131 = torch.tensor.literal(dense<0> : tensor<si64>) : !torch.tensor<[],si64>
  // Constants consumed by the torch.nn_module initializer below
  // (presumably `training` = true and `_is_full_backward_hook` = none).
  %true = torch.constant.bool true
  %none = torch.constant.none
%str = torch.constant.str "\0A\0A\0Adef forward(self, arg0_1, arg1_1):\0A new_zeros = torch.ops.aten.new_zeros(arg1_1, [1, 4], dtype = torch.int64, layout = torch.strided, device = device(type='cpu'), pin_memory = False)\0A slice_1 = torch.ops.aten.slice(arg1_1, 1, 0, -1); arg1_1 = None\0A clone = torch.ops.aten.clone(slice_1); slice_1 = None\0A slice_2 = torch.ops.aten.slice(new_zeros, 1, 1, 9223372036854775807)\0A copy_ = torch.ops.aten.copy_(slice_2, clone); slice_2 = clone = None\0A _tensor_constant0 = self._tensor_constant0\0A lift_fresh_copy = torch.ops.aten.lift_fresh_copy(_tensor_constant0); _tensor_constant0 = None\0A select = torch.ops.aten.select(new_zeros, 1, 0)\0A fill_ = torch.ops.aten.fill_(select, lift_fresh_copy); select = lift_fresh_copy = None\0A eq = torch.ops.aten.eq(new_zeros, -100)\0A masked_fill_ = torch.ops.aten.masked_fill_(new_zeros, eq, 0); new_zeros = eq = None\0A view = torch.ops.aten.view(arg0_1, [-1, 15]); arg0_1 = None\0A _param_constant0 = self._param_constant0\0A embedding = torch.ops.aten.embedding(_param_constant0, view); _param_constant0 = view = None\0A ones = torch.ops.aten.ones([1, 15], device = device(type='cpu'), pin_memory = False)\0A slice_3 = torch.ops.aten.slice(ones, 0, 0, 9223372036854775807); ones = None\0A unsqueeze = torch.ops.aten.unsqueeze(slice_3, 1); slice_3 = None\0A unsqueeze_1 = torch.ops.aten.unsqueeze(unsqueeze, 2); unsqueeze = None\0A slice_4 = torch.ops.aten.slice(unsqueeze_1, 3, 0, 9223372036854775807); unsqueeze_1 = None\0A rsub = torch.ops.aten.rsub(slice_4, 1.0); slice_4 = None\0A mul = torch.ops.aten.mul(rsub, -3.4028234663852886e+38); rsub = None\0A pow_1 = torch.ops.aten.pow(embedding, 2)\0A mean = torch.ops.aten.mean(pow_1, [-1], True); pow_1 = None\0A add = torch.ops.aten.add(mean, 1e-06); mean = None\0A rsqrt = torch.ops.aten.rsqrt(add); add = None\0A detach = torch.ops.aten.detach(rsqrt)\0A mul_1 = torch.ops.aten.mul(embedding, rsqrt); rsqrt = None\0A _param_constant1 = 
self._param_constant1\0A mul_2 = torch.ops.aten.mul(_param_constant1, mul_1); _param_constant1 = mul_1 = None\0A _param_constant2 = self._param_constant2\0A t = torch.ops.aten.t(_param_constant2); _param_constant2 = None\0A view_1 = torch.ops.aten.view(mul_2, [15, 512])\0A mm = torch.ops.aten.mm(view_1, t); view_1 = t = None\0A _unsafe_view = torch.ops.aten._unsafe_view(mm, [1, 15, 512]); mm = None\0A view_2 = torch.ops.aten.view(_unsafe_view, [1, -1, 8, 64]); _unsafe_view = None\0A transpose = torch.ops.aten.transpose(view_2, 1, 2); view_2 = None\0A _param_constant3 = self._param_constant3\0A t_1 = torch.ops.aten.t(_param_constant3); _param_constant3 = None\0A view_3 = torch.ops.aten.view(mul_2, [15, 512])\0A mm_1 = torch.ops.aten.mm(view_3, t_1); view_3 = t_1 = None\0A _unsafe_view_1 = torch.ops.aten._unsafe_view(mm_1, [1, 15, 512]); mm_1 = None\0A view_4 = torch.ops.aten.view(_unsafe_view_1, [1, -1, 8, 64]); _unsafe_view_1 = None\0A transpose_1 = torch.ops.aten.transpose(view_4, 1, 2); view_4 = None\0A _param_constant4 = self._param_constant4\0A t_2 = torch.ops.aten.t(_param_constant4); _param_constant4 = None\0A view_5 = torch.ops.aten.view(mul_2, [15, 512]); mul_2 = None\0A mm_2 = torch.ops.aten.mm(view_5, t_2); view_5 = t_2 = None\0A _unsafe_view_2 = torch.ops.aten._unsafe_view(mm_2, [1, 15, 512]); mm_2 = None\0A view_6 = torch.ops.aten.view(_unsafe_view_2, [1, -1, 8, 64]); _unsafe_view_2 = None\0A transpose_2 = torch.ops.aten.transpose(view_6, 1, 2); view_6 = None\0A transpose_3 = torch.ops.aten.transpose(transpose_1, 3, 2); transpose_1 = None\0A expand = torch.ops.aten.expand(transpose, [1, 8, 15, 64]); transpose = None\0A view_7 = torch.ops.aten.view(expand, [8, 15, 64]); expand = None\0A expand_1 = torch.ops.aten.expand(transpose_3, [1, 8, 64, 15]); transpose_3 = None\0A view_8 = torch.ops.aten.view(expand_1, [8, 64, 15]); expand_1 = None\0A bmm = torch.ops.aten.bmm(view_7, view_8); view_7 = view_8 = None\0A _unsafe_view_3 = 
torch.ops.aten._unsafe_view(bmm, [1, 8, 15, 15]); bmm = None\0A arange = torch.ops.aten.arange(15, dtype = torch.int64, device = device(type='cpu'), pin_memory = False)\0A slice_5 = torch.ops.aten.slice(arange, 0, 0, 9223372036854775807); arange = None\0A unsqueeze_2 = torch.ops.aten.unsqueeze(slice_5, 1); slice_5 = None\0A arange_1 = torch.ops.aten.arange(15, dtype = torch.int64, device = device(type='cpu'), pin_memory = False)\0A unsqueeze_3 = torch.ops.aten.unsqueeze(arange_1, 0); arange_1 = None\0A slice_6 = torch.ops.aten.slice(unsqueeze_3, 1, 0, 9223372036854775807); unsqueeze_3 = None\0A sub = torch.ops.aten.sub(slice_6, unsqueeze_2); slice_6 = unsqueeze_2 = None\0A gt = torch.ops.aten.gt(sub, 0)\0A convert_element_type = torch.ops.prims.convert_element_type(gt, torch.int64); gt = None\0A mul_3 = torch.ops.aten.mul(convert_element_type, 16); convert_element_type = None\0A add_1 = torch.ops.aten.add(mul_3, 0); mul_3 = None\0A abs_1 = torch.ops.aten.abs(sub); sub = None\0A lt = torch.ops.aten.lt(abs_1, 8)\0A convert_element_type_1 = torch.ops.prims.convert_element_type(abs_1, torch.float32)\0A div = torch.ops.aten.div(convert_element_type_1, 8); convert_element_type_1 = None\0A log = torch.ops.aten.log(div); div = None\0A div_1 = torch.ops.aten.div(log, 2.772588722239781); log = None\0A mul_4 = torch.ops.aten.mul(div_1, 8); div_1 = None\0A convert_element_type_2 = torch.ops.prims.convert_element_type(mul_4, torch.int64); mul_4 = None\0A add_2 = torch.ops.aten.add(convert_element_type_2, 8); convert_element_type_2 = None\0A full_like = torch.ops.aten.full_like(add_2, 15, dtype = torch.int64, layout = torch.strided, device = device(type='cpu'), pin_memory = False)\0A minimum = torch.ops.aten.minimum(add_2, full_like); add_2 = full_like = None\0A where = torch.ops.aten.where(lt, abs_1, minimum); lt = abs_1 = minimum = None\0A add_ = torch.ops.aten.add_(add_1, where); add_1 = where = None\0A _param_constant5 = self._param_constant5\0A embedding_1 = 
torch.ops.aten.embedding(_param_constant5, add_); _param_constant5 = add_ = None\0A permute = torch.ops.aten.permute(embedding_1, [2, 0, 1]); embedding_1 = None\0A unsqueeze_4 = torch.ops.aten.unsqueeze(permute, 0); permute = None\0A add_3 = torch.ops.aten.add(unsqueeze_4, mul); unsqueeze_4 = mul = None\0A add__1 = torch.ops.aten.add_(_unsafe_view_3, add_3); _unsafe_view_3 = None\0A amax = torch.ops.aten.amax(add__1, [-1], True)\0A sub_1 = torch.ops.aten.sub(add__1, amax); add__1 = amax = None\0A exp = torch.ops.aten.exp(sub_1); sub_1 = None\0A sum_1 = torch.ops.aten.sum(exp, [-1], True)\0A div_2 = torch.ops.aten.div(exp, sum_1); exp = sum_1 = None\0A detach_1 = torch.ops.aten.detach(div_2)\0A expand_2 = torch.ops.aten.expand(div_2, [1, 8, 15, 15]); div_2 = None\0A view_9 = torch.ops.aten.view(expand_2, [8, 15, 15]); expand_2 = None\0A expand_3 = torch.ops.aten.expand(transpose_2, [1, 8, 15, 64]); transpose_2 = None\0A view_10 = torch.ops.aten.view(expand_3, [8, 15, 64]); expand_3 = None\0A bmm_1 = torch.ops.aten.bmm(view_9, view_10); view_9 = view_10 = None\0A _unsafe_view_4 = torch.ops.aten._unsafe_view(bmm_1, [1, 8, 15, 64]); bmm_1 = None\0A transpose_4 = torch.ops.aten.transpose(_unsafe_view_4, 1, 2); _unsafe_view_4 = None\0A clone_1 = torch.ops.aten.clone(transpose_4, memory_format = torch.contiguous_format); transpose_4 = None\0A view_11 = torch.ops.aten.view(clone_1, [1, -1, 512]); clone_1 = None\0A _param_constant6 = self._param_constant6\0A t_3 = torch.ops.aten.t(_param_constant6); _param_constant6 = None\0A view_12 = torch.ops.aten.view(view_11, [15, 512]); view_11 = None\0A mm_3 = torch.ops.aten.mm(view_12, t_3); view_12 = t_3 = None\0A _unsafe_view_5 = torch.ops.aten._unsafe_view(mm_3, [1, 15, 512]); mm_3 = None\0A add_4 = torch.ops.aten.add(embedding, _unsafe_view_5); embedding = _unsafe_view_5 = None\0A pow_2 = torch.ops.aten.pow(add_4, 2)\0A mean_1 = torch.ops.aten.mean(pow_2, [-1], True); pow_2 = None\0A add_5 = torch.ops.aten.add(mean_1, 1e-06); 
mean_1 = None\0A rsqrt_1 = torch.ops.aten.rsqrt(add_5); add_5 = None\0A detach_2 = torch.ops.aten.detach(rsqrt_1)\0A mul_5 = torch.ops.aten.mul(add_4, rsqrt_1); rsqrt_1 = None\0A _param_constant7 = self._param_constant7\0A mul_6 = torch.ops.aten.mul(_param_constant7, mul_5); _param_constant7 = mul_5 = None\0A _param_constant8 = self._param_constant8\0A t_4 = torch.ops.aten.t(_param_constant8); _param_constant8 = None\0A view_13 = torch.ops.aten.view(mul_6, [15, 512]); mul_6 = None\0A mm_4 = torch.ops.aten.mm(view_13, t_4); view_13 = t_4 = None\0A _unsafe_view_6 = torch.ops.aten._unsafe_view(mm_4, [1, 15, 2048]); mm_4 = None\0A relu = torch.ops.aten.relu(_unsafe_view_6); _unsafe_view_6 = None\0A detach_3 = torch.ops.aten.detach(relu)\0A _param_constant9 = self._param_constant9\0A t_5 = torch.ops.aten.t(_param_constant9); _param_constant9 = None\0A view_14 = torch.ops.aten.view(relu, [15, 2048]); relu = None\0A mm_5 = torch.ops.aten.mm(view_14, t_5); view_14 = t_5 = None\0A _unsafe_view_7 = torch.ops.aten._unsafe_view(mm_5, [1, 15, 512]); mm_5 = None\0A add_6 = torch.ops.aten.add(add_4, _unsafe_view_7); add_4 = _unsafe_view_7 = None\0A pow_3 = torch.ops.aten.pow(add_6, 2)\0A mean_2 = torch.ops.aten.mean(pow_3, [-1], True); pow_3 = None\0A add_7 = torch.ops.aten.add(mean_2, 1e-06); mean_2 = None\0A rsqrt_2 = torch.ops.aten.rsqrt(add_7); add_7 = None\0A detach_4 = torch.ops.aten.detach(rsqrt_2)\0A mul_7 = torch.ops.aten.mul(add_6, rsqrt_2); rsqrt_2 = None\0A _param_constant10 = self._param_constant10\0A mul_8 = torch.ops.aten.mul(_param_constant10, mul_7); _param_constant10 = mul_7 = None\0A _param_constant11 = self._param_constant11\0A t_6 = torch.ops.aten.t(_param_constant11); _param_constant11 = None\0A view_15 = torch.ops.aten.view(mul_8, [15, 512])\0A mm_6 = torch.ops.aten.mm(view_15, t_6); view_15 = t_6 = None\0A _unsafe_view_8 = torch.ops.aten._unsafe_view(mm_6, [1, 15, 512]); mm_6 = None\0A view_16 = torch.ops.aten.view(_unsafe_view_8, [1, -1, 8, 64]); 
_unsafe_view_8 = None\0A transpose_5 = torch.ops.aten.transpose(view_16, 1, 2); view_16 = None\0A _param_constant12 = self._param_constant12\0A t_7 = torch.ops.aten.t(_param_constant12); _param_constant12 = None\0A view_17 = torch.ops.aten.view(mul_8, [15, 512])\0A mm_7 = torch.ops.aten.mm(view_17, t_7); view_17 = t_7 = None\0A _unsafe_view_9 = torch.ops.aten._unsafe_view(mm_7, [1, 15, 512]); mm_7 = None\0A view_18 = torch.ops.aten.view(_unsafe_view_9, [1, -1, 8, 64]); _unsafe_view_9 = None\0A transpose_6 = torch.ops.aten.transpose(view_18, 1, 2); view_18 = None\0A _param_constant13 = self._param_constant13\0A t_8 = torch.ops.aten.t(_param_constant13); _param_constant13 = None\0A view_19 = torch.ops.aten.view(mul_8, [15, 512]); mul_8 = None\0A mm_8 = torch.ops.aten.mm(view_19, t_8); view_19 = t_8 = None\0A _unsafe_view_10 = torch.ops.aten._unsafe_view(mm_8, [1, 15, 512]); mm_8 = None\0A view_20 = torch.ops.aten.view(_unsafe_view_10, [1, -1, 8, 64]); _unsafe_view_10 = None\0A transpose_7 = torch.ops.aten.transpose(view_20, 1, 2); view_20 = None\0A transpose_8 = torch.ops.aten.transpose(transpose_6, 3, 2); transpose_6 = None\0A expand_4 = torch.ops.aten.expand(transpose_5, [1, 8, 15, 64]); transpose_5 = None\0A view_21 = torch.ops.aten.view(expand_4, [8, 15, 64]); expand_4 = None\0A expand_5 = torch.ops.aten.expand(transpose_8, [1, 8, 64, 15]); transpose_8 = None\0A view_22 = torch.ops.aten.view(expand_5, [8, 64, 15]); expand_5 = None\0A bmm_2 = torch.ops.aten.bmm(view_21, view_22); view_21 = view_22 = None\0A _unsafe_view_11 = torch.ops.aten._unsafe_view(bmm_2, [1, 8, 15, 15]); bmm_2 = None\0A add__2 = torch.ops.aten.add_(_unsafe_view_11, add_3); _unsafe_view_11 = None\0A amax_1 = torch.ops.aten.amax(add__2, [-1], True)\0A sub_2 = torch.ops.aten.sub(add__2, amax_1); add__2 = amax_1 = None\0A exp_1 = torch.ops.aten.exp(sub_2); sub_2 = None\0A sum_2 = torch.ops.aten.sum(exp_1, [-1], True)\0A div_3 = torch.ops.aten.div(exp_1, sum_2); exp_1 = sum_2 = None\0A detach_5 = 
torch.ops.aten.detach(div_3)\0A expand_6 = torch.ops.aten.expand(div_3, [1, 8, 15, 15]); div_3 = None\0A view_23 = torch.ops.aten.view(expand_6, [8, 15, 15]); expand_6 = None\0A expand_7 = torch.ops.aten.expand(transpose_7, [1, 8, 15, 64]); transpose_7 = None\0A view_24 = torch.ops.aten.view(expand_7, [8, 15, 64]); expand_7 = None\0A bmm_3 = torch.ops.aten.bmm(view_23, view_24); view_23 = view_24 = None\0A _unsafe_view_12 = torch.ops.aten._unsafe_view(bmm_3, [1, 8, 15, 64]); bmm_3 = None\0A transpose_9 = torch.ops.aten.transpose(_unsafe_view_12, 1, 2); _unsafe_view_12 = None\0A clone_2 = torch.ops.aten.clone(transpose_9, memory_format = torch.contiguous_format); transpose_9 = None\0A view_25 = torch.ops.aten.view(clone_2, [1, -1, 512]); clone_2 = None\0A _param_constant14 = self._param_constant14\0A t_9 = torch.ops.aten.t(_param_constant14); _param_constant14 = None\0A view_26 = torch.ops.aten.view(view_25, [15, 512]); view_25 = None\0A mm_9 = torch.ops.aten.mm(view_26, t_9); view_26 = t_9 = None\0A _unsafe_view_13 = torch.ops.aten._unsafe_view(mm_9, [1, 15, 512]); mm_9 = None\0A add_8 = torch.ops.aten.add(add_6, _unsafe_view_13); add_6 = _unsafe_view_13 = None\0A pow_4 = torch.ops.aten.pow(add_8, 2)\0A mean_3 = torch.ops.aten.mean(pow_4, [-1], True); pow_4 = None\0A add_9 = torch.ops.aten.add(mean_3, 1e-06); mean_3 = None\0A rsqrt_3 = torch.ops.aten.rsqrt(add_9); add_9 = None\0A detach_6 = torch.ops.aten.detach(rsqrt_3)\0A mul_9 = torch.ops.aten.mul(add_8, rsqrt_3); rsqrt_3 = None\0A _param_constant15 = self._param_constant15\0A mul_10 = torch.ops.aten.mul(_param_constant15, mul_9); _param_constant15 = mul_9 = None\0A _param_constant16 = self._param_constant16\0A t_10 = torch.ops.aten.t(_param_constant16); _param_constant16 = None\0A view_27 = torch.ops.aten.view(mul_10, [15, 512]); mul_10 = None\0A mm_10 = torch.ops.aten.mm(view_27, t_10); view_27 = t_10 = None\0A _unsafe_view_14 = torch.ops.aten._unsafe_view(mm_10, [1, 15, 2048]); mm_10 = None\0A relu_1 = 
torch.ops.aten.relu(_unsafe_view_14); _unsafe_view_14 = None\0A detach_7 = torch.ops.aten.detach(relu_1)\0A _param_constant17 = self._param_constant17\0A t_11 = torch.ops.aten.t(_param_constant17); _param_constant17 = None\0A view_28 = torch.ops.aten.view(relu_1, [15, 2048]); relu_1 = None\0A mm_11 = torch.ops.aten.mm(view_28, t_11); view_28 = t_11 = None\0A _unsafe_view_15 = torch.ops.aten._unsafe_view(mm_11, [1, 15, 512]); mm_11 = None\0A add_10 = torch.ops.aten.add(add_8, _unsafe_view_15); add_8 = _unsafe_view_15 = None\0A pow_5 = torch.ops.aten.pow(add_10, 2)\0A mean_4 = torch.ops.aten.mean(pow_5, [-1], True); pow_5 = None\0A add_11 = torch.ops.aten.add(mean_4, 1e-06); mean_4 = None\0A rsqrt_4 = torch.ops.aten.rsqrt(add_11); add_11 = None\0A detach_8 = torch.ops.aten.detach(rsqrt_4)\0A mul_11 = torch.ops.aten.mul(add_10, rsqrt_4); rsqrt_4 = None\0A _param_constant18 = self._param_constant18\0A mul_12 = torch.ops.aten.mul(_param_constant18, mul_11); _param_constant18 = mul_11 = None\0A _param_constant19 = self._param_constant19\0A t_12 = torch.ops.aten.t(_param_constant19); _param_constant19 = None\0A view_29 = torch.ops.aten.view(mul_12, [15, 512])\0A mm_12 = torch.ops.aten.mm(view_29, t_12); view_29 = t_12 = None\0A _unsafe_view_16 = torch.ops.aten._unsafe_view(mm_12, [1, 15, 512]); mm_12 = None\0A view_30 = torch.ops.aten.view(_unsafe_view_16, [1, -1, 8, 64]); _unsafe_view_16 = None\0A transpose_10 = torch.ops.aten.transpose(view_30, 1, 2); view_30 = None\0A _param_constant20 = self._param_constant20\0A t_13 = torch.ops.aten.t(_param_constant20); _param_constant20 = None\0A view_31 = torch.ops.aten.view(mul_12, [15, 512])\0A mm_13 = torch.ops.aten.mm(view_31, t_13); view_31 = t_13 = None\0A _unsafe_view_17 = torch.ops.aten._unsafe_view(mm_13, [1, 15, 512]); mm_13 = None\0A view_32 = torch.ops.aten.view(_unsafe_view_17, [1, -1, 8, 64]); _unsafe_view_17 = None\0A transpose_11 = torch.ops.aten.transpose(view_32, 1, 2); view_32 = None\0A _param_constant21 = 
self._param_constant21\0A t_14 = torch.ops.aten.t(_param_constant21); _param_constant21 = None\0A view_33 = torch.ops.aten.view(mul_12, [15, 512]); mul_12 = None\0A mm_14 = torch.ops.aten.mm(view_33, t_14); view_33 = t_14 = None\0A _unsafe_view_18 = torch.ops.aten._unsafe_view(mm_14, [1, 15, 512]); mm_14 = None\0A view_34 = torch.ops.aten.view(_unsafe_view_18, [1, -1, 8, 64]); _unsafe_view_18 = None\0A transpose_12 = torch.ops.aten.transpose(view_34, 1, 2); view_34 = None\0A transpose_13 = torch.ops.aten.transpose(transpose_11, 3, 2); transpose_11 = None\0A expand_8 = torch.ops.aten.expand(transpose_10, [1, 8, 15, 64]); transpose_10 = None\0A view_35 = torch.ops.aten.view(expand_8, [8, 15, 64]); expand_8 = None\0A expand_9 = torch.ops.aten.expand(transpose_13, [1, 8, 64, 15]); transpose_13 = None\0A view_36 = torch.ops.aten.view(expand_9, [8, 64, 15]); expand_9 = None\0A bmm_4 = torch.ops.aten.bmm(view_35, view_36); view_35 = view_36 = None\0A _unsafe_view_19 = torch.ops.aten._unsafe_view(bmm_4, [1, 8, 15, 15]); bmm_4 = None\0A add__3 = torch.ops.aten.add_(_unsafe_view_19, add_3); _unsafe_view_19 = None\0A amax_2 = torch.ops.aten.amax(add__3, [-1], True)\0A sub_3 = torch.ops.aten.sub(add__3, amax_2); add__3 = amax_2 = None\0A exp_2 = torch.ops.aten.exp(sub_3); sub_3 = None\0A sum_3 = torch.ops.aten.sum(exp_2, [-1], True)\0A div_4 = torch.ops.aten.div(exp_2, sum_3); exp_2 = sum_3 = None\0A detach_9 = torch.ops.aten.detach(div_4)\0A expand_10 = torch.ops.aten.expand(div_4, [1, 8, 15, 15]); div_4 = None\0A view_37 = torch.ops.aten.view(expand_10, [8, 15, 15]); expand_10 = None\0A expand_11 = torch.ops.aten.expand(transpose_12, [1, 8, 15, 64]); transpose_12 = None\0A view_38 = torch.ops.aten.view(expand_11, [8, 15, 64]); expand_11 = None\0A bmm_5 = torch.ops.aten.bmm(view_37, view_38); view_37 = view_38 = None\0A _unsafe_view_20 = torch.ops.aten._unsafe_view(bmm_5, [1, 8, 15, 64]); bmm_5 = None\0A transpose_14 = torch.ops.aten.transpose(_unsafe_view_20, 1, 2); 
_unsafe_view_20 = None\0A clone_3 = torch.ops.aten.clone(transpose_14, memory_format = torch.contiguous_format); transpose_14 = None\0A view_39 = torch.ops.aten.view(clone_3, [1, -1, 512]); clone_3 = None\0A _param_constant22 = self._param_constant22\0A t_15 = torch.ops.aten.t(_param_constant22); _param_constant22 = None\0A view_40 = torch.ops.aten.view(view_39, [15, 512]); view_39 = None\0A mm_15 = torch.ops.aten.mm(view_40, t_15); view_40 = t_15 = None\0A _unsafe_view_21 = torch.ops.aten._unsafe_view(mm_15, [1, 15, 512]); mm_15 = None\0A add_12 = torch.ops.aten.add(add_10, _unsafe_view_21); add_10 = _unsafe_view_21 = None\0A pow_6 = torch.ops.aten.pow(add_12, 2)\0A mean_5 = torch.ops.aten.mean(pow_6, [-1], True); pow_6 = None\0A add_13 = torch.ops.aten.add(mean_5, 1e-06); mean_5 = None\0A rsqrt_5 = torch.ops.aten.rsqrt(add_13); add_13 = None\0A detach_10 = torch.ops.aten.detach(rsqrt_5)\0A mul_13 = torch.ops.aten.mul(add_12, rsqrt_5); rsqrt_5 = None\0A _param_constant23 = self._param_constant23\0A mul_14 = torch.ops.aten.mul(_param_constant23, mul_13); _param_constant23 = mul_13 = None\0A _param_constant24 = self._param_constant24\0A t_16 = torch.ops.aten.t(_param_constant24); _param_constant24 = None\0A view_41 = torch.ops.aten.view(mul_14, [15, 512]); mul_14 = None\0A mm_16 = torch.ops.aten.mm(view_41, t_16); view_41 = t_16 = None\0A _unsafe_view_22 = torch.ops.aten._unsafe_view(mm_16, [1, 15, 2048]); mm_16 = None\0A relu_2 = torch.ops.aten.relu(_unsafe_view_22); _unsafe_view_22 = None\0A detach_11 = torch.ops.aten.detach(relu_2)\0A _param_constant25 = self._param_constant25\0A t_17 = torch.ops.aten.t(_param_constant25); _param_constant25 = None\0A view_42 = torch.ops.aten.view(relu_2, [15, 2048]); relu_2 = None\0A mm_17 = torch.ops.aten.mm(view_42, t_17); view_42 = t_17 = None\0A _unsafe_view_23 = torch.ops.aten._unsafe_view(mm_17, [1, 15, 512]); mm_17 = None\0A add_14 = torch.ops.aten.add(add_12, _unsafe_view_23); add_12 = _unsafe_view_23 = None\0A pow_7 = 
torch.ops.aten.pow(add_14, 2)\0A mean_6 = torch.ops.aten.mean(pow_7, [-1], True); pow_7 = None\0A add_15 = torch.ops.aten.add(mean_6, 1e-06); mean_6 = None\0A rsqrt_6 = torch.ops.aten.rsqrt(add_15); add_15 = None\0A detach_12 = torch.ops.aten.detach(rsqrt_6)\0A mul_15 = torch.ops.aten.mul(add_14, rsqrt_6); rsqrt_6 = None\0A _param_constant26 = self._param_constant26\0A mul_16 = torch.ops.aten.mul(_param_constant26, mul_15); _param_constant26 = mul_15 = None\0A _param_constant27 = self._param_constant27\0A t_18 = torch.ops.aten.t(_param_constant27); _param_constant27 = None\0A view_43 = torch.ops.aten.view(mul_16, [15, 512])\0A mm_18 = torch.ops.aten.mm(view_43, t_18); view_43 = t_18 = None\0A _unsafe_view_24 = torch.ops.aten._unsafe_view(mm_18, [1, 15, 512]); mm_18 = None\0A view_44 = torch.ops.aten.view(_unsafe_view_24, [1, -1, 8, 64]); _unsafe_view_24 = None\0A transpose_15 = torch.ops.aten.transpose(view_44, 1, 2); view_44 = None\0A _param_constant28 = self._param_constant28\0A t_19 = torch.ops.aten.t(_param_constant28); _param_constant28 = None\0A view_45 = torch.ops.aten.view(mul_16, [15, 512])\0A mm_19 = torch.ops.aten.mm(view_45, t_19); view_45 = t_19 = None\0A _unsafe_view_25 = torch.ops.aten._unsafe_view(mm_19, [1, 15, 512]); mm_19 = None\0A view_46 = torch.ops.aten.view(_unsafe_view_25, [1, -1, 8, 64]); _unsafe_view_25 = None\0A transpose_16 = torch.ops.aten.transpose(view_46, 1, 2); view_46 = None\0A _param_constant29 = self._param_constant29\0A t_20 = torch.ops.aten.t(_param_constant29); _param_constant29 = None\0A view_47 = torch.ops.aten.view(mul_16, [15, 512]); mul_16 = None\0A mm_20 = torch.ops.aten.mm(view_47, t_20); view_47 = t_20 = None\0A _unsafe_view_26 = torch.ops.aten._unsafe_view(mm_20, [1, 15, 512]); mm_20 = None\0A view_48 = torch.ops.aten.view(_unsafe_view_26, [1, -1, 8, 64]); _unsafe_view_26 = None\0A transpose_17 = torch.ops.aten.transpose(view_48, 1, 2); view_48 = None\0A transpose_18 = torch.ops.aten.transpose(transpose_16, 3, 2); 
transpose_16 = None\0A expand_12 = torch.ops.aten.expand(transpose_15, [1, 8, 15, 64]); transpose_15 = None\0A view_49 = torch.ops.aten.view(expand_12, [8, 15, 64]); expand_12 = None\0A expand_13 = torch.ops.aten.expand(transpose_18, [1, 8, 64, 15]); transpose_18 = None\0A view_50 = torch.ops.aten.view(expand_13, [8, 64, 15]); expand_13 = None\0A bmm_6 = torch.ops.aten.bmm(view_49, view_50); view_49 = view_50 = None\0A _unsafe_view_27 = torch.ops.aten._unsafe_view(bmm_6, [1, 8, 15, 15]); bmm_6 = None\0A add__4 = torch.ops.aten.add_(_unsafe_view_27, add_3); _unsafe_view_27 = None\0A amax_3 = torch.ops.aten.amax(add__4, [-1], True)\0A sub_4 = torch.ops.aten.sub(add__4, amax_3); add__4 = amax_3 = None\0A exp_3 = torch.ops.aten.exp(sub_4); sub_4 = None\0A sum_4 = torch.ops.aten.sum(exp_3, [-1], True)\0A div_5 = torch.ops.aten.div(exp_3, sum_4); exp_3 = sum_4 = None\0A detach_13 = torch.ops.aten.detach(div_5)\0A expand_14 = torch.ops.aten.expand(div_5, [1, 8, 15, 15]); div_5 = None\0A view_51 = torch.ops.aten.view(expand_14, [8, 15, 15]); expand_14 = None\0A expand_15 = torch.ops.aten.expand(transpose_17, [1, 8, 15, 64]); transpose_17 = None\0A view_52 = torch.ops.aten.view(expand_15, [8, 15, 64]); expand_15 = None\0A bmm_7 = torch.ops.aten.bmm(view_51, view_52); view_51 = view_52 = None\0A _unsafe_view_28 = torch.ops.aten._unsafe_view(bmm_7, [1, 8, 15, 64]); bmm_7 = None\0A transpose_19 = torch.ops.aten.transpose(_unsafe_view_28, 1, 2); _unsafe_view_28 = None\0A clone_4 = torch.ops.aten.clone(transpose_19, memory_format = torch.contiguous_format); transpose_19 = None\0A view_53 = torch.ops.aten.view(clone_4, [1, -1, 512]); clone_4 = None\0A _param_constant30 = self._param_constant30\0A t_21 = torch.ops.aten.t(_param_constant30); _param_constant30 = None\0A view_54 = torch.ops.aten.view(view_53, [15, 512]); view_53 = None\0A mm_21 = torch.ops.aten.mm(view_54, t_21); view_54 = t_21 = None\0A _unsafe_view_29 = torch.ops.aten._unsafe_view(mm_21, [1, 15, 512]); mm_21 = 
None\0A add_16 = torch.ops.aten.add(add_14, _unsafe_view_29); add_14 = _unsafe_view_29 = None\0A pow_8 = torch.ops.aten.pow(add_16, 2)\0A mean_7 = torch.ops.aten.mean(pow_8, [-1], True); pow_8 = None\0A add_17 = torch.ops.aten.add(mean_7, 1e-06); mean_7 = None\0A rsqrt_7 = torch.ops.aten.rsqrt(add_17); add_17 = None\0A detach_14 = torch.ops.aten.detach(rsqrt_7)\0A mul_17 = torch.ops.aten.mul(add_16, rsqrt_7); rsqrt_7 = None\0A _param_constant31 = self._param_constant31\0A mul_18 = torch.ops.aten.mul(_param_constant31, mul_17); _param_constant31 = mul_17 = None\0A _param_constant32 = self._param_constant32\0A t_22 = torch.ops.aten.t(_param_constant32); _param_constant32 = None\0A view_55 = torch.ops.aten.view(mul_18, [15, 512]); mul_18 = None\0A mm_22 = torch.ops.aten.mm(view_55, t_22); view_55 = t_22 = None\0A _unsafe_view_30 = torch.ops.aten._unsafe_view(mm_22, [1, 15, 2048]); mm_22 = None\0A relu_3 = torch.ops.aten.relu(_unsafe_view_30); _unsafe_view_30 = None\0A detach_15 = torch.ops.aten.detach(relu_3)\0A _param_constant33 = self._param_constant33\0A t_23 = torch.ops.aten.t(_param_constant33); _param_constant33 = None\0A view_56 = torch.ops.aten.view(relu_3, [15, 2048]); relu_3 = None\0A mm_23 = torch.ops.aten.mm(view_56, t_23); view_56 = t_23 = None\0A _unsafe_view_31 = torch.ops.aten._unsafe_view(mm_23, [1, 15, 512]); mm_23 = None\0A add_18 = torch.ops.aten.add(add_16, _unsafe_view_31); add_16 = _unsafe_view_31 = None\0A pow_9 = torch.ops.aten.pow(add_18, 2)\0A mean_8 = torch.ops.aten.mean(pow_9, [-1], True); pow_9 = None\0A add_19 = torch.ops.aten.add(mean_8, 1e-06); mean_8 = None\0A rsqrt_8 = torch.ops.aten.rsqrt(add_19); add_19 = None\0A detach_16 = torch.ops.aten.detach(rsqrt_8)\0A mul_19 = torch.ops.aten.mul(add_18, rsqrt_8); rsqrt_8 = None\0A _param_constant34 = self._param_constant34\0A mul_20 = torch.ops.aten.mul(_param_constant34, mul_19); _param_constant34 = mul_19 = None\0A _param_constant35 = self._param_constant35\0A t_24 = 
torch.ops.aten.t(_param_constant35); _param_constant35 = None\0A view_57 = torch.ops.aten.view(mul_20, [15, 512])\0A mm_24 = torch.ops.aten.mm(view_57, t_24); view_57 = t_24 = None\0A _unsafe_view_32 = torch.ops.aten._unsafe_view(mm_24, [1, 15, 512]); mm_24 = None\0A view_58 = torch.ops.aten.view(_unsafe_view_32, [1, -1, 8, 64]); _unsafe_view_32 = None\0A transpose_20 = torch.ops.aten.transpose(view_58, 1, 2); view_58 = None\0A _param_constant36 = self._param_constant36\0A t_25 = torch.ops.aten.t(_param_constant36); _param_constant36 = None\0A view_59 = torch.ops.aten.view(mul_20, [15, 512])\0A mm_25 = torch.ops.aten.mm(view_59, t_25); view_59 = t_25 = None\0A _unsafe_view_33 = torch.ops.aten._unsafe_view(mm_25, [1, 15, 512]); mm_25 = None\0A view_60 = torch.ops.aten.view(_unsafe_view_33, [1, -1, 8, 64]); _unsafe_view_33 = None\0A transpose_21 = torch.ops.aten.transpose(view_60, 1, 2); view_60 = None\0A _param_constant37 = self._param_constant37\0A t_26 = torch.ops.aten.t(_param_constant37); _param_constant37 = None\0A view_61 = torch.ops.aten.view(mul_20, [15, 512]); mul_20 = None\0A mm_26 = torch.ops.aten.mm(view_61, t_26); view_61 = t_26 = None\0A _unsafe_view_34 = torch.ops.aten._unsafe_view(mm_26, [1, 15, 512]); mm_26 = None\0A view_62 = torch.ops.aten.view(_unsafe_view_34, [1, -1, 8, 64]); _unsafe_view_34 = None\0A transpose_22 = torch.ops.aten.transpose(view_62, 1, 2); view_62 = None\0A transpose_23 = torch.ops.aten.transpose(transpose_21, 3, 2); transpose_21 = None\0A expand_16 = torch.ops.aten.expand(transpose_20, [1, 8, 15, 64]); transpose_20 = None\0A view_63 = torch.ops.aten.view(expand_16, [8, 15, 64]); expand_16 = None\0A expand_17 = torch.ops.aten.expand(transpose_23, [1, 8, 64, 15]); transpose_23 = None\0A view_64 = torch.ops.aten.view(expand_17, [8, 64, 15]); expand_17 = None\0A bmm_8 = torch.ops.aten.bmm(view_63, view_64); view_63 = view_64 = None\0A _unsafe_view_35 = torch.ops.aten._unsafe_view(bmm_8, [1, 8, 15, 15]); bmm_8 = None\0A add__5 = 
torch.ops.aten.add_(_unsafe_view_35, add_3); _unsafe_view_35 = None\0A amax_4 = torch.ops.aten.amax(add__5, [-1], True)\0A sub_5 = torch.ops.aten.sub(add__5, amax_4); add__5 = amax_4 = None\0A exp_4 = torch.ops.aten.exp(sub_5); sub_5 = None\0A sum_5 = torch.ops.aten.sum(exp_4, [-1], True)\0A div_6 = torch.ops.aten.div(exp_4, sum_5); exp_4 = sum_5 = None\0A detach_17 = torch.ops.aten.detach(div_6)\0A expand_18 = torch.ops.aten.expand(div_6, [1, 8, 15, 15]); div_6 = None\0A view_65 = torch.ops.aten.view(expand_18, [8, 15, 15]); expand_18 = None\0A expand_19 = torch.ops.aten.expand(transpose_22, [1, 8, 15, 64]); transpose_22 = None\0A view_66 = torch.ops.aten.view(expand_19, [8, 15, 64]); expand_19 = None\0A bmm_9 = torch.ops.aten.bmm(view_65, view_66); view_65 = view_66 = None\0A _unsafe_view_36 = torch.ops.aten._unsafe_view(bmm_9, [1, 8, 15, 64]); bmm_9 = None\0A transpose_24 = torch.ops.aten.transpose(_unsafe_view_36, 1, 2); _unsafe_view_36 = None\0A clone_5 = torch.ops.aten.clone(transpose_24, memory_format = torch.contiguous_format); transpose_24 = None\0A view_67 = torch.ops.aten.view(clone_5, [1, -1, 512]); clone_5 = None\0A _param_constant38 = self._param_constant38\0A t_27 = torch.ops.aten.t(_param_constant38); _param_constant38 = None\0A view_68 = torch.ops.aten.view(view_67, [15, 512]); view_67 = None\0A mm_27 = torch.ops.aten.mm(view_68, t_27); view_68 = t_27 = None\0A _unsafe_view_37 = torch.ops.aten._unsafe_view(mm_27, [1, 15, 512]); mm_27 = None\0A add_20 = torch.ops.aten.add(add_18, _unsafe_view_37); add_18 = _unsafe_view_37 = None\0A pow_10 = torch.ops.aten.pow(add_20, 2)\0A mean_9 = torch.ops.aten.mean(pow_10, [-1], True); pow_10 = None\0A add_21 = torch.ops.aten.add(mean_9, 1e-06); mean_9 = None\0A rsqrt_9 = torch.ops.aten.rsqrt(add_21); add_21 = None\0A detach_18 = torch.ops.aten.detach(rsqrt_9)\0A mul_21 = torch.ops.aten.mul(add_20, rsqrt_9); rsqrt_9 = None\0A _param_constant39 = self._param_constant39\0A mul_22 = 
torch.ops.aten.mul(_param_constant39, mul_21); _param_constant39 = mul_21 = None\0A _param_constant40 = self._param_constant40\0A t_28 = torch.ops.aten.t(_param_constant40); _param_constant40 = None\0A view_69 = torch.ops.aten.view(mul_22, [15, 512]); mul_22 = None\0A mm_28 = torch.ops.aten.mm(view_69, t_28); view_69 = t_28 = None\0A _unsafe_view_38 = torch.ops.aten._unsafe_view(mm_28, [1, 15, 2048]); mm_28 = None\0A relu_4 = torch.ops.aten.relu(_unsafe_view_38); _unsafe_view_38 = None\0A detach_19 = torch.ops.aten.detach(relu_4)\0A _param_constant41 = self._param_constant41\0A t_29 = torch.ops.aten.t(_param_constant41); _param_constant41 = None\0A view_70 = torch.ops.aten.view(relu_4, [15, 2048]); relu_4 = None\0A mm_29 = torch.ops.aten.mm(view_70, t_29); view_70 = t_29 = None\0A _unsafe_view_39 = torch.ops.aten._unsafe_view(mm_29, [1, 15, 512]); mm_29 = None\0A add_22 = torch.ops.aten.add(add_20, _unsafe_view_39); add_20 = _unsafe_view_39 = None\0A pow_11 = torch.ops.aten.pow(add_22, 2)\0A mean_10 = torch.ops.aten.mean(pow_11, [-1], True); pow_11 = None\0A add_23 = torch.ops.aten.add(mean_10, 1e-06); mean_10 = None\0A rsqrt_10 = torch.ops.aten.rsqrt(add_23); add_23 = None\0A detach_20 = torch.ops.aten.detach(rsqrt_10)\0A mul_23 = torch.ops.aten.mul(add_22, rsqrt_10); rsqrt_10 = None\0A _param_constant42 = self._param_constant42\0A mul_24 = torch.ops.aten.mul(_param_constant42, mul_23); _param_constant42 = mul_23 = None\0A _param_constant43 = self._param_constant43\0A t_30 = torch.ops.aten.t(_param_constant43); _param_constant43 = None\0A view_71 = torch.ops.aten.view(mul_24, [15, 512])\0A mm_30 = torch.ops.aten.mm(view_71, t_30); view_71 = t_30 = None\0A _unsafe_view_40 = torch.ops.aten._unsafe_view(mm_30, [1, 15, 512]); mm_30 = None\0A view_72 = torch.ops.aten.view(_unsafe_view_40, [1, -1, 8, 64]); _unsafe_view_40 = None\0A transpose_25 = torch.ops.aten.transpose(view_72, 1, 2); view_72 = None\0A _param_constant44 = self._param_constant44\0A t_31 = 
torch.ops.aten.t(_param_constant44); _param_constant44 = None\0A view_73 = torch.ops.aten.view(mul_24, [15, 512])\0A mm_31 = torch.ops.aten.mm(view_73, t_31); view_73 = t_31 = None\0A _unsafe_view_41 = torch.ops.aten._unsafe_view(mm_31, [1, 15, 512]); mm_31 = None\0A view_74 = torch.ops.aten.view(_unsafe_view_41, [1, -1, 8, 64]); _unsafe_view_41 = None\0A transpose_26 = torch.ops.aten.transpose(view_74, 1, 2); view_74 = None\0A _param_constant45 = self._param_constant45\0A t_32 = torch.ops.aten.t(_param_constant45); _param_constant45 = None\0A view_75 = torch.ops.aten.view(mul_24, [15, 512]); mul_24 = None\0A mm_32 = torch.ops.aten.mm(view_75, t_32); view_75 = t_32 = None\0A _unsafe_view_42 = torch.ops.aten._unsafe_view(mm_32, [1, 15, 512]); mm_32 = None\0A view_76 = torch.ops.aten.view(_unsafe_view_42, [1, -1, 8, 64]); _unsafe_view_42 = None\0A transpose_27 = torch.ops.aten.transpose(view_76, 1, 2); view_76 = None\0A transpose_28 = torch.ops.aten.transpose(transpose_26, 3, 2); transpose_26 = None\0A expand_20 = torch.ops.aten.expand(transpose_25, [1, 8, 15, 64]); transpose_25 = None\0A view_77 = torch.ops.aten.view(expand_20, [8, 15, 64]); expand_20 = None\0A expand_21 = torch.ops.aten.expand(transpose_28, [1, 8, 64, 15]); transpose_28 = None\0A view_78 = torch.ops.aten.view(expand_21, [8, 64, 15]); expand_21 = None\0A bmm_10 = torch.ops.aten.bmm(view_77, view_78); view_77 = view_78 = None\0A _unsafe_view_43 = torch.ops.aten._unsafe_view(bmm_10, [1, 8, 15, 15]); bmm_10 = None\0A add__6 = torch.ops.aten.add_(_unsafe_view_43, add_3); _unsafe_view_43 = add_3 = None\0A amax_5 = torch.ops.aten.amax(add__6, [-1], True)\0A sub_6 = torch.ops.aten.sub(add__6, amax_5); add__6 = amax_5 = None\0A exp_5 = torch.ops.aten.exp(sub_6); sub_6 = None\0A sum_6 = torch.ops.aten.sum(exp_5, [-1], True)\0A div_7 = torch.ops.aten.div(exp_5, sum_6); exp_5 = sum_6 = None\0A detach_21 = torch.ops.aten.detach(div_7)\0A expand_22 = torch.ops.aten.expand(div_7, [1, 8, 15, 15]); div_7 = None\0A 
view_79 = torch.ops.aten.view(expand_22, [8, 15, 15]); expand_22 = None\0A expand_23 = torch.ops.aten.expand(transpose_27, [1, 8, 15, 64]); transpose_27 = None\0A view_80 = torch.ops.aten.view(expand_23, [8, 15, 64]); expand_23 = None\0A bmm_11 = torch.ops.aten.bmm(view_79, view_80); view_79 = view_80 = None\0A _unsafe_view_44 = torch.ops.aten._unsafe_view(bmm_11, [1, 8, 15, 64]); bmm_11 = None\0A transpose_29 = torch.ops.aten.transpose(_unsafe_view_44, 1, 2); _unsafe_view_44 = None\0A clone_6 = torch.ops.aten.clone(transpose_29, memory_format = torch.contiguous_format); transpose_29 = None\0A view_81 = torch.ops.aten.view(clone_6, [1, -1, 512]); clone_6 = None\0A _param_constant46 = self._param_constant46\0A t_33 = torch.ops.aten.t(_param_constant46); _param_constant46 = None\0A view_82 = torch.ops.aten.view(view_81, [15, 512]); view_81 = None\0A mm_33 = torch.ops.aten.mm(view_82, t_33); view_82 = t_33 = None\0A _unsafe_view_45 = torch.ops.aten._unsafe_view(mm_33, [1, 15, 512]); mm_33 = None\0A add_24 = torch.ops.aten.add(add_22, _unsafe_view_45); add_22 = _unsafe_view_45 = None\0A pow_12 = torch.ops.aten.pow(add_24, 2)\0A mean_11 = torch.ops.aten.mean(pow_12, [-1], True); pow_12 = None\0A add_25 = torch.ops.aten.add(mean_11, 1e-06); mean_11 = None\0A rsqrt_11 = torch.ops.aten.rsqrt(add_25); add_25 = None\0A detach_22 = torch.ops.aten.detach(rsqrt_11)\0A mul_25 = torch.ops.aten.mul(add_24, rsqrt_11); rsqrt_11 = None\0A _param_constant47 = self._param_constant47\0A mul_26 = torch.ops.aten.mul(_param_constant47, mul_25); _param_constant47 = mul_25 = None\0A _param_constant48 = self._param_constant48\0A t_34 = torch.ops.aten.t(_param_constant48); _param_constant48 = None\0A view_83 = torch.ops.aten.view(mul_26, [15, 512]); mul_26 = None\0A mm_34 = torch.ops.aten.mm(view_83, t_34); view_83 = t_34 = None\0A _unsafe_view_46 = torch.ops.aten._unsafe_view(mm_34, [1, 15, 2048]); mm_34 = None\0A relu_5 = torch.ops.aten.relu(_unsafe_view_46); _unsafe_view_46 = None\0A 
detach_23 = torch.ops.aten.detach(relu_5)\0A _param_constant49 = self._param_constant49\0A t_35 = torch.ops.aten.t(_param_constant49); _param_constant49 = None\0A view_84 = torch.ops.aten.view(relu_5, [15, 2048]); relu_5 = None\0A mm_35 = torch.ops.aten.mm(view_84, t_35); view_84 = t_35 = None\0A _unsafe_view_47 = torch.ops.aten._unsafe_view(mm_35, [1, 15, 512]); mm_35 = None\0A add_26 = torch.ops.aten.add(add_24, _unsafe_view_47); add_24 = _unsafe_view_47 = None\0A pow_13 = torch.ops.aten.pow(add_26, 2)\0A mean_12 = torch.ops.aten.mean(pow_13, [-1], True); pow_13 = None\0A add_27 = torch.ops.aten.add(mean_12, 1e-06); mean_12 = None\0A rsqrt_12 = torch.ops.aten.rsqrt(add_27); add_27 = None\0A detach_24 = torch.ops.aten.detach(rsqrt_12)\0A mul_27 = torch.ops.aten.mul(add_26, rsqrt_12); add_26 = rsqrt_12 = None\0A _param_constant50 = self._param_constant50\0A mul_28 = torch.ops.aten.mul(_param_constant50, mul_27); _param_constant50 = mul_27 = None\0A view_85 = torch.ops.aten.view(masked_fill_, [-1, 4]); masked_fill_ = None\0A _param_constant0_1 = self._param_constant0\0A embedding_2 = torch.ops.aten.embedding(_param_constant0_1, view_85); _param_constant0_1 = view_85 = None\0A ones_1 = torch.ops.aten.ones([1, 4], device = device(type='cpu'), pin_memory = False)\0A ones_2 = torch.ops.aten.ones([1, 15], dtype = torch.int64, device = device(type='cpu'), pin_memory = False)\0A arange_2 = torch.ops.aten.arange(4, device = device(type='cpu'), pin_memory = False)\0A unsqueeze_5 = torch.ops.aten.unsqueeze(arange_2, 0)\0A unsqueeze_6 = torch.ops.aten.unsqueeze(unsqueeze_5, 1); unsqueeze_5 = None\0A slice_7 = torch.ops.aten.slice(unsqueeze_6, 2, 0, 9223372036854775807); unsqueeze_6 = None\0A repeat = torch.ops.aten.repeat(slice_7, [1, 4, 1]); slice_7 = None\0A unsqueeze_7 = torch.ops.aten.unsqueeze(arange_2, 0); arange_2 = None\0A slice_8 = torch.ops.aten.slice(unsqueeze_7, 1, 0, 9223372036854775807); unsqueeze_7 = None\0A unsqueeze_8 = torch.ops.aten.unsqueeze(slice_8, 2); 
slice_8 = None\0A le = torch.ops.aten.le(repeat, unsqueeze_8); repeat = unsqueeze_8 = None\0A convert_element_type_3 = torch.ops.prims.convert_element_type(le, torch.float32); le = None\0A slice_9 = torch.ops.aten.slice(convert_element_type_3, 0, 0, 9223372036854775807); convert_element_type_3 = None\0A unsqueeze_9 = torch.ops.aten.unsqueeze(slice_9, 1); slice_9 = None\0A slice_10 = torch.ops.aten.slice(unsqueeze_9, 2, 0, 9223372036854775807); unsqueeze_9 = None\0A slice_11 = torch.ops.aten.slice(slice_10, 3, 0, 9223372036854775807); slice_10 = None\0A slice_12 = torch.ops.aten.slice(ones_1, 0, 0, 9223372036854775807); ones_1 = None\0A unsqueeze_10 = torch.ops.aten.unsqueeze(slice_12, 1); slice_12 = None\0A unsqueeze_11 = torch.ops.aten.unsqueeze(unsqueeze_10, 2); unsqueeze_10 = None\0A slice_13 = torch.ops.aten.slice(unsqueeze_11, 3, 0, 9223372036854775807); unsqueeze_11 = None\0A mul_29 = torch.ops.aten.mul(slice_11, slice_13); slice_11 = slice_13 = None\0A rsub_1 = torch.ops.aten.rsub(mul_29, 1.0); mul_29 = None\0A mul_30 = torch.ops.aten.mul(rsub_1, -3.4028234663852886e+38); rsub_1 = None\0A slice_14 = torch.ops.aten.slice(ones_2, 0, 0, 9223372036854775807); ones_2 = None\0A unsqueeze_12 = torch.ops.aten.unsqueeze(slice_14, 1); slice_14 = None\0A unsqueeze_13 = torch.ops.aten.unsqueeze(unsqueeze_12, 2); unsqueeze_12 = None\0A slice_15 = torch.ops.aten.slice(unsqueeze_13, 3, 0, 9223372036854775807); unsqueeze_13 = None\0A convert_element_type_4 = torch.ops.prims.convert_element_type(slice_15, torch.float32); slice_15 = None\0A rsub_2 = torch.ops.aten.rsub(convert_element_type_4, 1.0); convert_element_type_4 = None\0A mul_31 = torch.ops.aten.mul(rsub_2, -3.4028234663852886e+38); rsub_2 = None\0A pow_14 = torch.ops.aten.pow(embedding_2, 2)\0A mean_13 = torch.ops.aten.mean(pow_14, [-1], True); pow_14 = None\0A add_28 = torch.ops.aten.add(mean_13, 1e-06); mean_13 = None\0A rsqrt_13 = torch.ops.aten.rsqrt(add_28); add_28 = None\0A detach_25 = 
torch.ops.aten.detach(rsqrt_13)\0A mul_32 = torch.ops.aten.mul(embedding_2, rsqrt_13); rsqrt_13 = None\0A _param_constant51 = self._param_constant51\0A mul_33 = torch.ops.aten.mul(_param_constant51, mul_32); _param_constant51 = mul_32 = None\0A _param_constant52 = self._param_constant52\0A t_36 = torch.ops.aten.t(_param_constant52); _param_constant52 = None\0A view_86 = torch.ops.aten.view(mul_33, [4, 512])\0A mm_36 = torch.ops.aten.mm(view_86, t_36); view_86 = t_36 = None\0A _unsafe_view_48 = torch.ops.aten._unsafe_view(mm_36, [1, 4, 512]); mm_36 = None\0A view_87 = torch.ops.aten.view(_unsafe_view_48, [1, -1, 8, 64]); _unsafe_view_48 = None\0A transpose_30 = torch.ops.aten.transpose(view_87, 1, 2); view_87 = None\0A _param_constant53 = self._param_constant53\0A t_37 = torch.ops.aten.t(_param_constant53); _param_constant53 = None\0A view_88 = torch.ops.aten.view(mul_33, [4, 512])\0A mm_37 = torch.ops.aten.mm(view_88, t_37); view_88 = t_37 = None\0A _unsafe_view_49 = torch.ops.aten._unsafe_view(mm_37, [1, 4, 512]); mm_37 = None\0A view_89 = torch.ops.aten.view(_unsafe_view_49, [1, -1, 8, 64]); _unsafe_view_49 = None\0A transpose_31 = torch.ops.aten.transpose(view_89, 1, 2); view_89 = None\0A _param_constant54 = self._param_constant54\0A t_38 = torch.ops.aten.t(_param_constant54); _param_constant54 = None\0A view_90 = torch.ops.aten.view(mul_33, [4, 512]); mul_33 = None\0A mm_38 = torch.ops.aten.mm(view_90, t_38); view_90 = t_38 = None\0A _unsafe_view_50 = torch.ops.aten._unsafe_view(mm_38, [1, 4, 512]); mm_38 = None\0A view_91 = torch.ops.aten.view(_unsafe_view_50, [1, -1, 8, 64]); _unsafe_view_50 = None\0A transpose_32 = torch.ops.aten.transpose(view_91, 1, 2); view_91 = None\0A transpose_33 = torch.ops.aten.transpose(transpose_31, 3, 2); transpose_31 = None\0A expand_24 = torch.ops.aten.expand(transpose_30, [1, 8, 4, 64]); transpose_30 = None\0A view_92 = torch.ops.aten.view(expand_24, [8, 4, 64]); expand_24 = None\0A expand_25 = 
torch.ops.aten.expand(transpose_33, [1, 8, 64, 4]); transpose_33 = None\0A view_93 = torch.ops.aten.view(expand_25, [8, 64, 4]); expand_25 = None\0A bmm_12 = torch.ops.aten.bmm(view_92, view_93); view_92 = view_93 = None\0A _unsafe_view_51 = torch.ops.aten._unsafe_view(bmm_12, [1, 8, 4, 4]); bmm_12 = None\0A arange_3 = torch.ops.aten.arange(4, dtype = torch.int64, device = device(type='cpu'), pin_memory = False)\0A slice_16 = torch.ops.aten.slice(arange_3, 0, 0, 9223372036854775807); arange_3 = None\0A unsqueeze_14 = torch.ops.aten.unsqueeze(slice_16, 1); slice_16 = None\0A arange_4 = torch.ops.aten.arange(4, dtype = torch.int64, device = device(type='cpu'), pin_memory = False)\0A unsqueeze_15 = torch.ops.aten.unsqueeze(arange_4, 0); arange_4 = None\0A slice_17 = torch.ops.aten.slice(unsqueeze_15, 1, 0, 9223372036854775807); unsqueeze_15 = None\0A sub_7 = torch.ops.aten.sub(slice_17, unsqueeze_14); slice_17 = unsqueeze_14 = None\0A zeros_like = torch.ops.aten.zeros_like(sub_7, dtype = torch.int64, layout = torch.strided, device = device(type='cpu'), pin_memory = False)\0A minimum_1 = torch.ops.aten.minimum(sub_7, zeros_like); sub_7 = zeros_like = None\0A neg = torch.ops.aten.neg(minimum_1); minimum_1 = None\0A lt_1 = torch.ops.aten.lt(neg, 16)\0A convert_element_type_5 = torch.ops.prims.convert_element_type(neg, torch.float32)\0A div_8 = torch.ops.aten.div(convert_element_type_5, 16); convert_element_type_5 = None\0A log_1 = torch.ops.aten.log(div_8); div_8 = None\0A div_9 = torch.ops.aten.div(log_1, 2.0794415416798357); log_1 = None\0A mul_34 = torch.ops.aten.mul(div_9, 16); div_9 = None\0A convert_element_type_6 = torch.ops.prims.convert_element_type(mul_34, torch.int64); mul_34 = None\0A add_29 = torch.ops.aten.add(convert_element_type_6, 16); convert_element_type_6 = None\0A full_like_1 = torch.ops.aten.full_like(add_29, 31, dtype = torch.int64, layout = torch.strided, device = device(type='cpu'), pin_memory = False)\0A minimum_2 = 
torch.ops.aten.minimum(add_29, full_like_1); add_29 = full_like_1 = None\0A where_1 = torch.ops.aten.where(lt_1, neg, minimum_2); lt_1 = neg = minimum_2 = None\0A add_30 = torch.ops.aten.add(where_1, 0); where_1 = None\0A _param_constant55 = self._param_constant55\0A embedding_3 = torch.ops.aten.embedding(_param_constant55, add_30); _param_constant55 = add_30 = None\0A permute_1 = torch.ops.aten.permute(embedding_3, [2, 0, 1]); embedding_3 = None\0A unsqueeze_16 = torch.ops.aten.unsqueeze(permute_1, 0); permute_1 = None\0A add_31 = torch.ops.aten.add(unsqueeze_16, mul_30); unsqueeze_16 = mul_30 = None\0A add__7 = torch.ops.aten.add_(_unsafe_view_51, add_31); _unsafe_view_51 = None\0A amax_6 = torch.ops.aten.amax(add__7, [-1], True)\0A sub_8 = torch.ops.aten.sub(add__7, amax_6); add__7 = amax_6 = None\0A exp_6 = torch.ops.aten.exp(sub_8); sub_8 = None\0A sum_7 = torch.ops.aten.sum(exp_6, [-1], True)\0A div_10 = torch.ops.aten.div(exp_6, sum_7); exp_6 = sum_7 = None\0A detach_26 = torch.ops.aten.detach(div_10)\0A expand_26 = torch.ops.aten.expand(div_10, [1, 8, 4, 4]); div_10 = None\0A view_94 = torch.ops.aten.view(expand_26, [8, 4, 4]); expand_26 = None\0A expand_27 = torch.ops.aten.expand(transpose_32, [1, 8, 4, 64]); transpose_32 = None\0A view_95 = torch.ops.aten.view(expand_27, [8, 4, 64]); expand_27 = None\0A bmm_13 = torch.ops.aten.bmm(view_94, view_95); view_94 = view_95 = None\0A _unsafe_view_52 = torch.ops.aten._unsafe_view(bmm_13, [1, 8, 4, 64]); bmm_13 = None\0A transpose_34 = torch.ops.aten.transpose(_unsafe_view_52, 1, 2); _unsafe_view_52 = None\0A clone_7 = torch.ops.aten.clone(transpose_34, memory_format = torch.contiguous_format); transpose_34 = None\0A view_96 = torch.ops.aten.view(clone_7, [1, -1, 512]); clone_7 = None\0A _param_constant56 = self._param_constant56\0A t_39 = torch.ops.aten.t(_param_constant56); _param_constant56 = None\0A view_97 = torch.ops.aten.view(view_96, [4, 512]); view_96 = None\0A mm_39 = torch.ops.aten.mm(view_97, t_39); 
view_97 = t_39 = None\0A _unsafe_view_53 = torch.ops.aten._unsafe_view(mm_39, [1, 4, 512]); mm_39 = None\0A add_32 = torch.ops.aten.add(embedding_2, _unsafe_view_53); embedding_2 = _unsafe_view_53 = None\0A pow_15 = torch.ops.aten.pow(add_32, 2)\0A mean_14 = torch.ops.aten.mean(pow_15, [-1], True); pow_15 = None\0A add_33 = torch.ops.aten.add(mean_14, 1e-06); mean_14 = None\0A rsqrt_14 = torch.ops.aten.rsqrt(add_33); add_33 = None\0A detach_27 = torch.ops.aten.detach(rsqrt_14)\0A mul_35 = torch.ops.aten.mul(add_32, rsqrt_14); rsqrt_14 = None\0A _param_constant57 = self._param_constant57\0A mul_36 = torch.ops.aten.mul(_param_constant57, mul_35); _param_constant57 = mul_35 = None\0A _param_constant58 = self._param_constant58\0A t_40 = torch.ops.aten.t(_param_constant58); _param_constant58 = None\0A view_98 = torch.ops.aten.view(mul_36, [4, 512]); mul_36 = None\0A mm_40 = torch.ops.aten.mm(view_98, t_40); view_98 = t_40 = None\0A _unsafe_view_54 = torch.ops.aten._unsafe_view(mm_40, [1, 4, 512]); mm_40 = None\0A view_99 = torch.ops.aten.view(_unsafe_view_54, [1, -1, 8, 64]); _unsafe_view_54 = None\0A transpose_35 = torch.ops.aten.transpose(view_99, 1, 2); view_99 = None\0A _param_constant59 = self._param_constant59\0A t_41 = torch.ops.aten.t(_param_constant59); _param_constant59 = None\0A view_100 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_41 = torch.ops.aten.mm(view_100, t_41); view_100 = t_41 = None\0A _unsafe_view_55 = torch.ops.aten._unsafe_view(mm_41, [1, 15, 512]); mm_41 = None\0A view_101 = torch.ops.aten.view(_unsafe_view_55, [1, -1, 8, 64]); _unsafe_view_55 = None\0A transpose_36 = torch.ops.aten.transpose(view_101, 1, 2); view_101 = None\0A _param_constant60 = self._param_constant60\0A t_42 = torch.ops.aten.t(_param_constant60); _param_constant60 = None\0A view_102 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_42 = torch.ops.aten.mm(view_102, t_42); view_102 = t_42 = None\0A _unsafe_view_56 = torch.ops.aten._unsafe_view(mm_42, [1, 15, 512]); mm_42 = 
None\0A view_103 = torch.ops.aten.view(_unsafe_view_56, [1, -1, 8, 64]); _unsafe_view_56 = None\0A transpose_37 = torch.ops.aten.transpose(view_103, 1, 2); view_103 = None\0A transpose_38 = torch.ops.aten.transpose(transpose_36, 3, 2); transpose_36 = None\0A expand_28 = torch.ops.aten.expand(transpose_35, [1, 8, 4, 64]); transpose_35 = None\0A view_104 = torch.ops.aten.view(expand_28, [8, 4, 64]); expand_28 = None\0A expand_29 = torch.ops.aten.expand(transpose_38, [1, 8, 64, 15]); transpose_38 = None\0A view_105 = torch.ops.aten.view(expand_29, [8, 64, 15]); expand_29 = None\0A bmm_14 = torch.ops.aten.bmm(view_104, view_105); view_104 = view_105 = None\0A _unsafe_view_57 = torch.ops.aten._unsafe_view(bmm_14, [1, 8, 4, 15]); bmm_14 = None\0A zeros = torch.ops.aten.zeros([1, 8, 4, 15], dtype = torch.float32, device = device(type='cpu'), pin_memory = False)\0A add_34 = torch.ops.aten.add(zeros, mul_31); zeros = mul_31 = None\0A add__8 = torch.ops.aten.add_(_unsafe_view_57, add_34); _unsafe_view_57 = None\0A amax_7 = torch.ops.aten.amax(add__8, [-1], True)\0A sub_9 = torch.ops.aten.sub(add__8, amax_7); add__8 = amax_7 = None\0A exp_7 = torch.ops.aten.exp(sub_9); sub_9 = None\0A sum_8 = torch.ops.aten.sum(exp_7, [-1], True)\0A div_11 = torch.ops.aten.div(exp_7, sum_8); exp_7 = sum_8 = None\0A detach_28 = torch.ops.aten.detach(div_11)\0A expand_30 = torch.ops.aten.expand(div_11, [1, 8, 4, 15]); div_11 = None\0A view_106 = torch.ops.aten.view(expand_30, [8, 4, 15]); expand_30 = None\0A expand_31 = torch.ops.aten.expand(transpose_37, [1, 8, 15, 64]); transpose_37 = None\0A view_107 = torch.ops.aten.view(expand_31, [8, 15, 64]); expand_31 = None\0A bmm_15 = torch.ops.aten.bmm(view_106, view_107); view_106 = view_107 = None\0A _unsafe_view_58 = torch.ops.aten._unsafe_view(bmm_15, [1, 8, 4, 64]); bmm_15 = None\0A transpose_39 = torch.ops.aten.transpose(_unsafe_view_58, 1, 2); _unsafe_view_58 = None\0A clone_8 = torch.ops.aten.clone(transpose_39, memory_format = 
torch.contiguous_format); transpose_39 = None\0A view_108 = torch.ops.aten.view(clone_8, [1, -1, 512]); clone_8 = None\0A _param_constant61 = self._param_constant61\0A t_43 = torch.ops.aten.t(_param_constant61); _param_constant61 = None\0A view_109 = torch.ops.aten.view(view_108, [4, 512]); view_108 = None\0A mm_43 = torch.ops.aten.mm(view_109, t_43); view_109 = t_43 = None\0A _unsafe_view_59 = torch.ops.aten._unsafe_view(mm_43, [1, 4, 512]); mm_43 = None\0A add_35 = torch.ops.aten.add(add_32, _unsafe_view_59); add_32 = _unsafe_view_59 = None\0A pow_16 = torch.ops.aten.pow(add_35, 2)\0A mean_15 = torch.ops.aten.mean(pow_16, [-1], True); pow_16 = None\0A add_36 = torch.ops.aten.add(mean_15, 1e-06); mean_15 = None\0A rsqrt_15 = torch.ops.aten.rsqrt(add_36); add_36 = None\0A detach_29 = torch.ops.aten.detach(rsqrt_15)\0A mul_37 = torch.ops.aten.mul(add_35, rsqrt_15); rsqrt_15 = None\0A _param_constant62 = self._param_constant62\0A mul_38 = torch.ops.aten.mul(_param_constant62, mul_37); _param_constant62 = mul_37 = None\0A _param_constant63 = self._param_constant63\0A t_44 = torch.ops.aten.t(_param_constant63); _param_constant63 = None\0A view_110 = torch.ops.aten.view(mul_38, [4, 512]); mul_38 = None\0A mm_44 = torch.ops.aten.mm(view_110, t_44); view_110 = t_44 = None\0A _unsafe_view_60 = torch.ops.aten._unsafe_view(mm_44, [1, 4, 2048]); mm_44 = None\0A relu_6 = torch.ops.aten.relu(_unsafe_view_60); _unsafe_view_60 = None\0A detach_30 = torch.ops.aten.detach(relu_6)\0A _param_constant64 = self._param_constant64\0A t_45 = torch.ops.aten.t(_param_constant64); _param_constant64 = None\0A view_111 = torch.ops.aten.view(relu_6, [4, 2048]); relu_6 = None\0A mm_45 = torch.ops.aten.mm(view_111, t_45); view_111 = t_45 = None\0A _unsafe_view_61 = torch.ops.aten._unsafe_view(mm_45, [1, 4, 512]); mm_45 = None\0A add_37 = torch.ops.aten.add(add_35, _unsafe_view_61); add_35 = _unsafe_view_61 = None\0A pow_17 = torch.ops.aten.pow(add_37, 2)\0A mean_16 = torch.ops.aten.mean(pow_17, 
[-1], True); pow_17 = None\0A add_38 = torch.ops.aten.add(mean_16, 1e-06); mean_16 = None\0A rsqrt_16 = torch.ops.aten.rsqrt(add_38); add_38 = None\0A detach_31 = torch.ops.aten.detach(rsqrt_16)\0A mul_39 = torch.ops.aten.mul(add_37, rsqrt_16); rsqrt_16 = None\0A _param_constant65 = self._param_constant65\0A mul_40 = torch.ops.aten.mul(_param_constant65, mul_39); _param_constant65 = mul_39 = None\0A _param_constant66 = self._param_constant66\0A t_46 = torch.ops.aten.t(_param_constant66); _param_constant66 = None\0A view_112 = torch.ops.aten.view(mul_40, [4, 512])\0A mm_46 = torch.ops.aten.mm(view_112, t_46); view_112 = t_46 = None\0A _unsafe_view_62 = torch.ops.aten._unsafe_view(mm_46, [1, 4, 512]); mm_46 = None\0A view_113 = torch.ops.aten.view(_unsafe_view_62, [1, -1, 8, 64]); _unsafe_view_62 = None\0A transpose_40 = torch.ops.aten.transpose(view_113, 1, 2); view_113 = None\0A _param_constant67 = self._param_constant67\0A t_47 = torch.ops.aten.t(_param_constant67); _param_constant67 = None\0A view_114 = torch.ops.aten.view(mul_40, [4, 512])\0A mm_47 = torch.ops.aten.mm(view_114, t_47); view_114 = t_47 = None\0A _unsafe_view_63 = torch.ops.aten._unsafe_view(mm_47, [1, 4, 512]); mm_47 = None\0A view_115 = torch.ops.aten.view(_unsafe_view_63, [1, -1, 8, 64]); _unsafe_view_63 = None\0A transpose_41 = torch.ops.aten.transpose(view_115, 1, 2); view_115 = None\0A _param_constant68 = self._param_constant68\0A t_48 = torch.ops.aten.t(_param_constant68); _param_constant68 = None\0A view_116 = torch.ops.aten.view(mul_40, [4, 512]); mul_40 = None\0A mm_48 = torch.ops.aten.mm(view_116, t_48); view_116 = t_48 = None\0A _unsafe_view_64 = torch.ops.aten._unsafe_view(mm_48, [1, 4, 512]); mm_48 = None\0A view_117 = torch.ops.aten.view(_unsafe_view_64, [1, -1, 8, 64]); _unsafe_view_64 = None\0A transpose_42 = torch.ops.aten.transpose(view_117, 1, 2); view_117 = None\0A transpose_43 = torch.ops.aten.transpose(transpose_41, 3, 2); transpose_41 = None\0A expand_32 = 
torch.ops.aten.expand(transpose_40, [1, 8, 4, 64]); transpose_40 = None\0A view_118 = torch.ops.aten.view(expand_32, [8, 4, 64]); expand_32 = None\0A expand_33 = torch.ops.aten.expand(transpose_43, [1, 8, 64, 4]); transpose_43 = None\0A view_119 = torch.ops.aten.view(expand_33, [8, 64, 4]); expand_33 = None\0A bmm_16 = torch.ops.aten.bmm(view_118, view_119); view_118 = view_119 = None\0A _unsafe_view_65 = torch.ops.aten._unsafe_view(bmm_16, [1, 8, 4, 4]); bmm_16 = None\0A add__9 = torch.ops.aten.add_(_unsafe_view_65, add_31); _unsafe_view_65 = None\0A amax_8 = torch.ops.aten.amax(add__9, [-1], True)\0A sub_10 = torch.ops.aten.sub(add__9, amax_8); add__9 = amax_8 = None\0A exp_8 = torch.ops.aten.exp(sub_10); sub_10 = None\0A sum_9 = torch.ops.aten.sum(exp_8, [-1], True)\0A div_12 = torch.ops.aten.div(exp_8, sum_9); exp_8 = sum_9 = None\0A detach_32 = torch.ops.aten.detach(div_12)\0A expand_34 = torch.ops.aten.expand(div_12, [1, 8, 4, 4]); div_12 = None\0A view_120 = torch.ops.aten.view(expand_34, [8, 4, 4]); expand_34 = None\0A expand_35 = torch.ops.aten.expand(transpose_42, [1, 8, 4, 64]); transpose_42 = None\0A view_121 = torch.ops.aten.view(expand_35, [8, 4, 64]); expand_35 = None\0A bmm_17 = torch.ops.aten.bmm(view_120, view_121); view_120 = view_121 = None\0A _unsafe_view_66 = torch.ops.aten._unsafe_view(bmm_17, [1, 8, 4, 64]); bmm_17 = None\0A transpose_44 = torch.ops.aten.transpose(_unsafe_view_66, 1, 2); _unsafe_view_66 = None\0A clone_9 = torch.ops.aten.clone(transpose_44, memory_format = torch.contiguous_format); transpose_44 = None\0A view_122 = torch.ops.aten.view(clone_9, [1, -1, 512]); clone_9 = None\0A _param_constant69 = self._param_constant69\0A t_49 = torch.ops.aten.t(_param_constant69); _param_constant69 = None\0A view_123 = torch.ops.aten.view(view_122, [4, 512]); view_122 = None\0A mm_49 = torch.ops.aten.mm(view_123, t_49); view_123 = t_49 = None\0A _unsafe_view_67 = torch.ops.aten._unsafe_view(mm_49, [1, 4, 512]); mm_49 = None\0A add_39 = 
torch.ops.aten.add(add_37, _unsafe_view_67); add_37 = _unsafe_view_67 = None\0A pow_18 = torch.ops.aten.pow(add_39, 2)\0A mean_17 = torch.ops.aten.mean(pow_18, [-1], True); pow_18 = None\0A add_40 = torch.ops.aten.add(mean_17, 1e-06); mean_17 = None\0A rsqrt_17 = torch.ops.aten.rsqrt(add_40); add_40 = None\0A detach_33 = torch.ops.aten.detach(rsqrt_17)\0A mul_41 = torch.ops.aten.mul(add_39, rsqrt_17); rsqrt_17 = None\0A _param_constant70 = self._param_constant70\0A mul_42 = torch.ops.aten.mul(_param_constant70, mul_41); _param_constant70 = mul_41 = None\0A _param_constant71 = self._param_constant71\0A t_50 = torch.ops.aten.t(_param_constant71); _param_constant71 = None\0A view_124 = torch.ops.aten.view(mul_42, [4, 512]); mul_42 = None\0A mm_50 = torch.ops.aten.mm(view_124, t_50); view_124 = t_50 = None\0A _unsafe_view_68 = torch.ops.aten._unsafe_view(mm_50, [1, 4, 512]); mm_50 = None\0A view_125 = torch.ops.aten.view(_unsafe_view_68, [1, -1, 8, 64]); _unsafe_view_68 = None\0A transpose_45 = torch.ops.aten.transpose(view_125, 1, 2); view_125 = None\0A _param_constant72 = self._param_constant72\0A t_51 = torch.ops.aten.t(_param_constant72); _param_constant72 = None\0A view_126 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_51 = torch.ops.aten.mm(view_126, t_51); view_126 = t_51 = None\0A _unsafe_view_69 = torch.ops.aten._unsafe_view(mm_51, [1, 15, 512]); mm_51 = None\0A view_127 = torch.ops.aten.view(_unsafe_view_69, [1, -1, 8, 64]); _unsafe_view_69 = None\0A transpose_46 = torch.ops.aten.transpose(view_127, 1, 2); view_127 = None\0A _param_constant73 = self._param_constant73\0A t_52 = torch.ops.aten.t(_param_constant73); _param_constant73 = None\0A view_128 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_52 = torch.ops.aten.mm(view_128, t_52); view_128 = t_52 = None\0A _unsafe_view_70 = torch.ops.aten._unsafe_view(mm_52, [1, 15, 512]); mm_52 = None\0A view_129 = torch.ops.aten.view(_unsafe_view_70, [1, -1, 8, 64]); _unsafe_view_70 = None\0A transpose_47 = 
torch.ops.aten.transpose(view_129, 1, 2); view_129 = None\0A transpose_48 = torch.ops.aten.transpose(transpose_46, 3, 2); transpose_46 = None\0A expand_36 = torch.ops.aten.expand(transpose_45, [1, 8, 4, 64]); transpose_45 = None\0A view_130 = torch.ops.aten.view(expand_36, [8, 4, 64]); expand_36 = None\0A expand_37 = torch.ops.aten.expand(transpose_48, [1, 8, 64, 15]); transpose_48 = None\0A view_131 = torch.ops.aten.view(expand_37, [8, 64, 15]); expand_37 = None\0A bmm_18 = torch.ops.aten.bmm(view_130, view_131); view_130 = view_131 = None\0A _unsafe_view_71 = torch.ops.aten._unsafe_view(bmm_18, [1, 8, 4, 15]); bmm_18 = None\0A add__10 = torch.ops.aten.add_(_unsafe_view_71, add_34); _unsafe_view_71 = None\0A amax_9 = torch.ops.aten.amax(add__10, [-1], True)\0A sub_11 = torch.ops.aten.sub(add__10, amax_9); add__10 = amax_9 = None\0A exp_9 = torch.ops.aten.exp(sub_11); sub_11 = None\0A sum_10 = torch.ops.aten.sum(exp_9, [-1], True)\0A div_13 = torch.ops.aten.div(exp_9, sum_10); exp_9 = sum_10 = None\0A detach_34 = torch.ops.aten.detach(div_13)\0A expand_38 = torch.ops.aten.expand(div_13, [1, 8, 4, 15]); div_13 = None\0A view_132 = torch.ops.aten.view(expand_38, [8, 4, 15]); expand_38 = None\0A expand_39 = torch.ops.aten.expand(transpose_47, [1, 8, 15, 64]); transpose_47 = None\0A view_133 = torch.ops.aten.view(expand_39, [8, 15, 64]); expand_39 = None\0A bmm_19 = torch.ops.aten.bmm(view_132, view_133); view_132 = view_133 = None\0A _unsafe_view_72 = torch.ops.aten._unsafe_view(bmm_19, [1, 8, 4, 64]); bmm_19 = None\0A transpose_49 = torch.ops.aten.transpose(_unsafe_view_72, 1, 2); _unsafe_view_72 = None\0A clone_10 = torch.ops.aten.clone(transpose_49, memory_format = torch.contiguous_format); transpose_49 = None\0A view_134 = torch.ops.aten.view(clone_10, [1, -1, 512]); clone_10 = None\0A _param_constant74 = self._param_constant74\0A t_53 = torch.ops.aten.t(_param_constant74); _param_constant74 = None\0A view_135 = torch.ops.aten.view(view_134, [4, 512]); view_134 = 
None\0A mm_53 = torch.ops.aten.mm(view_135, t_53); view_135 = t_53 = None\0A _unsafe_view_73 = torch.ops.aten._unsafe_view(mm_53, [1, 4, 512]); mm_53 = None\0A add_41 = torch.ops.aten.add(add_39, _unsafe_view_73); add_39 = _unsafe_view_73 = None\0A pow_19 = torch.ops.aten.pow(add_41, 2)\0A mean_18 = torch.ops.aten.mean(pow_19, [-1], True); pow_19 = None\0A add_42 = torch.ops.aten.add(mean_18, 1e-06); mean_18 = None\0A rsqrt_18 = torch.ops.aten.rsqrt(add_42); add_42 = None\0A detach_35 = torch.ops.aten.detach(rsqrt_18)\0A mul_43 = torch.ops.aten.mul(add_41, rsqrt_18); rsqrt_18 = None\0A _param_constant75 = self._param_constant75\0A mul_44 = torch.ops.aten.mul(_param_constant75, mul_43); _param_constant75 = mul_43 = None\0A _param_constant76 = self._param_constant76\0A t_54 = torch.ops.aten.t(_param_constant76); _param_constant76 = None\0A view_136 = torch.ops.aten.view(mul_44, [4, 512]); mul_44 = None\0A mm_54 = torch.ops.aten.mm(view_136, t_54); view_136 = t_54 = None\0A _unsafe_view_74 = torch.ops.aten._unsafe_view(mm_54, [1, 4, 2048]); mm_54 = None\0A relu_7 = torch.ops.aten.relu(_unsafe_view_74); _unsafe_view_74 = None\0A detach_36 = torch.ops.aten.detach(relu_7)\0A _param_constant77 = self._param_constant77\0A t_55 = torch.ops.aten.t(_param_constant77); _param_constant77 = None\0A view_137 = torch.ops.aten.view(relu_7, [4, 2048]); relu_7 = None\0A mm_55 = torch.ops.aten.mm(view_137, t_55); view_137 = t_55 = None\0A _unsafe_view_75 = torch.ops.aten._unsafe_view(mm_55, [1, 4, 512]); mm_55 = None\0A add_43 = torch.ops.aten.add(add_41, _unsafe_view_75); add_41 = _unsafe_view_75 = None\0A pow_20 = torch.ops.aten.pow(add_43, 2)\0A mean_19 = torch.ops.aten.mean(pow_20, [-1], True); pow_20 = None\0A add_44 = torch.ops.aten.add(mean_19, 1e-06); mean_19 = None\0A rsqrt_19 = torch.ops.aten.rsqrt(add_44); add_44 = None\0A detach_37 = torch.ops.aten.detach(rsqrt_19)\0A mul_45 = torch.ops.aten.mul(add_43, rsqrt_19); rsqrt_19 = None\0A _param_constant78 = 
self._param_constant78\0A mul_46 = torch.ops.aten.mul(_param_constant78, mul_45); _param_constant78 = mul_45 = None\0A _param_constant79 = self._param_constant79\0A t_56 = torch.ops.aten.t(_param_constant79); _param_constant79 = None\0A view_138 = torch.ops.aten.view(mul_46, [4, 512])\0A mm_56 = torch.ops.aten.mm(view_138, t_56); view_138 = t_56 = None\0A _unsafe_view_76 = torch.ops.aten._unsafe_view(mm_56, [1, 4, 512]); mm_56 = None\0A view_139 = torch.ops.aten.view(_unsafe_view_76, [1, -1, 8, 64]); _unsafe_view_76 = None\0A transpose_50 = torch.ops.aten.transpose(view_139, 1, 2); view_139 = None\0A _param_constant80 = self._param_constant80\0A t_57 = torch.ops.aten.t(_param_constant80); _param_constant80 = None\0A view_140 = torch.ops.aten.view(mul_46, [4, 512])\0A mm_57 = torch.ops.aten.mm(view_140, t_57); view_140 = t_57 = None\0A _unsafe_view_77 = torch.ops.aten._unsafe_view(mm_57, [1, 4, 512]); mm_57 = None\0A view_141 = torch.ops.aten.view(_unsafe_view_77, [1, -1, 8, 64]); _unsafe_view_77 = None\0A transpose_51 = torch.ops.aten.transpose(view_141, 1, 2); view_141 = None\0A _param_constant81 = self._param_constant81\0A t_58 = torch.ops.aten.t(_param_constant81); _param_constant81 = None\0A view_142 = torch.ops.aten.view(mul_46, [4, 512]); mul_46 = None\0A mm_58 = torch.ops.aten.mm(view_142, t_58); view_142 = t_58 = None\0A _unsafe_view_78 = torch.ops.aten._unsafe_view(mm_58, [1, 4, 512]); mm_58 = None\0A view_143 = torch.ops.aten.view(_unsafe_view_78, [1, -1, 8, 64]); _unsafe_view_78 = None\0A transpose_52 = torch.ops.aten.transpose(view_143, 1, 2); view_143 = None\0A transpose_53 = torch.ops.aten.transpose(transpose_51, 3, 2); transpose_51 = None\0A expand_40 = torch.ops.aten.expand(transpose_50, [1, 8, 4, 64]); transpose_50 = None\0A view_144 = torch.ops.aten.view(expand_40, [8, 4, 64]); expand_40 = None\0A expand_41 = torch.ops.aten.expand(transpose_53, [1, 8, 64, 4]); transpose_53 = None\0A view_145 = torch.ops.aten.view(expand_41, [8, 64, 4]); expand_41 
= None\0A bmm_20 = torch.ops.aten.bmm(view_144, view_145); view_144 = view_145 = None\0A _unsafe_view_79 = torch.ops.aten._unsafe_view(bmm_20, [1, 8, 4, 4]); bmm_20 = None\0A add__11 = torch.ops.aten.add_(_unsafe_view_79, add_31); _unsafe_view_79 = None\0A amax_10 = torch.ops.aten.amax(add__11, [-1], True)\0A sub_12 = torch.ops.aten.sub(add__11, amax_10); add__11 = amax_10 = None\0A exp_10 = torch.ops.aten.exp(sub_12); sub_12 = None\0A sum_11 = torch.ops.aten.sum(exp_10, [-1], True)\0A div_14 = torch.ops.aten.div(exp_10, sum_11); exp_10 = sum_11 = None\0A detach_38 = torch.ops.aten.detach(div_14)\0A expand_42 = torch.ops.aten.expand(div_14, [1, 8, 4, 4]); div_14 = None\0A view_146 = torch.ops.aten.view(expand_42, [8, 4, 4]); expand_42 = None\0A expand_43 = torch.ops.aten.expand(transpose_52, [1, 8, 4, 64]); transpose_52 = None\0A view_147 = torch.ops.aten.view(expand_43, [8, 4, 64]); expand_43 = None\0A bmm_21 = torch.ops.aten.bmm(view_146, view_147); view_146 = view_147 = None\0A _unsafe_view_80 = torch.ops.aten._unsafe_view(bmm_21, [1, 8, 4, 64]); bmm_21 = None\0A transpose_54 = torch.ops.aten.transpose(_unsafe_view_80, 1, 2); _unsafe_view_80 = None\0A clone_11 = torch.ops.aten.clone(transpose_54, memory_format = torch.contiguous_format); transpose_54 = None\0A view_148 = torch.ops.aten.view(clone_11, [1, -1, 512]); clone_11 = None\0A _param_constant82 = self._param_constant82\0A t_59 = torch.ops.aten.t(_param_constant82); _param_constant82 = None\0A view_149 = torch.ops.aten.view(view_148, [4, 512]); view_148 = None\0A mm_59 = torch.ops.aten.mm(view_149, t_59); view_149 = t_59 = None\0A _unsafe_view_81 = torch.ops.aten._unsafe_view(mm_59, [1, 4, 512]); mm_59 = None\0A add_45 = torch.ops.aten.add(add_43, _unsafe_view_81); add_43 = _unsafe_view_81 = None\0A pow_21 = torch.ops.aten.pow(add_45, 2)\0A mean_20 = torch.ops.aten.mean(pow_21, [-1], True); pow_21 = None\0A add_46 = torch.ops.aten.add(mean_20, 1e-06); mean_20 = None\0A rsqrt_20 = 
torch.ops.aten.rsqrt(add_46); add_46 = None\0A detach_39 = torch.ops.aten.detach(rsqrt_20)\0A mul_47 = torch.ops.aten.mul(add_45, rsqrt_20); rsqrt_20 = None\0A _param_constant83 = self._param_constant83\0A mul_48 = torch.ops.aten.mul(_param_constant83, mul_47); _param_constant83 = mul_47 = None\0A _param_constant84 = self._param_constant84\0A t_60 = torch.ops.aten.t(_param_constant84); _param_constant84 = None\0A view_150 = torch.ops.aten.view(mul_48, [4, 512]); mul_48 = None\0A mm_60 = torch.ops.aten.mm(view_150, t_60); view_150 = t_60 = None\0A _unsafe_view_82 = torch.ops.aten._unsafe_view(mm_60, [1, 4, 512]); mm_60 = None\0A view_151 = torch.ops.aten.view(_unsafe_view_82, [1, -1, 8, 64]); _unsafe_view_82 = None\0A transpose_55 = torch.ops.aten.transpose(view_151, 1, 2); view_151 = None\0A _param_constant85 = self._param_constant85\0A t_61 = torch.ops.aten.t(_param_constant85); _param_constant85 = None\0A view_152 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_61 = torch.ops.aten.mm(view_152, t_61); view_152 = t_61 = None\0A _unsafe_view_83 = torch.ops.aten._unsafe_view(mm_61, [1, 15, 512]); mm_61 = None\0A view_153 = torch.ops.aten.view(_unsafe_view_83, [1, -1, 8, 64]); _unsafe_view_83 = None\0A transpose_56 = torch.ops.aten.transpose(view_153, 1, 2); view_153 = None\0A _param_constant86 = self._param_constant86\0A t_62 = torch.ops.aten.t(_param_constant86); _param_constant86 = None\0A view_154 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_62 = torch.ops.aten.mm(view_154, t_62); view_154 = t_62 = None\0A _unsafe_view_84 = torch.ops.aten._unsafe_view(mm_62, [1, 15, 512]); mm_62 = None\0A view_155 = torch.ops.aten.view(_unsafe_view_84, [1, -1, 8, 64]); _unsafe_view_84 = None\0A transpose_57 = torch.ops.aten.transpose(view_155, 1, 2); view_155 = None\0A transpose_58 = torch.ops.aten.transpose(transpose_56, 3, 2); transpose_56 = None\0A expand_44 = torch.ops.aten.expand(transpose_55, [1, 8, 4, 64]); transpose_55 = None\0A view_156 = torch.ops.aten.view(expand_44, 
[8, 4, 64]); expand_44 = None\0A expand_45 = torch.ops.aten.expand(transpose_58, [1, 8, 64, 15]); transpose_58 = None\0A view_157 = torch.ops.aten.view(expand_45, [8, 64, 15]); expand_45 = None\0A bmm_22 = torch.ops.aten.bmm(view_156, view_157); view_156 = view_157 = None\0A _unsafe_view_85 = torch.ops.aten._unsafe_view(bmm_22, [1, 8, 4, 15]); bmm_22 = None\0A add__12 = torch.ops.aten.add_(_unsafe_view_85, add_34); _unsafe_view_85 = None\0A amax_11 = torch.ops.aten.amax(add__12, [-1], True)\0A sub_13 = torch.ops.aten.sub(add__12, amax_11); add__12 = amax_11 = None\0A exp_11 = torch.ops.aten.exp(sub_13); sub_13 = None\0A sum_12 = torch.ops.aten.sum(exp_11, [-1], True)\0A div_15 = torch.ops.aten.div(exp_11, sum_12); exp_11 = sum_12 = None\0A detach_40 = torch.ops.aten.detach(div_15)\0A expand_46 = torch.ops.aten.expand(div_15, [1, 8, 4, 15]); div_15 = None\0A view_158 = torch.ops.aten.view(expand_46, [8, 4, 15]); expand_46 = None\0A expand_47 = torch.ops.aten.expand(transpose_57, [1, 8, 15, 64]); transpose_57 = None\0A view_159 = torch.ops.aten.view(expand_47, [8, 15, 64]); expand_47 = None\0A bmm_23 = torch.ops.aten.bmm(view_158, view_159); view_158 = view_159 = None\0A _unsafe_view_86 = torch.ops.aten._unsafe_view(bmm_23, [1, 8, 4, 64]); bmm_23 = None\0A transpose_59 = torch.ops.aten.transpose(_unsafe_view_86, 1, 2); _unsafe_view_86 = None\0A clone_12 = torch.ops.aten.clone(transpose_59, memory_format = torch.contiguous_format); transpose_59 = None\0A view_160 = torch.ops.aten.view(clone_12, [1, -1, 512]); clone_12 = None\0A _param_constant87 = self._param_constant87\0A t_63 = torch.ops.aten.t(_param_constant87); _param_constant87 = None\0A view_161 = torch.ops.aten.view(view_160, [4, 512]); view_160 = None\0A mm_63 = torch.ops.aten.mm(view_161, t_63); view_161 = t_63 = None\0A _unsafe_view_87 = torch.ops.aten._unsafe_view(mm_63, [1, 4, 512]); mm_63 = None\0A add_47 = torch.ops.aten.add(add_45, _unsafe_view_87); add_45 = _unsafe_view_87 = None\0A pow_22 = 
torch.ops.aten.pow(add_47, 2)\0A mean_21 = torch.ops.aten.mean(pow_22, [-1], True); pow_22 = None\0A add_48 = torch.ops.aten.add(mean_21, 1e-06); mean_21 = None\0A rsqrt_21 = torch.ops.aten.rsqrt(add_48); add_48 = None\0A detach_41 = torch.ops.aten.detach(rsqrt_21)\0A mul_49 = torch.ops.aten.mul(add_47, rsqrt_21); rsqrt_21 = None\0A _param_constant88 = self._param_constant88\0A mul_50 = torch.ops.aten.mul(_param_constant88, mul_49); _param_constant88 = mul_49 = None\0A _param_constant89 = self._param_constant89\0A t_64 = torch.ops.aten.t(_param_constant89); _param_constant89 = None\0A view_162 = torch.ops.aten.view(mul_50, [4, 512]); mul_50 = None\0A mm_64 = torch.ops.aten.mm(view_162, t_64); view_162 = t_64 = None\0A _unsafe_view_88 = torch.ops.aten._unsafe_view(mm_64, [1, 4, 2048]); mm_64 = None\0A relu_8 = torch.ops.aten.relu(_unsafe_view_88); _unsafe_view_88 = None\0A detach_42 = torch.ops.aten.detach(relu_8)\0A _param_constant90 = self._param_constant90\0A t_65 = torch.ops.aten.t(_param_constant90); _param_constant90 = None\0A view_163 = torch.ops.aten.view(relu_8, [4, 2048]); relu_8 = None\0A mm_65 = torch.ops.aten.mm(view_163, t_65); view_163 = t_65 = None\0A _unsafe_view_89 = torch.ops.aten._unsafe_view(mm_65, [1, 4, 512]); mm_65 = None\0A add_49 = torch.ops.aten.add(add_47, _unsafe_view_89); add_47 = _unsafe_view_89 = None\0A pow_23 = torch.ops.aten.pow(add_49, 2)\0A mean_22 = torch.ops.aten.mean(pow_23, [-1], True); pow_23 = None\0A add_50 = torch.ops.aten.add(mean_22, 1e-06); mean_22 = None\0A rsqrt_22 = torch.ops.aten.rsqrt(add_50); add_50 = None\0A detach_43 = torch.ops.aten.detach(rsqrt_22)\0A mul_51 = torch.ops.aten.mul(add_49, rsqrt_22); rsqrt_22 = None\0A _param_constant91 = self._param_constant91\0A mul_52 = torch.ops.aten.mul(_param_constant91, mul_51); _param_constant91 = mul_51 = None\0A _param_constant92 = self._param_constant92\0A t_66 = torch.ops.aten.t(_param_constant92); _param_constant92 = None\0A view_164 = torch.ops.aten.view(mul_52, 
[4, 512])\0A mm_66 = torch.ops.aten.mm(view_164, t_66); view_164 = t_66 = None\0A _unsafe_view_90 = torch.ops.aten._unsafe_view(mm_66, [1, 4, 512]); mm_66 = None\0A view_165 = torch.ops.aten.view(_unsafe_view_90, [1, -1, 8, 64]); _unsafe_view_90 = None\0A transpose_60 = torch.ops.aten.transpose(view_165, 1, 2); view_165 = None\0A _param_constant93 = self._param_constant93\0A t_67 = torch.ops.aten.t(_param_constant93); _param_constant93 = None\0A view_166 = torch.ops.aten.view(mul_52, [4, 512])\0A mm_67 = torch.ops.aten.mm(view_166, t_67); view_166 = t_67 = None\0A _unsafe_view_91 = torch.ops.aten._unsafe_view(mm_67, [1, 4, 512]); mm_67 = None\0A view_167 = torch.ops.aten.view(_unsafe_view_91, [1, -1, 8, 64]); _unsafe_view_91 = None\0A transpose_61 = torch.ops.aten.transpose(view_167, 1, 2); view_167 = None\0A _param_constant94 = self._param_constant94\0A t_68 = torch.ops.aten.t(_param_constant94); _param_constant94 = None\0A view_168 = torch.ops.aten.view(mul_52, [4, 512]); mul_52 = None\0A mm_68 = torch.ops.aten.mm(view_168, t_68); view_168 = t_68 = None\0A _unsafe_view_92 = torch.ops.aten._unsafe_view(mm_68, [1, 4, 512]); mm_68 = None\0A view_169 = torch.ops.aten.view(_unsafe_view_92, [1, -1, 8, 64]); _unsafe_view_92 = None\0A transpose_62 = torch.ops.aten.transpose(view_169, 1, 2); view_169 = None\0A transpose_63 = torch.ops.aten.transpose(transpose_61, 3, 2); transpose_61 = None\0A expand_48 = torch.ops.aten.expand(transpose_60, [1, 8, 4, 64]); transpose_60 = None\0A view_170 = torch.ops.aten.view(expand_48, [8, 4, 64]); expand_48 = None\0A expand_49 = torch.ops.aten.expand(transpose_63, [1, 8, 64, 4]); transpose_63 = None\0A view_171 = torch.ops.aten.view(expand_49, [8, 64, 4]); expand_49 = None\0A bmm_24 = torch.ops.aten.bmm(view_170, view_171); view_170 = view_171 = None\0A _unsafe_view_93 = torch.ops.aten._unsafe_view(bmm_24, [1, 8, 4, 4]); bmm_24 = None\0A add__13 = torch.ops.aten.add_(_unsafe_view_93, add_31); _unsafe_view_93 = None\0A amax_12 = 
torch.ops.aten.amax(add__13, [-1], True)\0A sub_14 = torch.ops.aten.sub(add__13, amax_12); add__13 = amax_12 = None\0A exp_12 = torch.ops.aten.exp(sub_14); sub_14 = None\0A sum_13 = torch.ops.aten.sum(exp_12, [-1], True)\0A div_16 = torch.ops.aten.div(exp_12, sum_13); exp_12 = sum_13 = None\0A detach_44 = torch.ops.aten.detach(div_16)\0A expand_50 = torch.ops.aten.expand(div_16, [1, 8, 4, 4]); div_16 = None\0A view_172 = torch.ops.aten.view(expand_50, [8, 4, 4]); expand_50 = None\0A expand_51 = torch.ops.aten.expand(transpose_62, [1, 8, 4, 64]); transpose_62 = None\0A view_173 = torch.ops.aten.view(expand_51, [8, 4, 64]); expand_51 = None\0A bmm_25 = torch.ops.aten.bmm(view_172, view_173); view_172 = view_173 = None\0A _unsafe_view_94 = torch.ops.aten._unsafe_view(bmm_25, [1, 8, 4, 64]); bmm_25 = None\0A transpose_64 = torch.ops.aten.transpose(_unsafe_view_94, 1, 2); _unsafe_view_94 = None\0A clone_13 = torch.ops.aten.clone(transpose_64, memory_format = torch.contiguous_format); transpose_64 = None\0A view_174 = torch.ops.aten.view(clone_13, [1, -1, 512]); clone_13 = None\0A _param_constant95 = self._param_constant95\0A t_69 = torch.ops.aten.t(_param_constant95); _param_constant95 = None\0A view_175 = torch.ops.aten.view(view_174, [4, 512]); view_174 = None\0A mm_69 = torch.ops.aten.mm(view_175, t_69); view_175 = t_69 = None\0A _unsafe_view_95 = torch.ops.aten._unsafe_view(mm_69, [1, 4, 512]); mm_69 = None\0A add_51 = torch.ops.aten.add(add_49, _unsafe_view_95); add_49 = _unsafe_view_95 = None\0A pow_24 = torch.ops.aten.pow(add_51, 2)\0A mean_23 = torch.ops.aten.mean(pow_24, [-1], True); pow_24 = None\0A add_52 = torch.ops.aten.add(mean_23, 1e-06); mean_23 = None\0A rsqrt_23 = torch.ops.aten.rsqrt(add_52); add_52 = None\0A detach_45 = torch.ops.aten.detach(rsqrt_23)\0A mul_53 = torch.ops.aten.mul(add_51, rsqrt_23); rsqrt_23 = None\0A _param_constant96 = self._param_constant96\0A mul_54 = torch.ops.aten.mul(_param_constant96, mul_53); _param_constant96 = mul_53 = 
None\0A _param_constant97 = self._param_constant97\0A t_70 = torch.ops.aten.t(_param_constant97); _param_constant97 = None\0A view_176 = torch.ops.aten.view(mul_54, [4, 512]); mul_54 = None\0A mm_70 = torch.ops.aten.mm(view_176, t_70); view_176 = t_70 = None\0A _unsafe_view_96 = torch.ops.aten._unsafe_view(mm_70, [1, 4, 512]); mm_70 = None\0A view_177 = torch.ops.aten.view(_unsafe_view_96, [1, -1, 8, 64]); _unsafe_view_96 = None\0A transpose_65 = torch.ops.aten.transpose(view_177, 1, 2); view_177 = None\0A _param_constant98 = self._param_constant98\0A t_71 = torch.ops.aten.t(_param_constant98); _param_constant98 = None\0A view_178 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_71 = torch.ops.aten.mm(view_178, t_71); view_178 = t_71 = None\0A _unsafe_view_97 = torch.ops.aten._unsafe_view(mm_71, [1, 15, 512]); mm_71 = None\0A view_179 = torch.ops.aten.view(_unsafe_view_97, [1, -1, 8, 64]); _unsafe_view_97 = None\0A transpose_66 = torch.ops.aten.transpose(view_179, 1, 2); view_179 = None\0A _param_constant99 = self._param_constant99\0A t_72 = torch.ops.aten.t(_param_constant99); _param_constant99 = None\0A view_180 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_72 = torch.ops.aten.mm(view_180, t_72); view_180 = t_72 = None\0A _unsafe_view_98 = torch.ops.aten._unsafe_view(mm_72, [1, 15, 512]); mm_72 = None\0A view_181 = torch.ops.aten.view(_unsafe_view_98, [1, -1, 8, 64]); _unsafe_view_98 = None\0A transpose_67 = torch.ops.aten.transpose(view_181, 1, 2); view_181 = None\0A transpose_68 = torch.ops.aten.transpose(transpose_66, 3, 2); transpose_66 = None\0A expand_52 = torch.ops.aten.expand(transpose_65, [1, 8, 4, 64]); transpose_65 = None\0A view_182 = torch.ops.aten.view(expand_52, [8, 4, 64]); expand_52 = None\0A expand_53 = torch.ops.aten.expand(transpose_68, [1, 8, 64, 15]); transpose_68 = None\0A view_183 = torch.ops.aten.view(expand_53, [8, 64, 15]); expand_53 = None\0A bmm_26 = torch.ops.aten.bmm(view_182, view_183); view_182 = view_183 = None\0A _unsafe_view_99 
= torch.ops.aten._unsafe_view(bmm_26, [1, 8, 4, 15]); bmm_26 = None\0A add__14 = torch.ops.aten.add_(_unsafe_view_99, add_34); _unsafe_view_99 = None\0A amax_13 = torch.ops.aten.amax(add__14, [-1], True)\0A sub_15 = torch.ops.aten.sub(add__14, amax_13); add__14 = amax_13 = None\0A exp_13 = torch.ops.aten.exp(sub_15); sub_15 = None\0A sum_14 = torch.ops.aten.sum(exp_13, [-1], True)\0A div_17 = torch.ops.aten.div(exp_13, sum_14); exp_13 = sum_14 = None\0A detach_46 = torch.ops.aten.detach(div_17)\0A expand_54 = torch.ops.aten.expand(div_17, [1, 8, 4, 15]); div_17 = None\0A view_184 = torch.ops.aten.view(expand_54, [8, 4, 15]); expand_54 = None\0A expand_55 = torch.ops.aten.expand(transpose_67, [1, 8, 15, 64]); transpose_67 = None\0A view_185 = torch.ops.aten.view(expand_55, [8, 15, 64]); expand_55 = None\0A bmm_27 = torch.ops.aten.bmm(view_184, view_185); view_184 = view_185 = None\0A _unsafe_view_100 = torch.ops.aten._unsafe_view(bmm_27, [1, 8, 4, 64]); bmm_27 = None\0A transpose_69 = torch.ops.aten.transpose(_unsafe_view_100, 1, 2); _unsafe_view_100 = None\0A clone_14 = torch.ops.aten.clone(transpose_69, memory_format = torch.contiguous_format); transpose_69 = None\0A view_186 = torch.ops.aten.view(clone_14, [1, -1, 512]); clone_14 = None\0A _param_constant100 = self._param_constant100\0A t_73 = torch.ops.aten.t(_param_constant100); _param_constant100 = None\0A view_187 = torch.ops.aten.view(view_186, [4, 512]); view_186 = None\0A mm_73 = torch.ops.aten.mm(view_187, t_73); view_187 = t_73 = None\0A _unsafe_view_101 = torch.ops.aten._unsafe_view(mm_73, [1, 4, 512]); mm_73 = None\0A add_53 = torch.ops.aten.add(add_51, _unsafe_view_101); add_51 = _unsafe_view_101 = None\0A pow_25 = torch.ops.aten.pow(add_53, 2)\0A mean_24 = torch.ops.aten.mean(pow_25, [-1], True); pow_25 = None\0A add_54 = torch.ops.aten.add(mean_24, 1e-06); mean_24 = None\0A rsqrt_24 = torch.ops.aten.rsqrt(add_54); add_54 = None\0A detach_47 = torch.ops.aten.detach(rsqrt_24)\0A mul_55 = 
torch.ops.aten.mul(add_53, rsqrt_24); rsqrt_24 = None\0A _param_constant101 = self._param_constant101\0A mul_56 = torch.ops.aten.mul(_param_constant101, mul_55); _param_constant101 = mul_55 = None\0A _param_constant102 = self._param_constant102\0A t_74 = torch.ops.aten.t(_param_constant102); _param_constant102 = None\0A view_188 = torch.ops.aten.view(mul_56, [4, 512]); mul_56 = None\0A mm_74 = torch.ops.aten.mm(view_188, t_74); view_188 = t_74 = None\0A _unsafe_view_102 = torch.ops.aten._unsafe_view(mm_74, [1, 4, 2048]); mm_74 = None\0A relu_9 = torch.ops.aten.relu(_unsafe_view_102); _unsafe_view_102 = None\0A detach_48 = torch.ops.aten.detach(relu_9)\0A _param_constant103 = self._param_constant103\0A t_75 = torch.ops.aten.t(_param_constant103); _param_constant103 = None\0A view_189 = torch.ops.aten.view(relu_9, [4, 2048]); relu_9 = None\0A mm_75 = torch.ops.aten.mm(view_189, t_75); view_189 = t_75 = None\0A _unsafe_view_103 = torch.ops.aten._unsafe_view(mm_75, [1, 4, 512]); mm_75 = None\0A add_55 = torch.ops.aten.add(add_53, _unsafe_view_103); add_53 = _unsafe_view_103 = None\0A pow_26 = torch.ops.aten.pow(add_55, 2)\0A mean_25 = torch.ops.aten.mean(pow_26, [-1], True); pow_26 = None\0A add_56 = torch.ops.aten.add(mean_25, 1e-06); mean_25 = None\0A rsqrt_25 = torch.ops.aten.rsqrt(add_56); add_56 = None\0A detach_49 = torch.ops.aten.detach(rsqrt_25)\0A mul_57 = torch.ops.aten.mul(add_55, rsqrt_25); rsqrt_25 = None\0A _param_constant104 = self._param_constant104\0A mul_58 = torch.ops.aten.mul(_param_constant104, mul_57); _param_constant104 = mul_57 = None\0A _param_constant105 = self._param_constant105\0A t_76 = torch.ops.aten.t(_param_constant105); _param_constant105 = None\0A view_190 = torch.ops.aten.view(mul_58, [4, 512])\0A mm_76 = torch.ops.aten.mm(view_190, t_76); view_190 = t_76 = None\0A _unsafe_view_104 = torch.ops.aten._unsafe_view(mm_76, [1, 4, 512]); mm_76 = None\0A view_191 = torch.ops.aten.view(_unsafe_view_104, [1, -1, 8, 64]); _unsafe_view_104 = 
None\0A transpose_70 = torch.ops.aten.transpose(view_191, 1, 2); view_191 = None\0A _param_constant106 = self._param_constant106\0A t_77 = torch.ops.aten.t(_param_constant106); _param_constant106 = None\0A view_192 = torch.ops.aten.view(mul_58, [4, 512])\0A mm_77 = torch.ops.aten.mm(view_192, t_77); view_192 = t_77 = None\0A _unsafe_view_105 = torch.ops.aten._unsafe_view(mm_77, [1, 4, 512]); mm_77 = None\0A view_193 = torch.ops.aten.view(_unsafe_view_105, [1, -1, 8, 64]); _unsafe_view_105 = None\0A transpose_71 = torch.ops.aten.transpose(view_193, 1, 2); view_193 = None\0A _param_constant107 = self._param_constant107\0A t_78 = torch.ops.aten.t(_param_constant107); _param_constant107 = None\0A view_194 = torch.ops.aten.view(mul_58, [4, 512]); mul_58 = None\0A mm_78 = torch.ops.aten.mm(view_194, t_78); view_194 = t_78 = None\0A _unsafe_view_106 = torch.ops.aten._unsafe_view(mm_78, [1, 4, 512]); mm_78 = None\0A view_195 = torch.ops.aten.view(_unsafe_view_106, [1, -1, 8, 64]); _unsafe_view_106 = None\0A transpose_72 = torch.ops.aten.transpose(view_195, 1, 2); view_195 = None\0A transpose_73 = torch.ops.aten.transpose(transpose_71, 3, 2); transpose_71 = None\0A expand_56 = torch.ops.aten.expand(transpose_70, [1, 8, 4, 64]); transpose_70 = None\0A view_196 = torch.ops.aten.view(expand_56, [8, 4, 64]); expand_56 = None\0A expand_57 = torch.ops.aten.expand(transpose_73, [1, 8, 64, 4]); transpose_73 = None\0A view_197 = torch.ops.aten.view(expand_57, [8, 64, 4]); expand_57 = None\0A bmm_28 = torch.ops.aten.bmm(view_196, view_197); view_196 = view_197 = None\0A _unsafe_view_107 = torch.ops.aten._unsafe_view(bmm_28, [1, 8, 4, 4]); bmm_28 = None\0A add__15 = torch.ops.aten.add_(_unsafe_view_107, add_31); _unsafe_view_107 = None\0A amax_14 = torch.ops.aten.amax(add__15, [-1], True)\0A sub_16 = torch.ops.aten.sub(add__15, amax_14); add__15 = amax_14 = None\0A exp_14 = torch.ops.aten.exp(sub_16); sub_16 = None\0A sum_15 = torch.ops.aten.sum(exp_14, [-1], True)\0A div_18 = 
torch.ops.aten.div(exp_14, sum_15); exp_14 = sum_15 = None\0A detach_50 = torch.ops.aten.detach(div_18)\0A expand_58 = torch.ops.aten.expand(div_18, [1, 8, 4, 4]); div_18 = None\0A view_198 = torch.ops.aten.view(expand_58, [8, 4, 4]); expand_58 = None\0A expand_59 = torch.ops.aten.expand(transpose_72, [1, 8, 4, 64]); transpose_72 = None\0A view_199 = torch.ops.aten.view(expand_59, [8, 4, 64]); expand_59 = None\0A bmm_29 = torch.ops.aten.bmm(view_198, view_199); view_198 = view_199 = None\0A _unsafe_view_108 = torch.ops.aten._unsafe_view(bmm_29, [1, 8, 4, 64]); bmm_29 = None\0A transpose_74 = torch.ops.aten.transpose(_unsafe_view_108, 1, 2); _unsafe_view_108 = None\0A clone_15 = torch.ops.aten.clone(transpose_74, memory_format = torch.contiguous_format); transpose_74 = None\0A view_200 = torch.ops.aten.view(clone_15, [1, -1, 512]); clone_15 = None\0A _param_constant108 = self._param_constant108\0A t_79 = torch.ops.aten.t(_param_constant108); _param_constant108 = None\0A view_201 = torch.ops.aten.view(view_200, [4, 512]); view_200 = None\0A mm_79 = torch.ops.aten.mm(view_201, t_79); view_201 = t_79 = None\0A _unsafe_view_109 = torch.ops.aten._unsafe_view(mm_79, [1, 4, 512]); mm_79 = None\0A add_57 = torch.ops.aten.add(add_55, _unsafe_view_109); add_55 = _unsafe_view_109 = None\0A pow_27 = torch.ops.aten.pow(add_57, 2)\0A mean_26 = torch.ops.aten.mean(pow_27, [-1], True); pow_27 = None\0A add_58 = torch.ops.aten.add(mean_26, 1e-06); mean_26 = None\0A rsqrt_26 = torch.ops.aten.rsqrt(add_58); add_58 = None\0A detach_51 = torch.ops.aten.detach(rsqrt_26)\0A mul_59 = torch.ops.aten.mul(add_57, rsqrt_26); rsqrt_26 = None\0A _param_constant109 = self._param_constant109\0A mul_60 = torch.ops.aten.mul(_param_constant109, mul_59); _param_constant109 = mul_59 = None\0A _param_constant110 = self._param_constant110\0A t_80 = torch.ops.aten.t(_param_constant110); _param_constant110 = None\0A view_202 = torch.ops.aten.view(mul_60, [4, 512]); mul_60 = None\0A mm_80 = 
torch.ops.aten.mm(view_202, t_80); view_202 = t_80 = None\0A _unsafe_view_110 = torch.ops.aten._unsafe_view(mm_80, [1, 4, 512]); mm_80 = None\0A view_203 = torch.ops.aten.view(_unsafe_view_110, [1, -1, 8, 64]); _unsafe_view_110 = None\0A transpose_75 = torch.ops.aten.transpose(view_203, 1, 2); view_203 = None\0A _param_constant111 = self._param_constant111\0A t_81 = torch.ops.aten.t(_param_constant111); _param_constant111 = None\0A view_204 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_81 = torch.ops.aten.mm(view_204, t_81); view_204 = t_81 = None\0A _unsafe_view_111 = torch.ops.aten._unsafe_view(mm_81, [1, 15, 512]); mm_81 = None\0A view_205 = torch.ops.aten.view(_unsafe_view_111, [1, -1, 8, 64]); _unsafe_view_111 = None\0A transpose_76 = torch.ops.aten.transpose(view_205, 1, 2); view_205 = None\0A _param_constant112 = self._param_constant112\0A t_82 = torch.ops.aten.t(_param_constant112); _param_constant112 = None\0A view_206 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_82 = torch.ops.aten.mm(view_206, t_82); view_206 = t_82 = None\0A _unsafe_view_112 = torch.ops.aten._unsafe_view(mm_82, [1, 15, 512]); mm_82 = None\0A view_207 = torch.ops.aten.view(_unsafe_view_112, [1, -1, 8, 64]); _unsafe_view_112 = None\0A transpose_77 = torch.ops.aten.transpose(view_207, 1, 2); view_207 = None\0A transpose_78 = torch.ops.aten.transpose(transpose_76, 3, 2); transpose_76 = None\0A expand_60 = torch.ops.aten.expand(transpose_75, [1, 8, 4, 64]); transpose_75 = None\0A view_208 = torch.ops.aten.view(expand_60, [8, 4, 64]); expand_60 = None\0A expand_61 = torch.ops.aten.expand(transpose_78, [1, 8, 64, 15]); transpose_78 = None\0A view_209 = torch.ops.aten.view(expand_61, [8, 64, 15]); expand_61 = None\0A bmm_30 = torch.ops.aten.bmm(view_208, view_209); view_208 = view_209 = None\0A _unsafe_view_113 = torch.ops.aten._unsafe_view(bmm_30, [1, 8, 4, 15]); bmm_30 = None\0A add__16 = torch.ops.aten.add_(_unsafe_view_113, add_34); _unsafe_view_113 = None\0A amax_15 = 
torch.ops.aten.amax(add__16, [-1], True)\0A sub_17 = torch.ops.aten.sub(add__16, amax_15); add__16 = amax_15 = None\0A exp_15 = torch.ops.aten.exp(sub_17); sub_17 = None\0A sum_16 = torch.ops.aten.sum(exp_15, [-1], True)\0A div_19 = torch.ops.aten.div(exp_15, sum_16); exp_15 = sum_16 = None\0A detach_52 = torch.ops.aten.detach(div_19)\0A expand_62 = torch.ops.aten.expand(div_19, [1, 8, 4, 15]); div_19 = None\0A view_210 = torch.ops.aten.view(expand_62, [8, 4, 15]); expand_62 = None\0A expand_63 = torch.ops.aten.expand(transpose_77, [1, 8, 15, 64]); transpose_77 = None\0A view_211 = torch.ops.aten.view(expand_63, [8, 15, 64]); expand_63 = None\0A bmm_31 = torch.ops.aten.bmm(view_210, view_211); view_210 = view_211 = None\0A _unsafe_view_114 = torch.ops.aten._unsafe_view(bmm_31, [1, 8, 4, 64]); bmm_31 = None\0A transpose_79 = torch.ops.aten.transpose(_unsafe_view_114, 1, 2); _unsafe_view_114 = None\0A clone_16 = torch.ops.aten.clone(transpose_79, memory_format = torch.contiguous_format); transpose_79 = None\0A view_212 = torch.ops.aten.view(clone_16, [1, -1, 512]); clone_16 = None\0A _param_constant113 = self._param_constant113\0A t_83 = torch.ops.aten.t(_param_constant113); _param_constant113 = None\0A view_213 = torch.ops.aten.view(view_212, [4, 512]); view_212 = None\0A mm_83 = torch.ops.aten.mm(view_213, t_83); view_213 = t_83 = None\0A _unsafe_view_115 = torch.ops.aten._unsafe_view(mm_83, [1, 4, 512]); mm_83 = None\0A add_59 = torch.ops.aten.add(add_57, _unsafe_view_115); add_57 = _unsafe_view_115 = None\0A pow_28 = torch.ops.aten.pow(add_59, 2)\0A mean_27 = torch.ops.aten.mean(pow_28, [-1], True); pow_28 = None\0A add_60 = torch.ops.aten.add(mean_27, 1e-06); mean_27 = None\0A rsqrt_27 = torch.ops.aten.rsqrt(add_60); add_60 = None\0A detach_53 = torch.ops.aten.detach(rsqrt_27)\0A mul_61 = torch.ops.aten.mul(add_59, rsqrt_27); rsqrt_27 = None\0A _param_constant114 = self._param_constant114\0A mul_62 = torch.ops.aten.mul(_param_constant114, mul_61); 
_param_constant114 = mul_61 = None\0A _param_constant115 = self._param_constant115\0A t_84 = torch.ops.aten.t(_param_constant115); _param_constant115 = None\0A view_214 = torch.ops.aten.view(mul_62, [4, 512]); mul_62 = None\0A mm_84 = torch.ops.aten.mm(view_214, t_84); view_214 = t_84 = None\0A _unsafe_view_116 = torch.ops.aten._unsafe_view(mm_84, [1, 4, 2048]); mm_84 = None\0A relu_10 = torch.ops.aten.relu(_unsafe_view_116); _unsafe_view_116 = None\0A detach_54 = torch.ops.aten.detach(relu_10)\0A _param_constant116 = self._param_constant116\0A t_85 = torch.ops.aten.t(_param_constant116); _param_constant116 = None\0A view_215 = torch.ops.aten.view(relu_10, [4, 2048]); relu_10 = None\0A mm_85 = torch.ops.aten.mm(view_215, t_85); view_215 = t_85 = None\0A _unsafe_view_117 = torch.ops.aten._unsafe_view(mm_85, [1, 4, 512]); mm_85 = None\0A add_61 = torch.ops.aten.add(add_59, _unsafe_view_117); add_59 = _unsafe_view_117 = None\0A pow_29 = torch.ops.aten.pow(add_61, 2)\0A mean_28 = torch.ops.aten.mean(pow_29, [-1], True); pow_29 = None\0A add_62 = torch.ops.aten.add(mean_28, 1e-06); mean_28 = None\0A rsqrt_28 = torch.ops.aten.rsqrt(add_62); add_62 = None\0A detach_55 = torch.ops.aten.detach(rsqrt_28)\0A mul_63 = torch.ops.aten.mul(add_61, rsqrt_28); rsqrt_28 = None\0A _param_constant117 = self._param_constant117\0A mul_64 = torch.ops.aten.mul(_param_constant117, mul_63); _param_constant117 = mul_63 = None\0A _param_constant118 = self._param_constant118\0A t_86 = torch.ops.aten.t(_param_constant118); _param_constant118 = None\0A view_216 = torch.ops.aten.view(mul_64, [4, 512])\0A mm_86 = torch.ops.aten.mm(view_216, t_86); view_216 = t_86 = None\0A _unsafe_view_118 = torch.ops.aten._unsafe_view(mm_86, [1, 4, 512]); mm_86 = None\0A view_217 = torch.ops.aten.view(_unsafe_view_118, [1, -1, 8, 64]); _unsafe_view_118 = None\0A transpose_80 = torch.ops.aten.transpose(view_217, 1, 2); view_217 = None\0A _param_constant119 = self._param_constant119\0A t_87 = 
torch.ops.aten.t(_param_constant119); _param_constant119 = None\0A view_218 = torch.ops.aten.view(mul_64, [4, 512])\0A mm_87 = torch.ops.aten.mm(view_218, t_87); view_218 = t_87 = None\0A _unsafe_view_119 = torch.ops.aten._unsafe_view(mm_87, [1, 4, 512]); mm_87 = None\0A view_219 = torch.ops.aten.view(_unsafe_view_119, [1, -1, 8, 64]); _unsafe_view_119 = None\0A transpose_81 = torch.ops.aten.transpose(view_219, 1, 2); view_219 = None\0A _param_constant120 = self._param_constant120\0A t_88 = torch.ops.aten.t(_param_constant120); _param_constant120 = None\0A view_220 = torch.ops.aten.view(mul_64, [4, 512]); mul_64 = None\0A mm_88 = torch.ops.aten.mm(view_220, t_88); view_220 = t_88 = None\0A _unsafe_view_120 = torch.ops.aten._unsafe_view(mm_88, [1, 4, 512]); mm_88 = None\0A view_221 = torch.ops.aten.view(_unsafe_view_120, [1, -1, 8, 64]); _unsafe_view_120 = None\0A transpose_82 = torch.ops.aten.transpose(view_221, 1, 2); view_221 = None\0A transpose_83 = torch.ops.aten.transpose(transpose_81, 3, 2); transpose_81 = None\0A expand_64 = torch.ops.aten.expand(transpose_80, [1, 8, 4, 64]); transpose_80 = None\0A view_222 = torch.ops.aten.view(expand_64, [8, 4, 64]); expand_64 = None\0A expand_65 = torch.ops.aten.expand(transpose_83, [1, 8, 64, 4]); transpose_83 = None\0A view_223 = torch.ops.aten.view(expand_65, [8, 64, 4]); expand_65 = None\0A bmm_32 = torch.ops.aten.bmm(view_222, view_223); view_222 = view_223 = None\0A _unsafe_view_121 = torch.ops.aten._unsafe_view(bmm_32, [1, 8, 4, 4]); bmm_32 = None\0A add__17 = torch.ops.aten.add_(_unsafe_view_121, add_31); _unsafe_view_121 = add_31 = None\0A amax_16 = torch.ops.aten.amax(add__17, [-1], True)\0A sub_18 = torch.ops.aten.sub(add__17, amax_16); add__17 = amax_16 = None\0A exp_16 = torch.ops.aten.exp(sub_18); sub_18 = None\0A sum_17 = torch.ops.aten.sum(exp_16, [-1], True)\0A div_20 = torch.ops.aten.div(exp_16, sum_17); exp_16 = sum_17 = None\0A detach_56 = torch.ops.aten.detach(div_20)\0A expand_66 = 
torch.ops.aten.expand(div_20, [1, 8, 4, 4]); div_20 = None\0A view_224 = torch.ops.aten.view(expand_66, [8, 4, 4]); expand_66 = None\0A expand_67 = torch.ops.aten.expand(transpose_82, [1, 8, 4, 64]); transpose_82 = None\0A view_225 = torch.ops.aten.view(expand_67, [8, 4, 64]); expand_67 = None\0A bmm_33 = torch.ops.aten.bmm(view_224, view_225); view_224 = view_225 = None\0A _unsafe_view_122 = torch.ops.aten._unsafe_view(bmm_33, [1, 8, 4, 64]); bmm_33 = None\0A transpose_84 = torch.ops.aten.transpose(_unsafe_view_122, 1, 2); _unsafe_view_122 = None\0A clone_17 = torch.ops.aten.clone(transpose_84, memory_format = torch.contiguous_format); transpose_84 = None\0A view_226 = torch.ops.aten.view(clone_17, [1, -1, 512]); clone_17 = None\0A _param_constant121 = self._param_constant121\0A t_89 = torch.ops.aten.t(_param_constant121); _param_constant121 = None\0A view_227 = torch.ops.aten.view(view_226, [4, 512]); view_226 = None\0A mm_89 = torch.ops.aten.mm(view_227, t_89); view_227 = t_89 = None\0A _unsafe_view_123 = torch.ops.aten._unsafe_view(mm_89, [1, 4, 512]); mm_89 = None\0A add_63 = torch.ops.aten.add(add_61, _unsafe_view_123); add_61 = _unsafe_view_123 = None\0A pow_30 = torch.ops.aten.pow(add_63, 2)\0A mean_29 = torch.ops.aten.mean(pow_30, [-1], True); pow_30 = None\0A add_64 = torch.ops.aten.add(mean_29, 1e-06); mean_29 = None\0A rsqrt_29 = torch.ops.aten.rsqrt(add_64); add_64 = None\0A detach_57 = torch.ops.aten.detach(rsqrt_29)\0A mul_65 = torch.ops.aten.mul(add_63, rsqrt_29); rsqrt_29 = None\0A _param_constant122 = self._param_constant122\0A mul_66 = torch.ops.aten.mul(_param_constant122, mul_65); _param_constant122 = mul_65 = None\0A _param_constant123 = self._param_constant123\0A t_90 = torch.ops.aten.t(_param_constant123); _param_constant123 = None\0A view_228 = torch.ops.aten.view(mul_66, [4, 512]); mul_66 = None\0A mm_90 = torch.ops.aten.mm(view_228, t_90); view_228 = t_90 = None\0A _unsafe_view_124 = torch.ops.aten._unsafe_view(mm_90, [1, 4, 512]); mm_90 
= None\0A view_229 = torch.ops.aten.view(_unsafe_view_124, [1, -1, 8, 64]); _unsafe_view_124 = None\0A transpose_85 = torch.ops.aten.transpose(view_229, 1, 2); view_229 = None\0A _param_constant124 = self._param_constant124\0A t_91 = torch.ops.aten.t(_param_constant124); _param_constant124 = None\0A view_230 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_91 = torch.ops.aten.mm(view_230, t_91); view_230 = t_91 = None\0A _unsafe_view_125 = torch.ops.aten._unsafe_view(mm_91, [1, 15, 512]); mm_91 = None\0A view_231 = torch.ops.aten.view(_unsafe_view_125, [1, -1, 8, 64]); _unsafe_view_125 = None\0A transpose_86 = torch.ops.aten.transpose(view_231, 1, 2); view_231 = None\0A _param_constant125 = self._param_constant125\0A t_92 = torch.ops.aten.t(_param_constant125); _param_constant125 = None\0A view_232 = torch.ops.aten.view(mul_28, [15, 512]); mul_28 = None\0A mm_92 = torch.ops.aten.mm(view_232, t_92); view_232 = t_92 = None\0A _unsafe_view_126 = torch.ops.aten._unsafe_view(mm_92, [1, 15, 512]); mm_92 = None\0A view_233 = torch.ops.aten.view(_unsafe_view_126, [1, -1, 8, 64]); _unsafe_view_126 = None\0A transpose_87 = torch.ops.aten.transpose(view_233, 1, 2); view_233 = None\0A transpose_88 = torch.ops.aten.transpose(transpose_86, 3, 2); transpose_86 = None\0A expand_68 = torch.ops.aten.expand(transpose_85, [1, 8, 4, 64]); transpose_85 = None\0A view_234 = torch.ops.aten.view(expand_68, [8, 4, 64]); expand_68 = None\0A expand_69 = torch.ops.aten.expand(transpose_88, [1, 8, 64, 15]); transpose_88 = None\0A view_235 = torch.ops.aten.view(expand_69, [8, 64, 15]); expand_69 = None\0A bmm_34 = torch.ops.aten.bmm(view_234, view_235); view_234 = view_235 = None\0A _unsafe_view_127 = torch.ops.aten._unsafe_view(bmm_34, [1, 8, 4, 15]); bmm_34 = None\0A add__18 = torch.ops.aten.add_(_unsafe_view_127, add_34); _unsafe_view_127 = add_34 = None\0A amax_17 = torch.ops.aten.amax(add__18, [-1], True)\0A sub_19 = torch.ops.aten.sub(add__18, amax_17); add__18 = amax_17 = None\0A exp_17 = 
torch.ops.aten.exp(sub_19); sub_19 = None\0A sum_18 = torch.ops.aten.sum(exp_17, [-1], True)\0A div_21 = torch.ops.aten.div(exp_17, sum_18); exp_17 = sum_18 = None\0A detach_58 = torch.ops.aten.detach(div_21)\0A expand_70 = torch.ops.aten.expand(div_21, [1, 8, 4, 15]); div_21 = None\0A view_236 = torch.ops.aten.view(expand_70, [8, 4, 15]); expand_70 = None\0A expand_71 = torch.ops.aten.expand(transpose_87, [1, 8, 15, 64]); transpose_87 = None\0A view_237 = torch.ops.aten.view(expand_71, [8, 15, 64]); expand_71 = None\0A bmm_35 = torch.ops.aten.bmm(view_236, view_237); view_236 = view_237 = None\0A _unsafe_view_128 = torch.ops.aten._unsafe_view(bmm_35, [1, 8, 4, 64]); bmm_35 = None\0A transpose_89 = torch.ops.aten.transpose(_unsafe_view_128, 1, 2); _unsafe_view_128 = None\0A clone_18 = torch.ops.aten.clone(transpose_89, memory_format = torch.contiguous_format); transpose_89 = None\0A view_238 = torch.ops.aten.view(clone_18, [1, -1, 512]); clone_18 = None\0A _param_constant126 = self._param_constant126\0A t_93 = torch.ops.aten.t(_param_constant126); _param_constant126 = None\0A view_239 = torch.ops.aten.view(view_238, [4, 512]); view_238 = None\0A mm_93 = torch.ops.aten.mm(view_239, t_93); view_239 = t_93 = None\0A _unsafe_view_129 = torch.ops.aten._unsafe_view(mm_93, [1, 4, 512]); mm_93 = None\0A add_65 = torch.ops.aten.add(add_63, _unsafe_view_129); add_63 = _unsafe_view_129 = None\0A pow_31 = torch.ops.aten.pow(add_65, 2)\0A mean_30 = torch.ops.aten.mean(pow_31, [-1], True); pow_31 = None\0A add_66 = torch.ops.aten.add(mean_30, 1e-06); mean_30 = None\0A rsqrt_30 = torch.ops.aten.rsqrt(add_66); add_66 = None\0A detach_59 = torch.ops.aten.detach(rsqrt_30)\0A mul_67 = torch.ops.aten.mul(add_65, rsqrt_30); rsqrt_30 = None\0A _param_constant127 = self._param_constant127\0A mul_68 = torch.ops.aten.mul(_param_constant127, mul_67); _param_constant127 = mul_67 = None\0A _param_constant128 = self._param_constant128\0A t_94 = torch.ops.aten.t(_param_constant128); 
_param_constant128 = None\0A view_240 = torch.ops.aten.view(mul_68, [4, 512]); mul_68 = None\0A mm_94 = torch.ops.aten.mm(view_240, t_94); view_240 = t_94 = None\0A _unsafe_view_130 = torch.ops.aten._unsafe_view(mm_94, [1, 4, 2048]); mm_94 = None\0A relu_11 = torch.ops.aten.relu(_unsafe_view_130); _unsafe_view_130 = None\0A detach_60 = torch.ops.aten.detach(relu_11)\0A _param_constant129 = self._param_constant129\0A t_95 = torch.ops.aten.t(_param_constant129); _param_constant129 = None\0A view_241 = torch.ops.aten.view(relu_11, [4, 2048]); relu_11 = None\0A mm_95 = torch.ops.aten.mm(view_241, t_95); view_241 = t_95 = None\0A _unsafe_view_131 = torch.ops.aten._unsafe_view(mm_95, [1, 4, 512]); mm_95 = None\0A add_67 = torch.ops.aten.add(add_65, _unsafe_view_131); add_65 = _unsafe_view_131 = None\0A pow_32 = torch.ops.aten.pow(add_67, 2)\0A mean_31 = torch.ops.aten.mean(pow_32, [-1], True); pow_32 = None\0A add_68 = torch.ops.aten.add(mean_31, 1e-06); mean_31 = None\0A rsqrt_31 = torch.ops.aten.rsqrt(add_68); add_68 = None\0A detach_61 = torch.ops.aten.detach(rsqrt_31)\0A mul_69 = torch.ops.aten.mul(add_67, rsqrt_31); add_67 = rsqrt_31 = None\0A _param_constant130 = self._param_constant130\0A mul_70 = torch.ops.aten.mul(_param_constant130, mul_69); _param_constant130 = mul_69 = None\0A mul_71 = torch.ops.aten.mul(mul_70, 0.04419417382415922); mul_70 = None\0A _param_constant0_2 = self._param_constant0\0A t_96 = torch.ops.aten.t(_param_constant0_2); _param_constant0_2 = None\0A view_242 = torch.ops.aten.view(mul_71, [4, 512]); mul_71 = None\0A mm_96 = torch.ops.aten.mm(view_242, t_96); view_242 = t_96 = None\0A _unsafe_view_132 = torch.ops.aten._unsafe_view(mm_96, [1, 4, 32128]); mm_96 = None\0A return _unsafe_view_132\0A "
// Instantiation of the scripted module object: binds the constant tensors
// %0-%131 (defined earlier in the file) to named attribute slots of the
// !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> type, so the
// func.func private methods above can read them via torch.prim.GetAttr.
// NOTE(review): the slot shapes ([32128,512] embedding, [512,512] attention
// projections, two [32,8] relative-position-bias tables, [2048,512]/[512,2048]
// feed-forward weights, [512] RMS-norm scales) are consistent with a
// T5-small-style encoder/decoder transformer — confirm against the exporter.
%132 = torch.nn_module {
    // Shared token-embedding / output-projection weight (vocab 32128 x d_model 512).
    torch.slot "_param_constant0", %0 : !torch.tensor<[32128,512],f32>
    // First stack (slots 1-50): per layer, a [512] norm scale, four [512,512]
    // attention projections, and [2048,512]/[512,2048] feed-forward weights;
    // slot 5 is its [32,8] relative-position-bias table.
    torch.slot "_param_constant1", %1 : !torch.tensor<[512],f32>
    torch.slot "_param_constant2", %2 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant3", %3 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant4", %4 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant5", %5 : !torch.tensor<[32,8],f32>
    torch.slot "_param_constant6", %6 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant7", %7 : !torch.tensor<[512],f32>
    torch.slot "_param_constant8", %8 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant9", %9 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant10", %10 : !torch.tensor<[512],f32>
    torch.slot "_param_constant11", %11 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant12", %12 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant13", %13 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant14", %14 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant15", %15 : !torch.tensor<[512],f32>
    torch.slot "_param_constant16", %16 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant17", %17 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant18", %18 : !torch.tensor<[512],f32>
    torch.slot "_param_constant19", %19 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant20", %20 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant21", %21 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant22", %22 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant23", %23 : !torch.tensor<[512],f32>
    torch.slot "_param_constant24", %24 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant25", %25 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant26", %26 : !torch.tensor<[512],f32>
    torch.slot "_param_constant27", %27 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant28", %28 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant29", %29 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant30", %30 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant31", %31 : !torch.tensor<[512],f32>
    torch.slot "_param_constant32", %32 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant33", %33 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant34", %34 : !torch.tensor<[512],f32>
    torch.slot "_param_constant35", %35 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant36", %36 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant37", %37 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant38", %38 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant39", %39 : !torch.tensor<[512],f32>
    torch.slot "_param_constant40", %40 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant41", %41 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant42", %42 : !torch.tensor<[512],f32>
    torch.slot "_param_constant43", %43 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant44", %44 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant45", %45 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant46", %46 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant47", %47 : !torch.tensor<[512],f32>
    torch.slot "_param_constant48", %48 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant49", %49 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant50", %50 : !torch.tensor<[512],f32>
    // Second stack (slots 51-130): same layout but with an extra [512,512]
    // projection quartet per layer (self- plus cross-attention); slot 55 is
    // its [32,8] relative-position-bias table.
    torch.slot "_param_constant51", %51 : !torch.tensor<[512],f32>
    torch.slot "_param_constant52", %52 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant53", %53 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant54", %54 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant55", %55 : !torch.tensor<[32,8],f32>
    torch.slot "_param_constant56", %56 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant57", %57 : !torch.tensor<[512],f32>
    torch.slot "_param_constant58", %58 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant59", %59 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant60", %60 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant61", %61 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant62", %62 : !torch.tensor<[512],f32>
    torch.slot "_param_constant63", %63 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant64", %64 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant65", %65 : !torch.tensor<[512],f32>
    torch.slot "_param_constant66", %66 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant67", %67 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant68", %68 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant69", %69 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant70", %70 : !torch.tensor<[512],f32>
    torch.slot "_param_constant71", %71 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant72", %72 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant73", %73 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant74", %74 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant75", %75 : !torch.tensor<[512],f32>
    torch.slot "_param_constant76", %76 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant77", %77 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant78", %78 : !torch.tensor<[512],f32>
    torch.slot "_param_constant79", %79 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant80", %80 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant81", %81 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant82", %82 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant83", %83 : !torch.tensor<[512],f32>
    torch.slot "_param_constant84", %84 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant85", %85 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant86", %86 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant87", %87 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant88", %88 : !torch.tensor<[512],f32>
    torch.slot "_param_constant89", %89 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant90", %90 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant91", %91 : !torch.tensor<[512],f32>
    torch.slot "_param_constant92", %92 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant93", %93 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant94", %94 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant95", %95 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant96", %96 : !torch.tensor<[512],f32>
    torch.slot "_param_constant97", %97 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant98", %98 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant99", %99 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant100", %100 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant101", %101 : !torch.tensor<[512],f32>
    torch.slot "_param_constant102", %102 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant103", %103 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant104", %104 : !torch.tensor<[512],f32>
    torch.slot "_param_constant105", %105 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant106", %106 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant107", %107 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant108", %108 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant109", %109 : !torch.tensor<[512],f32>
    torch.slot "_param_constant110", %110 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant111", %111 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant112", %112 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant113", %113 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant114", %114 : !torch.tensor<[512],f32>
    torch.slot "_param_constant115", %115 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant116", %116 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant117", %117 : !torch.tensor<[512],f32>
    torch.slot "_param_constant118", %118 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant119", %119 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant120", %120 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant121", %121 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant122", %122 : !torch.tensor<[512],f32>
    torch.slot "_param_constant123", %123 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant124", %124 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant125", %125 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant126", %126 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant127", %127 : !torch.tensor<[512],f32>
    torch.slot "_param_constant128", %128 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant129", %129 : !torch.tensor<[512,2048],f32>
    // Final [512] norm scale of the second stack (multiplied in just before the
    // 0.0441941... output scaling and the vocab projection in forward()).
    torch.slot "_param_constant130", %130 : !torch.tensor<[512],f32>
    // Scalar si64 tensor constant captured by the traced graph.
    torch.slot "_tensor_constant0", %131 : !torch.tensor<[],si64>
    // Standard nn.Module bookkeeping attributes: training flag, backward-hook
    // marker, and the FX-generated Python source returned by __code_getter.
    torch.slot "training", %true : !torch.bool
    torch.slot "_is_full_backward_hook", %none : !torch.none
    torch.slot "_code", %str : !torch.str
} : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda">
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment