Skip to content

Instantly share code, notes, and snippets.

@AmosLewis
Created January 2, 2023 05:37
Show Gist options
  • Save AmosLewis/d5ca2b2d4fe3aee29add60db32a8b987 to your computer and use it in GitHub Desktop.
module attributes {torch.debug_module_name = "_lambda"} {
// Accessor generated by torch.fx: returns the "_code" attribute (the FX-emitted
// Python source string) stored on the graph-module object.
func.func private @__torch__.torch.fx.graph_module._lambda.__code_getter(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda">) -> !torch.str {
%code = torch.prim.GetAttr %arg0["_code"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.str
return %code : !torch.str
}
func.func private @__torch__.torch.fx.graph_module._lambda.forward(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda">, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[1,5],si64>}) -> !torch.tensor {
%int11 = torch.constant.int 11
%int-2 = torch.constant.int -2
%none_0 = torch.constant.none
%false = torch.constant.bool false
%cpu = torch.constant.device "cpu"
%int4 = torch.constant.int 4
%int-1 = torch.constant.int -1
%int1 = torch.constant.int 1
%int5 = torch.constant.int 5
%int0 = torch.constant.int 0
%int768 = torch.constant.int 768
%float1.000000e-05 = torch.constant.float 1.000000e-05
%int2 = torch.constant.int 2
%int2304 = torch.constant.int 2304
%int1536 = torch.constant.int 1536
%int12 = torch.constant.int 12
%int64 = torch.constant.int 64
%int3 = torch.constant.int 3
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int3072 = torch.constant.int 3072
%float5.000000e-01 = torch.constant.float 5.000000e-01
%float3.000000e00 = torch.constant.float 3.000000e+00
%float4.471500e-02 = torch.constant.float 4.471500e-02
%float7.978850e-01 = torch.constant.float 0.79788456080286541
%float1.000000e00 = torch.constant.float 1.000000e+00
%int50257 = torch.constant.int 50257
%186 = torch.prim.ListConstruct %int-1, %int5 : (!torch.int, !torch.int) -> !torch.list<int>
%187 = torch.aten.view %arg1, %186 : !torch.tensor, !torch.list<int> -> !torch.tensor
%188 = torch.aten.arange.start %int0, %int5, %int4, %none_0, %cpu, %false : !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%189 = torch.aten.unsqueeze %188, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%190 = torch.prim.ListConstruct %int-1, %int5 : (!torch.int, !torch.int) -> !torch.list<int>
%191 = torch.aten.view %189, %190 : !torch.tensor, !torch.list<int> -> !torch.tensor
%192 = torch.prim.GetAttr %arg0["_param_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%193 = torch.aten.embedding %192, %187, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%194 = torch.prim.GetAttr %arg0["_param_constant1"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%195 = torch.aten.embedding %194, %191, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%196 = torch.aten.add.Tensor %193, %195, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%197 = torch.prim.GetAttr %arg0["_param_constant2"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%198 = torch.prim.GetAttr %arg0["_param_constant3"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%199 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0, %result1, %result2 = torch.aten.native_layer_norm %196, %199, %197, %198, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%200 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%201 = torch.aten.view %result0, %200 : !torch.tensor, !torch.list<int> -> !torch.tensor
%202 = torch.prim.GetAttr %arg0["_param_constant4"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%203 = torch.prim.GetAttr %arg0["_param_constant5"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%204 = torch.aten.addmm %202, %201, %203, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%205 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%206 = torch.aten.view %204, %205 : !torch.tensor, !torch.list<int> -> !torch.tensor
%207 = torch.aten.slice.Tensor %206, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%208 = torch.aten.slice.Tensor %206, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%209 = torch.aten.slice.Tensor %206, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%210 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%211 = torch.aten.view %207, %210 : !torch.tensor, !torch.list<int> -> !torch.tensor
%212 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%213 = torch.aten.permute %211, %212 : !torch.tensor, !torch.list<int> -> !torch.tensor
%214 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%215 = torch.aten.view %208, %214 : !torch.tensor, !torch.list<int> -> !torch.tensor
%216 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%217 = torch.aten.permute %215, %216 : !torch.tensor, !torch.list<int> -> !torch.tensor
%218 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%219 = torch.aten.view %209, %218 : !torch.tensor, !torch.list<int> -> !torch.tensor
%220 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%221 = torch.aten.permute %219, %220 : !torch.tensor, !torch.list<int> -> !torch.tensor
%222 = torch.aten.transpose.int %217, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%223 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%224 = torch.aten.expand %213, %223, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%225 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%226 = torch.aten.view %224, %225 : !torch.tensor, !torch.list<int> -> !torch.tensor
%227 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%228 = torch.aten.expand %222, %227, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%229 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%230 = torch.aten.view %228, %229 : !torch.tensor, !torch.list<int> -> !torch.tensor
%231 = torch.aten.bmm %226, %230 : !torch.tensor, !torch.tensor -> !torch.tensor
%232 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%233 = torch.aten._unsafe_view %231, %232 : !torch.tensor, !torch.list<int> -> !torch.tensor
%234 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%235 = torch.aten.lift_fresh_copy %234 : !torch.tensor -> !torch.tensor
%236 = torch.aten.div.Tensor %233, %235 : !torch.tensor, !torch.tensor -> !torch.tensor
%237 = torch.prim.GetAttr %arg0["_tensor_constant1"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%238 = torch.aten.slice.Tensor %237, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%239 = torch.aten.slice.Tensor %238, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%240 = torch.aten.slice.Tensor %239, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%241 = torch.aten.slice.Tensor %240, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%242 = torch.aten._to_copy %241, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%243 = torch.prim.GetAttr %arg0["_tensor_constant2"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%244 = torch.aten.lift_fresh_copy %243 : !torch.tensor -> !torch.tensor
%245 = torch.aten.where.self %242, %236, %244 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%246 = torch.aten._softmax %245, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%247 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%248 = torch.aten.expand %246, %247, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%249 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%250 = torch.aten.view %248, %249 : !torch.tensor, !torch.list<int> -> !torch.tensor
%251 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%252 = torch.aten.expand %221, %251, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%253 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%254 = torch.aten.view %252, %253 : !torch.tensor, !torch.list<int> -> !torch.tensor
%255 = torch.aten.bmm %250, %254 : !torch.tensor, !torch.tensor -> !torch.tensor
%256 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%257 = torch.aten._unsafe_view %255, %256 : !torch.tensor, !torch.list<int> -> !torch.tensor
%258 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%259 = torch.aten.permute %257, %258 : !torch.tensor, !torch.list<int> -> !torch.tensor
%260 = torch.aten.clone %259, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%261 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%262 = torch.aten.view %260, %261 : !torch.tensor, !torch.list<int> -> !torch.tensor
%263 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%264 = torch.aten.view %262, %263 : !torch.tensor, !torch.list<int> -> !torch.tensor
%265 = torch.prim.GetAttr %arg0["_param_constant6"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%266 = torch.prim.GetAttr %arg0["_param_constant7"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%267 = torch.aten.addmm %265, %264, %266, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%268 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%269 = torch.aten.view %267, %268 : !torch.tensor, !torch.list<int> -> !torch.tensor
%270 = torch.aten.add.Tensor %269, %196, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%271 = torch.prim.GetAttr %arg0["_param_constant8"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%272 = torch.prim.GetAttr %arg0["_param_constant9"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%273 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_1, %result1_2, %result2_3 = torch.aten.native_layer_norm %270, %273, %271, %272, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%274 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%275 = torch.aten.view %result0_1, %274 : !torch.tensor, !torch.list<int> -> !torch.tensor
%276 = torch.prim.GetAttr %arg0["_param_constant10"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%277 = torch.prim.GetAttr %arg0["_param_constant11"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%278 = torch.aten.addmm %276, %275, %277, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%279 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%280 = torch.aten.view %278, %279 : !torch.tensor, !torch.list<int> -> !torch.tensor
%281 = torch.aten.mul.Scalar %280, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%282 = torch.aten.pow.Tensor_Scalar %280, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%283 = torch.aten.mul.Scalar %282, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%284 = torch.aten.add.Tensor %280, %283, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%285 = torch.aten.mul.Scalar %284, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%286 = torch.aten.tanh %285 : !torch.tensor -> !torch.tensor
%287 = torch.aten.add.Scalar %286, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%288 = torch.aten.mul.Tensor %281, %287 : !torch.tensor, !torch.tensor -> !torch.tensor
%289 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%290 = torch.aten.view %288, %289 : !torch.tensor, !torch.list<int> -> !torch.tensor
%291 = torch.prim.GetAttr %arg0["_param_constant12"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%292 = torch.prim.GetAttr %arg0["_param_constant13"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%293 = torch.aten.addmm %291, %290, %292, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%294 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%295 = torch.aten.view %293, %294 : !torch.tensor, !torch.list<int> -> !torch.tensor
%296 = torch.aten.add.Tensor %270, %295, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%297 = torch.prim.GetAttr %arg0["_param_constant14"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%298 = torch.prim.GetAttr %arg0["_param_constant15"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%299 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_4, %result1_5, %result2_6 = torch.aten.native_layer_norm %296, %299, %297, %298, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%300 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%301 = torch.aten.view %result0_4, %300 : !torch.tensor, !torch.list<int> -> !torch.tensor
%302 = torch.prim.GetAttr %arg0["_param_constant16"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%303 = torch.prim.GetAttr %arg0["_param_constant17"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%304 = torch.aten.addmm %302, %301, %303, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%305 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%306 = torch.aten.view %304, %305 : !torch.tensor, !torch.list<int> -> !torch.tensor
%307 = torch.aten.slice.Tensor %306, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%308 = torch.aten.slice.Tensor %306, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%309 = torch.aten.slice.Tensor %306, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%310 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%311 = torch.aten.view %307, %310 : !torch.tensor, !torch.list<int> -> !torch.tensor
%312 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%313 = torch.aten.permute %311, %312 : !torch.tensor, !torch.list<int> -> !torch.tensor
%314 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%315 = torch.aten.view %308, %314 : !torch.tensor, !torch.list<int> -> !torch.tensor
%316 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%317 = torch.aten.permute %315, %316 : !torch.tensor, !torch.list<int> -> !torch.tensor
%318 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%319 = torch.aten.view %309, %318 : !torch.tensor, !torch.list<int> -> !torch.tensor
%320 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%321 = torch.aten.permute %319, %320 : !torch.tensor, !torch.list<int> -> !torch.tensor
%322 = torch.aten.transpose.int %317, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%323 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%324 = torch.aten.expand %313, %323, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%325 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%326 = torch.aten.view %324, %325 : !torch.tensor, !torch.list<int> -> !torch.tensor
%327 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%328 = torch.aten.expand %322, %327, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%329 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%330 = torch.aten.view %328, %329 : !torch.tensor, !torch.list<int> -> !torch.tensor
%331 = torch.aten.bmm %326, %330 : !torch.tensor, !torch.tensor -> !torch.tensor
%332 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%333 = torch.aten._unsafe_view %331, %332 : !torch.tensor, !torch.list<int> -> !torch.tensor
%334 = torch.prim.GetAttr %arg0["_tensor_constant3"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%335 = torch.aten.lift_fresh_copy %334 : !torch.tensor -> !torch.tensor
%336 = torch.aten.div.Tensor %333, %335 : !torch.tensor, !torch.tensor -> !torch.tensor
%337 = torch.prim.GetAttr %arg0["_tensor_constant4"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%338 = torch.aten.slice.Tensor %337, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%339 = torch.aten.slice.Tensor %338, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%340 = torch.aten.slice.Tensor %339, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%341 = torch.aten.slice.Tensor %340, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%342 = torch.aten._to_copy %341, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%343 = torch.prim.GetAttr %arg0["_tensor_constant5"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%344 = torch.aten.lift_fresh_copy %343 : !torch.tensor -> !torch.tensor
%345 = torch.aten.where.self %342, %336, %344 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%346 = torch.aten._softmax %345, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%347 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%348 = torch.aten.expand %346, %347, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%349 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%350 = torch.aten.view %348, %349 : !torch.tensor, !torch.list<int> -> !torch.tensor
%351 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%352 = torch.aten.expand %321, %351, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%353 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%354 = torch.aten.view %352, %353 : !torch.tensor, !torch.list<int> -> !torch.tensor
%355 = torch.aten.bmm %350, %354 : !torch.tensor, !torch.tensor -> !torch.tensor
%356 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%357 = torch.aten._unsafe_view %355, %356 : !torch.tensor, !torch.list<int> -> !torch.tensor
%358 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%359 = torch.aten.permute %357, %358 : !torch.tensor, !torch.list<int> -> !torch.tensor
%360 = torch.aten.clone %359, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%361 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%362 = torch.aten.view %360, %361 : !torch.tensor, !torch.list<int> -> !torch.tensor
%363 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%364 = torch.aten.view %362, %363 : !torch.tensor, !torch.list<int> -> !torch.tensor
%365 = torch.prim.GetAttr %arg0["_param_constant18"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%366 = torch.prim.GetAttr %arg0["_param_constant19"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%367 = torch.aten.addmm %365, %364, %366, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%368 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%369 = torch.aten.view %367, %368 : !torch.tensor, !torch.list<int> -> !torch.tensor
%370 = torch.aten.add.Tensor %369, %296, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%371 = torch.prim.GetAttr %arg0["_param_constant20"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%372 = torch.prim.GetAttr %arg0["_param_constant21"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%373 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_7, %result1_8, %result2_9 = torch.aten.native_layer_norm %370, %373, %371, %372, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%374 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%375 = torch.aten.view %result0_7, %374 : !torch.tensor, !torch.list<int> -> !torch.tensor
%376 = torch.prim.GetAttr %arg0["_param_constant22"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%377 = torch.prim.GetAttr %arg0["_param_constant23"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%378 = torch.aten.addmm %376, %375, %377, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%379 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%380 = torch.aten.view %378, %379 : !torch.tensor, !torch.list<int> -> !torch.tensor
%381 = torch.aten.mul.Scalar %380, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%382 = torch.aten.pow.Tensor_Scalar %380, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%383 = torch.aten.mul.Scalar %382, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%384 = torch.aten.add.Tensor %380, %383, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%385 = torch.aten.mul.Scalar %384, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%386 = torch.aten.tanh %385 : !torch.tensor -> !torch.tensor
%387 = torch.aten.add.Scalar %386, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%388 = torch.aten.mul.Tensor %381, %387 : !torch.tensor, !torch.tensor -> !torch.tensor
%389 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%390 = torch.aten.view %388, %389 : !torch.tensor, !torch.list<int> -> !torch.tensor
%391 = torch.prim.GetAttr %arg0["_param_constant24"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%392 = torch.prim.GetAttr %arg0["_param_constant25"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%393 = torch.aten.addmm %391, %390, %392, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%394 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%395 = torch.aten.view %393, %394 : !torch.tensor, !torch.list<int> -> !torch.tensor
%396 = torch.aten.add.Tensor %370, %395, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%397 = torch.prim.GetAttr %arg0["_param_constant26"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%398 = torch.prim.GetAttr %arg0["_param_constant27"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%399 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_10, %result1_11, %result2_12 = torch.aten.native_layer_norm %396, %399, %397, %398, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%400 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%401 = torch.aten.view %result0_10, %400 : !torch.tensor, !torch.list<int> -> !torch.tensor
%402 = torch.prim.GetAttr %arg0["_param_constant28"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%403 = torch.prim.GetAttr %arg0["_param_constant29"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%404 = torch.aten.addmm %402, %401, %403, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%405 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%406 = torch.aten.view %404, %405 : !torch.tensor, !torch.list<int> -> !torch.tensor
%407 = torch.aten.slice.Tensor %406, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%408 = torch.aten.slice.Tensor %406, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%409 = torch.aten.slice.Tensor %406, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%410 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%411 = torch.aten.view %407, %410 : !torch.tensor, !torch.list<int> -> !torch.tensor
%412 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%413 = torch.aten.permute %411, %412 : !torch.tensor, !torch.list<int> -> !torch.tensor
%414 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%415 = torch.aten.view %408, %414 : !torch.tensor, !torch.list<int> -> !torch.tensor
%416 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%417 = torch.aten.permute %415, %416 : !torch.tensor, !torch.list<int> -> !torch.tensor
%418 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%419 = torch.aten.view %409, %418 : !torch.tensor, !torch.list<int> -> !torch.tensor
%420 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%421 = torch.aten.permute %419, %420 : !torch.tensor, !torch.list<int> -> !torch.tensor
%422 = torch.aten.transpose.int %417, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%423 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%424 = torch.aten.expand %413, %423, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%425 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%426 = torch.aten.view %424, %425 : !torch.tensor, !torch.list<int> -> !torch.tensor
%427 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%428 = torch.aten.expand %422, %427, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%429 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%430 = torch.aten.view %428, %429 : !torch.tensor, !torch.list<int> -> !torch.tensor
%431 = torch.aten.bmm %426, %430 : !torch.tensor, !torch.tensor -> !torch.tensor
%432 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%433 = torch.aten._unsafe_view %431, %432 : !torch.tensor, !torch.list<int> -> !torch.tensor
%434 = torch.prim.GetAttr %arg0["_tensor_constant6"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%435 = torch.aten.lift_fresh_copy %434 : !torch.tensor -> !torch.tensor
%436 = torch.aten.div.Tensor %433, %435 : !torch.tensor, !torch.tensor -> !torch.tensor
%437 = torch.prim.GetAttr %arg0["_tensor_constant7"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%438 = torch.aten.slice.Tensor %437, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%439 = torch.aten.slice.Tensor %438, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%440 = torch.aten.slice.Tensor %439, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%441 = torch.aten.slice.Tensor %440, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%442 = torch.aten._to_copy %441, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%443 = torch.prim.GetAttr %arg0["_tensor_constant8"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%444 = torch.aten.lift_fresh_copy %443 : !torch.tensor -> !torch.tensor
%445 = torch.aten.where.self %442, %436, %444 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%446 = torch.aten._softmax %445, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%447 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%448 = torch.aten.expand %446, %447, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%449 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%450 = torch.aten.view %448, %449 : !torch.tensor, !torch.list<int> -> !torch.tensor
%451 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%452 = torch.aten.expand %421, %451, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%453 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%454 = torch.aten.view %452, %453 : !torch.tensor, !torch.list<int> -> !torch.tensor
%455 = torch.aten.bmm %450, %454 : !torch.tensor, !torch.tensor -> !torch.tensor
%456 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%457 = torch.aten._unsafe_view %455, %456 : !torch.tensor, !torch.list<int> -> !torch.tensor
%458 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%459 = torch.aten.permute %457, %458 : !torch.tensor, !torch.list<int> -> !torch.tensor
%460 = torch.aten.clone %459, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%461 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%462 = torch.aten.view %460, %461 : !torch.tensor, !torch.list<int> -> !torch.tensor
%463 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%464 = torch.aten.view %462, %463 : !torch.tensor, !torch.list<int> -> !torch.tensor
%465 = torch.prim.GetAttr %arg0["_param_constant30"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%466 = torch.prim.GetAttr %arg0["_param_constant31"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%467 = torch.aten.addmm %465, %464, %466, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%468 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%469 = torch.aten.view %467, %468 : !torch.tensor, !torch.list<int> -> !torch.tensor
%470 = torch.aten.add.Tensor %469, %396, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%471 = torch.prim.GetAttr %arg0["_param_constant32"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%472 = torch.prim.GetAttr %arg0["_param_constant33"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%473 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_13, %result1_14, %result2_15 = torch.aten.native_layer_norm %470, %473, %471, %472, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%474 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%475 = torch.aten.view %result0_13, %474 : !torch.tensor, !torch.list<int> -> !torch.tensor
%476 = torch.prim.GetAttr %arg0["_param_constant34"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%477 = torch.prim.GetAttr %arg0["_param_constant35"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%478 = torch.aten.addmm %476, %475, %477, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%479 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%480 = torch.aten.view %478, %479 : !torch.tensor, !torch.list<int> -> !torch.tensor
%481 = torch.aten.mul.Scalar %480, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%482 = torch.aten.pow.Tensor_Scalar %480, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%483 = torch.aten.mul.Scalar %482, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%484 = torch.aten.add.Tensor %480, %483, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%485 = torch.aten.mul.Scalar %484, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%486 = torch.aten.tanh %485 : !torch.tensor -> !torch.tensor
%487 = torch.aten.add.Scalar %486, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%488 = torch.aten.mul.Tensor %481, %487 : !torch.tensor, !torch.tensor -> !torch.tensor
%489 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%490 = torch.aten.view %488, %489 : !torch.tensor, !torch.list<int> -> !torch.tensor
%491 = torch.prim.GetAttr %arg0["_param_constant36"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%492 = torch.prim.GetAttr %arg0["_param_constant37"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%493 = torch.aten.addmm %491, %490, %492, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%494 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%495 = torch.aten.view %493, %494 : !torch.tensor, !torch.list<int> -> !torch.tensor
%496 = torch.aten.add.Tensor %470, %495, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%497 = torch.prim.GetAttr %arg0["_param_constant38"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%498 = torch.prim.GetAttr %arg0["_param_constant39"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%499 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_16, %result1_17, %result2_18 = torch.aten.native_layer_norm %496, %499, %497, %498, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%500 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%501 = torch.aten.view %result0_16, %500 : !torch.tensor, !torch.list<int> -> !torch.tensor
%502 = torch.prim.GetAttr %arg0["_param_constant40"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%503 = torch.prim.GetAttr %arg0["_param_constant41"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%504 = torch.aten.addmm %502, %501, %503, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%505 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%506 = torch.aten.view %504, %505 : !torch.tensor, !torch.list<int> -> !torch.tensor
%507 = torch.aten.slice.Tensor %506, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%508 = torch.aten.slice.Tensor %506, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%509 = torch.aten.slice.Tensor %506, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%510 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%511 = torch.aten.view %507, %510 : !torch.tensor, !torch.list<int> -> !torch.tensor
%512 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%513 = torch.aten.permute %511, %512 : !torch.tensor, !torch.list<int> -> !torch.tensor
%514 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%515 = torch.aten.view %508, %514 : !torch.tensor, !torch.list<int> -> !torch.tensor
%516 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%517 = torch.aten.permute %515, %516 : !torch.tensor, !torch.list<int> -> !torch.tensor
%518 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%519 = torch.aten.view %509, %518 : !torch.tensor, !torch.list<int> -> !torch.tensor
%520 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%521 = torch.aten.permute %519, %520 : !torch.tensor, !torch.list<int> -> !torch.tensor
%522 = torch.aten.transpose.int %517, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%523 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%524 = torch.aten.expand %513, %523, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%525 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%526 = torch.aten.view %524, %525 : !torch.tensor, !torch.list<int> -> !torch.tensor
%527 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%528 = torch.aten.expand %522, %527, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%529 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%530 = torch.aten.view %528, %529 : !torch.tensor, !torch.list<int> -> !torch.tensor
%531 = torch.aten.bmm %526, %530 : !torch.tensor, !torch.tensor -> !torch.tensor
%532 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%533 = torch.aten._unsafe_view %531, %532 : !torch.tensor, !torch.list<int> -> !torch.tensor
%534 = torch.prim.GetAttr %arg0["_tensor_constant9"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%535 = torch.aten.lift_fresh_copy %534 : !torch.tensor -> !torch.tensor
%536 = torch.aten.div.Tensor %533, %535 : !torch.tensor, !torch.tensor -> !torch.tensor
%537 = torch.prim.GetAttr %arg0["_tensor_constant10"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%538 = torch.aten.slice.Tensor %537, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%539 = torch.aten.slice.Tensor %538, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%540 = torch.aten.slice.Tensor %539, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%541 = torch.aten.slice.Tensor %540, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%542 = torch.aten._to_copy %541, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%543 = torch.prim.GetAttr %arg0["_tensor_constant11"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%544 = torch.aten.lift_fresh_copy %543 : !torch.tensor -> !torch.tensor
%545 = torch.aten.where.self %542, %536, %544 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%546 = torch.aten._softmax %545, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%547 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%548 = torch.aten.expand %546, %547, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%549 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%550 = torch.aten.view %548, %549 : !torch.tensor, !torch.list<int> -> !torch.tensor
%551 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%552 = torch.aten.expand %521, %551, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%553 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%554 = torch.aten.view %552, %553 : !torch.tensor, !torch.list<int> -> !torch.tensor
%555 = torch.aten.bmm %550, %554 : !torch.tensor, !torch.tensor -> !torch.tensor
%556 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%557 = torch.aten._unsafe_view %555, %556 : !torch.tensor, !torch.list<int> -> !torch.tensor
%558 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%559 = torch.aten.permute %557, %558 : !torch.tensor, !torch.list<int> -> !torch.tensor
%560 = torch.aten.clone %559, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%561 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%562 = torch.aten.view %560, %561 : !torch.tensor, !torch.list<int> -> !torch.tensor
%563 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%564 = torch.aten.view %562, %563 : !torch.tensor, !torch.list<int> -> !torch.tensor
%565 = torch.prim.GetAttr %arg0["_param_constant42"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%566 = torch.prim.GetAttr %arg0["_param_constant43"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%567 = torch.aten.addmm %565, %564, %566, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%568 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%569 = torch.aten.view %567, %568 : !torch.tensor, !torch.list<int> -> !torch.tensor
%570 = torch.aten.add.Tensor %569, %496, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%571 = torch.prim.GetAttr %arg0["_param_constant44"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%572 = torch.prim.GetAttr %arg0["_param_constant45"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%573 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_19, %result1_20, %result2_21 = torch.aten.native_layer_norm %570, %573, %571, %572, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%574 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%575 = torch.aten.view %result0_19, %574 : !torch.tensor, !torch.list<int> -> !torch.tensor
%576 = torch.prim.GetAttr %arg0["_param_constant46"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%577 = torch.prim.GetAttr %arg0["_param_constant47"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%578 = torch.aten.addmm %576, %575, %577, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%579 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%580 = torch.aten.view %578, %579 : !torch.tensor, !torch.list<int> -> !torch.tensor
%581 = torch.aten.mul.Scalar %580, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%582 = torch.aten.pow.Tensor_Scalar %580, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%583 = torch.aten.mul.Scalar %582, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%584 = torch.aten.add.Tensor %580, %583, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%585 = torch.aten.mul.Scalar %584, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%586 = torch.aten.tanh %585 : !torch.tensor -> !torch.tensor
%587 = torch.aten.add.Scalar %586, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%588 = torch.aten.mul.Tensor %581, %587 : !torch.tensor, !torch.tensor -> !torch.tensor
%589 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%590 = torch.aten.view %588, %589 : !torch.tensor, !torch.list<int> -> !torch.tensor
%591 = torch.prim.GetAttr %arg0["_param_constant48"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%592 = torch.prim.GetAttr %arg0["_param_constant49"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%593 = torch.aten.addmm %591, %590, %592, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%594 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%595 = torch.aten.view %593, %594 : !torch.tensor, !torch.list<int> -> !torch.tensor
%596 = torch.aten.add.Tensor %570, %595, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%597 = torch.prim.GetAttr %arg0["_param_constant50"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%598 = torch.prim.GetAttr %arg0["_param_constant51"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%599 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_22, %result1_23, %result2_24 = torch.aten.native_layer_norm %596, %599, %597, %598, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%600 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%601 = torch.aten.view %result0_22, %600 : !torch.tensor, !torch.list<int> -> !torch.tensor
%602 = torch.prim.GetAttr %arg0["_param_constant52"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%603 = torch.prim.GetAttr %arg0["_param_constant53"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%604 = torch.aten.addmm %602, %601, %603, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%605 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%606 = torch.aten.view %604, %605 : !torch.tensor, !torch.list<int> -> !torch.tensor
%607 = torch.aten.slice.Tensor %606, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%608 = torch.aten.slice.Tensor %606, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%609 = torch.aten.slice.Tensor %606, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%610 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%611 = torch.aten.view %607, %610 : !torch.tensor, !torch.list<int> -> !torch.tensor
%612 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%613 = torch.aten.permute %611, %612 : !torch.tensor, !torch.list<int> -> !torch.tensor
%614 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%615 = torch.aten.view %608, %614 : !torch.tensor, !torch.list<int> -> !torch.tensor
%616 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%617 = torch.aten.permute %615, %616 : !torch.tensor, !torch.list<int> -> !torch.tensor
%618 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%619 = torch.aten.view %609, %618 : !torch.tensor, !torch.list<int> -> !torch.tensor
%620 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%621 = torch.aten.permute %619, %620 : !torch.tensor, !torch.list<int> -> !torch.tensor
%622 = torch.aten.transpose.int %617, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%623 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%624 = torch.aten.expand %613, %623, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%625 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%626 = torch.aten.view %624, %625 : !torch.tensor, !torch.list<int> -> !torch.tensor
%627 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%628 = torch.aten.expand %622, %627, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%629 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%630 = torch.aten.view %628, %629 : !torch.tensor, !torch.list<int> -> !torch.tensor
%631 = torch.aten.bmm %626, %630 : !torch.tensor, !torch.tensor -> !torch.tensor
%632 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%633 = torch.aten._unsafe_view %631, %632 : !torch.tensor, !torch.list<int> -> !torch.tensor
%634 = torch.prim.GetAttr %arg0["_tensor_constant12"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%635 = torch.aten.lift_fresh_copy %634 : !torch.tensor -> !torch.tensor
%636 = torch.aten.div.Tensor %633, %635 : !torch.tensor, !torch.tensor -> !torch.tensor
%637 = torch.prim.GetAttr %arg0["_tensor_constant13"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%638 = torch.aten.slice.Tensor %637, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%639 = torch.aten.slice.Tensor %638, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%640 = torch.aten.slice.Tensor %639, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%641 = torch.aten.slice.Tensor %640, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%642 = torch.aten._to_copy %641, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%643 = torch.prim.GetAttr %arg0["_tensor_constant14"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%644 = torch.aten.lift_fresh_copy %643 : !torch.tensor -> !torch.tensor
%645 = torch.aten.where.self %642, %636, %644 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%646 = torch.aten._softmax %645, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%647 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%648 = torch.aten.expand %646, %647, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%649 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%650 = torch.aten.view %648, %649 : !torch.tensor, !torch.list<int> -> !torch.tensor
%651 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%652 = torch.aten.expand %621, %651, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%653 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%654 = torch.aten.view %652, %653 : !torch.tensor, !torch.list<int> -> !torch.tensor
%655 = torch.aten.bmm %650, %654 : !torch.tensor, !torch.tensor -> !torch.tensor
%656 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%657 = torch.aten._unsafe_view %655, %656 : !torch.tensor, !torch.list<int> -> !torch.tensor
%658 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%659 = torch.aten.permute %657, %658 : !torch.tensor, !torch.list<int> -> !torch.tensor
%660 = torch.aten.clone %659, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%661 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%662 = torch.aten.view %660, %661 : !torch.tensor, !torch.list<int> -> !torch.tensor
%663 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%664 = torch.aten.view %662, %663 : !torch.tensor, !torch.list<int> -> !torch.tensor
%665 = torch.prim.GetAttr %arg0["_param_constant54"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%666 = torch.prim.GetAttr %arg0["_param_constant55"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%667 = torch.aten.addmm %665, %664, %666, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%668 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%669 = torch.aten.view %667, %668 : !torch.tensor, !torch.list<int> -> !torch.tensor
%670 = torch.aten.add.Tensor %669, %596, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%671 = torch.prim.GetAttr %arg0["_param_constant56"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%672 = torch.prim.GetAttr %arg0["_param_constant57"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%673 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_25, %result1_26, %result2_27 = torch.aten.native_layer_norm %670, %673, %671, %672, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%674 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%675 = torch.aten.view %result0_25, %674 : !torch.tensor, !torch.list<int> -> !torch.tensor
%676 = torch.prim.GetAttr %arg0["_param_constant58"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%677 = torch.prim.GetAttr %arg0["_param_constant59"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%678 = torch.aten.addmm %676, %675, %677, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%679 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%680 = torch.aten.view %678, %679 : !torch.tensor, !torch.list<int> -> !torch.tensor
%681 = torch.aten.mul.Scalar %680, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%682 = torch.aten.pow.Tensor_Scalar %680, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%683 = torch.aten.mul.Scalar %682, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%684 = torch.aten.add.Tensor %680, %683, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%685 = torch.aten.mul.Scalar %684, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%686 = torch.aten.tanh %685 : !torch.tensor -> !torch.tensor
%687 = torch.aten.add.Scalar %686, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%688 = torch.aten.mul.Tensor %681, %687 : !torch.tensor, !torch.tensor -> !torch.tensor
%689 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%690 = torch.aten.view %688, %689 : !torch.tensor, !torch.list<int> -> !torch.tensor
%691 = torch.prim.GetAttr %arg0["_param_constant60"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%692 = torch.prim.GetAttr %arg0["_param_constant61"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%693 = torch.aten.addmm %691, %690, %692, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%694 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%695 = torch.aten.view %693, %694 : !torch.tensor, !torch.list<int> -> !torch.tensor
%696 = torch.aten.add.Tensor %670, %695, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%697 = torch.prim.GetAttr %arg0["_param_constant62"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%698 = torch.prim.GetAttr %arg0["_param_constant63"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%699 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_28, %result1_29, %result2_30 = torch.aten.native_layer_norm %696, %699, %697, %698, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%700 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%701 = torch.aten.view %result0_28, %700 : !torch.tensor, !torch.list<int> -> !torch.tensor
%702 = torch.prim.GetAttr %arg0["_param_constant64"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%703 = torch.prim.GetAttr %arg0["_param_constant65"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%704 = torch.aten.addmm %702, %701, %703, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%705 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%706 = torch.aten.view %704, %705 : !torch.tensor, !torch.list<int> -> !torch.tensor
%707 = torch.aten.slice.Tensor %706, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%708 = torch.aten.slice.Tensor %706, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%709 = torch.aten.slice.Tensor %706, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%710 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%711 = torch.aten.view %707, %710 : !torch.tensor, !torch.list<int> -> !torch.tensor
%712 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%713 = torch.aten.permute %711, %712 : !torch.tensor, !torch.list<int> -> !torch.tensor
%714 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%715 = torch.aten.view %708, %714 : !torch.tensor, !torch.list<int> -> !torch.tensor
%716 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%717 = torch.aten.permute %715, %716 : !torch.tensor, !torch.list<int> -> !torch.tensor
%718 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%719 = torch.aten.view %709, %718 : !torch.tensor, !torch.list<int> -> !torch.tensor
%720 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%721 = torch.aten.permute %719, %720 : !torch.tensor, !torch.list<int> -> !torch.tensor
%722 = torch.aten.transpose.int %717, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%723 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%724 = torch.aten.expand %713, %723, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%725 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%726 = torch.aten.view %724, %725 : !torch.tensor, !torch.list<int> -> !torch.tensor
%727 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%728 = torch.aten.expand %722, %727, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%729 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%730 = torch.aten.view %728, %729 : !torch.tensor, !torch.list<int> -> !torch.tensor
%731 = torch.aten.bmm %726, %730 : !torch.tensor, !torch.tensor -> !torch.tensor
%732 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%733 = torch.aten._unsafe_view %731, %732 : !torch.tensor, !torch.list<int> -> !torch.tensor
%734 = torch.prim.GetAttr %arg0["_tensor_constant15"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%735 = torch.aten.lift_fresh_copy %734 : !torch.tensor -> !torch.tensor
%736 = torch.aten.div.Tensor %733, %735 : !torch.tensor, !torch.tensor -> !torch.tensor
%737 = torch.prim.GetAttr %arg0["_tensor_constant16"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%738 = torch.aten.slice.Tensor %737, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%739 = torch.aten.slice.Tensor %738, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%740 = torch.aten.slice.Tensor %739, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%741 = torch.aten.slice.Tensor %740, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%742 = torch.aten._to_copy %741, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%743 = torch.prim.GetAttr %arg0["_tensor_constant17"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%744 = torch.aten.lift_fresh_copy %743 : !torch.tensor -> !torch.tensor
%745 = torch.aten.where.self %742, %736, %744 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%746 = torch.aten._softmax %745, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%747 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%748 = torch.aten.expand %746, %747, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%749 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%750 = torch.aten.view %748, %749 : !torch.tensor, !torch.list<int> -> !torch.tensor
%751 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%752 = torch.aten.expand %721, %751, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%753 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%754 = torch.aten.view %752, %753 : !torch.tensor, !torch.list<int> -> !torch.tensor
%755 = torch.aten.bmm %750, %754 : !torch.tensor, !torch.tensor -> !torch.tensor
%756 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%757 = torch.aten._unsafe_view %755, %756 : !torch.tensor, !torch.list<int> -> !torch.tensor
%758 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%759 = torch.aten.permute %757, %758 : !torch.tensor, !torch.list<int> -> !torch.tensor
%760 = torch.aten.clone %759, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%761 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%762 = torch.aten.view %760, %761 : !torch.tensor, !torch.list<int> -> !torch.tensor
%763 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%764 = torch.aten.view %762, %763 : !torch.tensor, !torch.list<int> -> !torch.tensor
%765 = torch.prim.GetAttr %arg0["_param_constant66"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%766 = torch.prim.GetAttr %arg0["_param_constant67"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%767 = torch.aten.addmm %765, %764, %766, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%768 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%769 = torch.aten.view %767, %768 : !torch.tensor, !torch.list<int> -> !torch.tensor
%770 = torch.aten.add.Tensor %769, %696, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%771 = torch.prim.GetAttr %arg0["_param_constant68"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%772 = torch.prim.GetAttr %arg0["_param_constant69"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%773 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_31, %result1_32, %result2_33 = torch.aten.native_layer_norm %770, %773, %771, %772, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%774 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%775 = torch.aten.view %result0_31, %774 : !torch.tensor, !torch.list<int> -> !torch.tensor
%776 = torch.prim.GetAttr %arg0["_param_constant70"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%777 = torch.prim.GetAttr %arg0["_param_constant71"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%778 = torch.aten.addmm %776, %775, %777, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%779 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%780 = torch.aten.view %778, %779 : !torch.tensor, !torch.list<int> -> !torch.tensor
%781 = torch.aten.mul.Scalar %780, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%782 = torch.aten.pow.Tensor_Scalar %780, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%783 = torch.aten.mul.Scalar %782, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%784 = torch.aten.add.Tensor %780, %783, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%785 = torch.aten.mul.Scalar %784, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%786 = torch.aten.tanh %785 : !torch.tensor -> !torch.tensor
%787 = torch.aten.add.Scalar %786, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%788 = torch.aten.mul.Tensor %781, %787 : !torch.tensor, !torch.tensor -> !torch.tensor
%789 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%790 = torch.aten.view %788, %789 : !torch.tensor, !torch.list<int> -> !torch.tensor
%791 = torch.prim.GetAttr %arg0["_param_constant72"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%792 = torch.prim.GetAttr %arg0["_param_constant73"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%793 = torch.aten.addmm %791, %790, %792, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%794 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%795 = torch.aten.view %793, %794 : !torch.tensor, !torch.list<int> -> !torch.tensor
%796 = torch.aten.add.Tensor %770, %795, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%797 = torch.prim.GetAttr %arg0["_param_constant74"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%798 = torch.prim.GetAttr %arg0["_param_constant75"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%799 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_34, %result1_35, %result2_36 = torch.aten.native_layer_norm %796, %799, %797, %798, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%800 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%801 = torch.aten.view %result0_34, %800 : !torch.tensor, !torch.list<int> -> !torch.tensor
%802 = torch.prim.GetAttr %arg0["_param_constant76"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%803 = torch.prim.GetAttr %arg0["_param_constant77"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%804 = torch.aten.addmm %802, %801, %803, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%805 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%806 = torch.aten.view %804, %805 : !torch.tensor, !torch.list<int> -> !torch.tensor
%807 = torch.aten.slice.Tensor %806, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%808 = torch.aten.slice.Tensor %806, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%809 = torch.aten.slice.Tensor %806, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%810 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%811 = torch.aten.view %807, %810 : !torch.tensor, !torch.list<int> -> !torch.tensor
%812 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%813 = torch.aten.permute %811, %812 : !torch.tensor, !torch.list<int> -> !torch.tensor
%814 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%815 = torch.aten.view %808, %814 : !torch.tensor, !torch.list<int> -> !torch.tensor
%816 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%817 = torch.aten.permute %815, %816 : !torch.tensor, !torch.list<int> -> !torch.tensor
%818 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%819 = torch.aten.view %809, %818 : !torch.tensor, !torch.list<int> -> !torch.tensor
%820 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%821 = torch.aten.permute %819, %820 : !torch.tensor, !torch.list<int> -> !torch.tensor
%822 = torch.aten.transpose.int %817, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%823 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%824 = torch.aten.expand %813, %823, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%825 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%826 = torch.aten.view %824, %825 : !torch.tensor, !torch.list<int> -> !torch.tensor
%827 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%828 = torch.aten.expand %822, %827, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%829 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%830 = torch.aten.view %828, %829 : !torch.tensor, !torch.list<int> -> !torch.tensor
%831 = torch.aten.bmm %826, %830 : !torch.tensor, !torch.tensor -> !torch.tensor
%832 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%833 = torch.aten._unsafe_view %831, %832 : !torch.tensor, !torch.list<int> -> !torch.tensor
%834 = torch.prim.GetAttr %arg0["_tensor_constant18"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%835 = torch.aten.lift_fresh_copy %834 : !torch.tensor -> !torch.tensor
%836 = torch.aten.div.Tensor %833, %835 : !torch.tensor, !torch.tensor -> !torch.tensor
%837 = torch.prim.GetAttr %arg0["_tensor_constant19"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%838 = torch.aten.slice.Tensor %837, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%839 = torch.aten.slice.Tensor %838, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%840 = torch.aten.slice.Tensor %839, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%841 = torch.aten.slice.Tensor %840, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%842 = torch.aten._to_copy %841, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%843 = torch.prim.GetAttr %arg0["_tensor_constant20"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%844 = torch.aten.lift_fresh_copy %843 : !torch.tensor -> !torch.tensor
%845 = torch.aten.where.self %842, %836, %844 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%846 = torch.aten._softmax %845, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%847 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%848 = torch.aten.expand %846, %847, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%849 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%850 = torch.aten.view %848, %849 : !torch.tensor, !torch.list<int> -> !torch.tensor
%851 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%852 = torch.aten.expand %821, %851, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%853 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%854 = torch.aten.view %852, %853 : !torch.tensor, !torch.list<int> -> !torch.tensor
%855 = torch.aten.bmm %850, %854 : !torch.tensor, !torch.tensor -> !torch.tensor
%856 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%857 = torch.aten._unsafe_view %855, %856 : !torch.tensor, !torch.list<int> -> !torch.tensor
%858 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%859 = torch.aten.permute %857, %858 : !torch.tensor, !torch.list<int> -> !torch.tensor
%860 = torch.aten.clone %859, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%861 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%862 = torch.aten.view %860, %861 : !torch.tensor, !torch.list<int> -> !torch.tensor
%863 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%864 = torch.aten.view %862, %863 : !torch.tensor, !torch.list<int> -> !torch.tensor
%865 = torch.prim.GetAttr %arg0["_param_constant78"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%866 = torch.prim.GetAttr %arg0["_param_constant79"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%867 = torch.aten.addmm %865, %864, %866, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%868 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%869 = torch.aten.view %867, %868 : !torch.tensor, !torch.list<int> -> !torch.tensor
%870 = torch.aten.add.Tensor %869, %796, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%871 = torch.prim.GetAttr %arg0["_param_constant80"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%872 = torch.prim.GetAttr %arg0["_param_constant81"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%873 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_37, %result1_38, %result2_39 = torch.aten.native_layer_norm %870, %873, %871, %872, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%874 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%875 = torch.aten.view %result0_37, %874 : !torch.tensor, !torch.list<int> -> !torch.tensor
%876 = torch.prim.GetAttr %arg0["_param_constant82"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%877 = torch.prim.GetAttr %arg0["_param_constant83"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%878 = torch.aten.addmm %876, %875, %877, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%879 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%880 = torch.aten.view %878, %879 : !torch.tensor, !torch.list<int> -> !torch.tensor
%881 = torch.aten.mul.Scalar %880, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%882 = torch.aten.pow.Tensor_Scalar %880, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%883 = torch.aten.mul.Scalar %882, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%884 = torch.aten.add.Tensor %880, %883, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%885 = torch.aten.mul.Scalar %884, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%886 = torch.aten.tanh %885 : !torch.tensor -> !torch.tensor
%887 = torch.aten.add.Scalar %886, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%888 = torch.aten.mul.Tensor %881, %887 : !torch.tensor, !torch.tensor -> !torch.tensor
%889 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%890 = torch.aten.view %888, %889 : !torch.tensor, !torch.list<int> -> !torch.tensor
%891 = torch.prim.GetAttr %arg0["_param_constant84"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%892 = torch.prim.GetAttr %arg0["_param_constant85"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%893 = torch.aten.addmm %891, %890, %892, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%894 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%895 = torch.aten.view %893, %894 : !torch.tensor, !torch.list<int> -> !torch.tensor
%896 = torch.aten.add.Tensor %870, %895, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%897 = torch.prim.GetAttr %arg0["_param_constant86"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%898 = torch.prim.GetAttr %arg0["_param_constant87"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%899 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_40, %result1_41, %result2_42 = torch.aten.native_layer_norm %896, %899, %897, %898, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%900 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%901 = torch.aten.view %result0_40, %900 : !torch.tensor, !torch.list<int> -> !torch.tensor
%902 = torch.prim.GetAttr %arg0["_param_constant88"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%903 = torch.prim.GetAttr %arg0["_param_constant89"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%904 = torch.aten.addmm %902, %901, %903, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%905 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%906 = torch.aten.view %904, %905 : !torch.tensor, !torch.list<int> -> !torch.tensor
%907 = torch.aten.slice.Tensor %906, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%908 = torch.aten.slice.Tensor %906, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%909 = torch.aten.slice.Tensor %906, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%910 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%911 = torch.aten.view %907, %910 : !torch.tensor, !torch.list<int> -> !torch.tensor
%912 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%913 = torch.aten.permute %911, %912 : !torch.tensor, !torch.list<int> -> !torch.tensor
%914 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%915 = torch.aten.view %908, %914 : !torch.tensor, !torch.list<int> -> !torch.tensor
%916 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%917 = torch.aten.permute %915, %916 : !torch.tensor, !torch.list<int> -> !torch.tensor
%918 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%919 = torch.aten.view %909, %918 : !torch.tensor, !torch.list<int> -> !torch.tensor
%920 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%921 = torch.aten.permute %919, %920 : !torch.tensor, !torch.list<int> -> !torch.tensor
%922 = torch.aten.transpose.int %917, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%923 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%924 = torch.aten.expand %913, %923, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%925 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%926 = torch.aten.view %924, %925 : !torch.tensor, !torch.list<int> -> !torch.tensor
%927 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%928 = torch.aten.expand %922, %927, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%929 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%930 = torch.aten.view %928, %929 : !torch.tensor, !torch.list<int> -> !torch.tensor
%931 = torch.aten.bmm %926, %930 : !torch.tensor, !torch.tensor -> !torch.tensor
%932 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%933 = torch.aten._unsafe_view %931, %932 : !torch.tensor, !torch.list<int> -> !torch.tensor
%934 = torch.prim.GetAttr %arg0["_tensor_constant21"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%935 = torch.aten.lift_fresh_copy %934 : !torch.tensor -> !torch.tensor
%936 = torch.aten.div.Tensor %933, %935 : !torch.tensor, !torch.tensor -> !torch.tensor
%937 = torch.prim.GetAttr %arg0["_tensor_constant22"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%938 = torch.aten.slice.Tensor %937, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%939 = torch.aten.slice.Tensor %938, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%940 = torch.aten.slice.Tensor %939, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%941 = torch.aten.slice.Tensor %940, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%942 = torch.aten._to_copy %941, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%943 = torch.prim.GetAttr %arg0["_tensor_constant23"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%944 = torch.aten.lift_fresh_copy %943 : !torch.tensor -> !torch.tensor
%945 = torch.aten.where.self %942, %936, %944 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%946 = torch.aten._softmax %945, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%947 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%948 = torch.aten.expand %946, %947, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%949 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%950 = torch.aten.view %948, %949 : !torch.tensor, !torch.list<int> -> !torch.tensor
%951 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%952 = torch.aten.expand %921, %951, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%953 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%954 = torch.aten.view %952, %953 : !torch.tensor, !torch.list<int> -> !torch.tensor
%955 = torch.aten.bmm %950, %954 : !torch.tensor, !torch.tensor -> !torch.tensor
%956 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%957 = torch.aten._unsafe_view %955, %956 : !torch.tensor, !torch.list<int> -> !torch.tensor
%958 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%959 = torch.aten.permute %957, %958 : !torch.tensor, !torch.list<int> -> !torch.tensor
%960 = torch.aten.clone %959, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%961 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%962 = torch.aten.view %960, %961 : !torch.tensor, !torch.list<int> -> !torch.tensor
%963 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%964 = torch.aten.view %962, %963 : !torch.tensor, !torch.list<int> -> !torch.tensor
%965 = torch.prim.GetAttr %arg0["_param_constant90"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%966 = torch.prim.GetAttr %arg0["_param_constant91"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%967 = torch.aten.addmm %965, %964, %966, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%968 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%969 = torch.aten.view %967, %968 : !torch.tensor, !torch.list<int> -> !torch.tensor
%970 = torch.aten.add.Tensor %969, %896, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%971 = torch.prim.GetAttr %arg0["_param_constant92"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%972 = torch.prim.GetAttr %arg0["_param_constant93"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%973 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_43, %result1_44, %result2_45 = torch.aten.native_layer_norm %970, %973, %971, %972, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%974 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%975 = torch.aten.view %result0_43, %974 : !torch.tensor, !torch.list<int> -> !torch.tensor
%976 = torch.prim.GetAttr %arg0["_param_constant94"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%977 = torch.prim.GetAttr %arg0["_param_constant95"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%978 = torch.aten.addmm %976, %975, %977, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%979 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%980 = torch.aten.view %978, %979 : !torch.tensor, !torch.list<int> -> !torch.tensor
%981 = torch.aten.mul.Scalar %980, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%982 = torch.aten.pow.Tensor_Scalar %980, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%983 = torch.aten.mul.Scalar %982, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%984 = torch.aten.add.Tensor %980, %983, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%985 = torch.aten.mul.Scalar %984, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%986 = torch.aten.tanh %985 : !torch.tensor -> !torch.tensor
%987 = torch.aten.add.Scalar %986, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%988 = torch.aten.mul.Tensor %981, %987 : !torch.tensor, !torch.tensor -> !torch.tensor
%989 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%990 = torch.aten.view %988, %989 : !torch.tensor, !torch.list<int> -> !torch.tensor
%991 = torch.prim.GetAttr %arg0["_param_constant96"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%992 = torch.prim.GetAttr %arg0["_param_constant97"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%993 = torch.aten.addmm %991, %990, %992, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%994 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%995 = torch.aten.view %993, %994 : !torch.tensor, !torch.list<int> -> !torch.tensor
%996 = torch.aten.add.Tensor %970, %995, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%997 = torch.prim.GetAttr %arg0["_param_constant98"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%998 = torch.prim.GetAttr %arg0["_param_constant99"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%999 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_46, %result1_47, %result2_48 = torch.aten.native_layer_norm %996, %999, %997, %998, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1000 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1001 = torch.aten.view %result0_46, %1000 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1002 = torch.prim.GetAttr %arg0["_param_constant100"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1003 = torch.prim.GetAttr %arg0["_param_constant101"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1004 = torch.aten.addmm %1002, %1001, %1003, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1005 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1006 = torch.aten.view %1004, %1005 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1007 = torch.aten.slice.Tensor %1006, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1008 = torch.aten.slice.Tensor %1006, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1009 = torch.aten.slice.Tensor %1006, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1010 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1011 = torch.aten.view %1007, %1010 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1012 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1013 = torch.aten.permute %1011, %1012 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1014 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1015 = torch.aten.view %1008, %1014 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1016 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1017 = torch.aten.permute %1015, %1016 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1018 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1019 = torch.aten.view %1009, %1018 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1020 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1021 = torch.aten.permute %1019, %1020 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1022 = torch.aten.transpose.int %1017, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1023 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1024 = torch.aten.expand %1013, %1023, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1025 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1026 = torch.aten.view %1024, %1025 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1027 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1028 = torch.aten.expand %1022, %1027, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1029 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1030 = torch.aten.view %1028, %1029 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1031 = torch.aten.bmm %1026, %1030 : !torch.tensor, !torch.tensor -> !torch.tensor
%1032 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1033 = torch.aten._unsafe_view %1031, %1032 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1034 = torch.prim.GetAttr %arg0["_tensor_constant24"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1035 = torch.aten.lift_fresh_copy %1034 : !torch.tensor -> !torch.tensor
%1036 = torch.aten.div.Tensor %1033, %1035 : !torch.tensor, !torch.tensor -> !torch.tensor
%1037 = torch.prim.GetAttr %arg0["_tensor_constant25"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1038 = torch.aten.slice.Tensor %1037, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1039 = torch.aten.slice.Tensor %1038, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1040 = torch.aten.slice.Tensor %1039, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1041 = torch.aten.slice.Tensor %1040, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1042 = torch.aten._to_copy %1041, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%1043 = torch.prim.GetAttr %arg0["_tensor_constant26"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1044 = torch.aten.lift_fresh_copy %1043 : !torch.tensor -> !torch.tensor
%1045 = torch.aten.where.self %1042, %1036, %1044 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%1046 = torch.aten._softmax %1045, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1047 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1048 = torch.aten.expand %1046, %1047, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1049 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1050 = torch.aten.view %1048, %1049 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1051 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1052 = torch.aten.expand %1021, %1051, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1053 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1054 = torch.aten.view %1052, %1053 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1055 = torch.aten.bmm %1050, %1054 : !torch.tensor, !torch.tensor -> !torch.tensor
%1056 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1057 = torch.aten._unsafe_view %1055, %1056 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1058 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1059 = torch.aten.permute %1057, %1058 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1060 = torch.aten.clone %1059, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1061 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1062 = torch.aten.view %1060, %1061 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1063 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1064 = torch.aten.view %1062, %1063 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1065 = torch.prim.GetAttr %arg0["_param_constant102"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1066 = torch.prim.GetAttr %arg0["_param_constant103"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1067 = torch.aten.addmm %1065, %1064, %1066, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1068 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1069 = torch.aten.view %1067, %1068 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1070 = torch.aten.add.Tensor %1069, %996, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1071 = torch.prim.GetAttr %arg0["_param_constant104"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1072 = torch.prim.GetAttr %arg0["_param_constant105"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1073 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_49, %result1_50, %result2_51 = torch.aten.native_layer_norm %1070, %1073, %1071, %1072, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1074 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1075 = torch.aten.view %result0_49, %1074 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1076 = torch.prim.GetAttr %arg0["_param_constant106"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1077 = torch.prim.GetAttr %arg0["_param_constant107"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1078 = torch.aten.addmm %1076, %1075, %1077, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1079 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1080 = torch.aten.view %1078, %1079 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1081 = torch.aten.mul.Scalar %1080, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1082 = torch.aten.pow.Tensor_Scalar %1080, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%1083 = torch.aten.mul.Scalar %1082, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%1084 = torch.aten.add.Tensor %1080, %1083, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1085 = torch.aten.mul.Scalar %1084, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1086 = torch.aten.tanh %1085 : !torch.tensor -> !torch.tensor
%1087 = torch.aten.add.Scalar %1086, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1088 = torch.aten.mul.Tensor %1081, %1087 : !torch.tensor, !torch.tensor -> !torch.tensor
%1089 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%1090 = torch.aten.view %1088, %1089 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1091 = torch.prim.GetAttr %arg0["_param_constant108"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1092 = torch.prim.GetAttr %arg0["_param_constant109"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1093 = torch.aten.addmm %1091, %1090, %1092, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1094 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1095 = torch.aten.view %1093, %1094 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1096 = torch.aten.add.Tensor %1070, %1095, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1097 = torch.prim.GetAttr %arg0["_param_constant110"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1098 = torch.prim.GetAttr %arg0["_param_constant111"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1099 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_52, %result1_53, %result2_54 = torch.aten.native_layer_norm %1096, %1099, %1097, %1098, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1100 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1101 = torch.aten.view %result0_52, %1100 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1102 = torch.prim.GetAttr %arg0["_param_constant112"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1103 = torch.prim.GetAttr %arg0["_param_constant113"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1104 = torch.aten.addmm %1102, %1101, %1103, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1105 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1106 = torch.aten.view %1104, %1105 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1107 = torch.aten.slice.Tensor %1106, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1108 = torch.aten.slice.Tensor %1106, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1109 = torch.aten.slice.Tensor %1106, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1110 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1111 = torch.aten.view %1107, %1110 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1112 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1113 = torch.aten.permute %1111, %1112 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1114 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1115 = torch.aten.view %1108, %1114 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1116 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1117 = torch.aten.permute %1115, %1116 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1118 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1119 = torch.aten.view %1109, %1118 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1120 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1121 = torch.aten.permute %1119, %1120 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1122 = torch.aten.transpose.int %1117, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1123 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1124 = torch.aten.expand %1113, %1123, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1125 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1126 = torch.aten.view %1124, %1125 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1127 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1128 = torch.aten.expand %1122, %1127, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1129 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1130 = torch.aten.view %1128, %1129 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1131 = torch.aten.bmm %1126, %1130 : !torch.tensor, !torch.tensor -> !torch.tensor
%1132 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1133 = torch.aten._unsafe_view %1131, %1132 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1134 = torch.prim.GetAttr %arg0["_tensor_constant27"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1135 = torch.aten.lift_fresh_copy %1134 : !torch.tensor -> !torch.tensor
%1136 = torch.aten.div.Tensor %1133, %1135 : !torch.tensor, !torch.tensor -> !torch.tensor
%1137 = torch.prim.GetAttr %arg0["_tensor_constant28"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1138 = torch.aten.slice.Tensor %1137, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1139 = torch.aten.slice.Tensor %1138, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1140 = torch.aten.slice.Tensor %1139, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1141 = torch.aten.slice.Tensor %1140, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1142 = torch.aten._to_copy %1141, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%1143 = torch.prim.GetAttr %arg0["_tensor_constant29"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1144 = torch.aten.lift_fresh_copy %1143 : !torch.tensor -> !torch.tensor
%1145 = torch.aten.where.self %1142, %1136, %1144 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%1146 = torch.aten._softmax %1145, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1147 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1148 = torch.aten.expand %1146, %1147, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1149 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1150 = torch.aten.view %1148, %1149 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1151 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1152 = torch.aten.expand %1121, %1151, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1153 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1154 = torch.aten.view %1152, %1153 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1155 = torch.aten.bmm %1150, %1154 : !torch.tensor, !torch.tensor -> !torch.tensor
%1156 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1157 = torch.aten._unsafe_view %1155, %1156 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1158 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1159 = torch.aten.permute %1157, %1158 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1160 = torch.aten.clone %1159, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1161 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1162 = torch.aten.view %1160, %1161 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1163 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1164 = torch.aten.view %1162, %1163 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1165 = torch.prim.GetAttr %arg0["_param_constant114"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1166 = torch.prim.GetAttr %arg0["_param_constant115"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1167 = torch.aten.addmm %1165, %1164, %1166, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1168 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1169 = torch.aten.view %1167, %1168 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1170 = torch.aten.add.Tensor %1169, %1096, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1171 = torch.prim.GetAttr %arg0["_param_constant116"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1172 = torch.prim.GetAttr %arg0["_param_constant117"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1173 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_55, %result1_56, %result2_57 = torch.aten.native_layer_norm %1170, %1173, %1171, %1172, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1174 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1175 = torch.aten.view %result0_55, %1174 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1176 = torch.prim.GetAttr %arg0["_param_constant118"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1177 = torch.prim.GetAttr %arg0["_param_constant119"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1178 = torch.aten.addmm %1176, %1175, %1177, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1179 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1180 = torch.aten.view %1178, %1179 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1181 = torch.aten.mul.Scalar %1180, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1182 = torch.aten.pow.Tensor_Scalar %1180, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%1183 = torch.aten.mul.Scalar %1182, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%1184 = torch.aten.add.Tensor %1180, %1183, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1185 = torch.aten.mul.Scalar %1184, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1186 = torch.aten.tanh %1185 : !torch.tensor -> !torch.tensor
%1187 = torch.aten.add.Scalar %1186, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1188 = torch.aten.mul.Tensor %1181, %1187 : !torch.tensor, !torch.tensor -> !torch.tensor
%1189 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%1190 = torch.aten.view %1188, %1189 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1191 = torch.prim.GetAttr %arg0["_param_constant120"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1192 = torch.prim.GetAttr %arg0["_param_constant121"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1193 = torch.aten.addmm %1191, %1190, %1192, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1194 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1195 = torch.aten.view %1193, %1194 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1196 = torch.aten.add.Tensor %1170, %1195, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1197 = torch.prim.GetAttr %arg0["_param_constant122"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1198 = torch.prim.GetAttr %arg0["_param_constant123"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1199 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_58, %result1_59, %result2_60 = torch.aten.native_layer_norm %1196, %1199, %1197, %1198, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1200 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1201 = torch.aten.view %result0_58, %1200 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1202 = torch.prim.GetAttr %arg0["_param_constant124"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1203 = torch.prim.GetAttr %arg0["_param_constant125"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1204 = torch.aten.addmm %1202, %1201, %1203, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1205 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1206 = torch.aten.view %1204, %1205 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1207 = torch.aten.slice.Tensor %1206, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1208 = torch.aten.slice.Tensor %1206, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1209 = torch.aten.slice.Tensor %1206, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1210 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1211 = torch.aten.view %1207, %1210 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1212 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1213 = torch.aten.permute %1211, %1212 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1214 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1215 = torch.aten.view %1208, %1214 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1216 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1217 = torch.aten.permute %1215, %1216 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1218 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1219 = torch.aten.view %1209, %1218 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1220 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1221 = torch.aten.permute %1219, %1220 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1222 = torch.aten.transpose.int %1217, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1223 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1224 = torch.aten.expand %1213, %1223, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1225 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1226 = torch.aten.view %1224, %1225 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1227 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1228 = torch.aten.expand %1222, %1227, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1229 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1230 = torch.aten.view %1228, %1229 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1231 = torch.aten.bmm %1226, %1230 : !torch.tensor, !torch.tensor -> !torch.tensor
%1232 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1233 = torch.aten._unsafe_view %1231, %1232 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1234 = torch.prim.GetAttr %arg0["_tensor_constant30"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1235 = torch.aten.lift_fresh_copy %1234 : !torch.tensor -> !torch.tensor
%1236 = torch.aten.div.Tensor %1233, %1235 : !torch.tensor, !torch.tensor -> !torch.tensor
%1237 = torch.prim.GetAttr %arg0["_tensor_constant31"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1238 = torch.aten.slice.Tensor %1237, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1239 = torch.aten.slice.Tensor %1238, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1240 = torch.aten.slice.Tensor %1239, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1241 = torch.aten.slice.Tensor %1240, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1242 = torch.aten._to_copy %1241, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%1243 = torch.prim.GetAttr %arg0["_tensor_constant32"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1244 = torch.aten.lift_fresh_copy %1243 : !torch.tensor -> !torch.tensor
%1245 = torch.aten.where.self %1242, %1236, %1244 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%1246 = torch.aten._softmax %1245, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1247 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1248 = torch.aten.expand %1246, %1247, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1249 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1250 = torch.aten.view %1248, %1249 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1251 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1252 = torch.aten.expand %1221, %1251, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1253 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1254 = torch.aten.view %1252, %1253 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1255 = torch.aten.bmm %1250, %1254 : !torch.tensor, !torch.tensor -> !torch.tensor
%1256 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1257 = torch.aten._unsafe_view %1255, %1256 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1258 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1259 = torch.aten.permute %1257, %1258 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1260 = torch.aten.clone %1259, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1261 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1262 = torch.aten.view %1260, %1261 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1263 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1264 = torch.aten.view %1262, %1263 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1265 = torch.prim.GetAttr %arg0["_param_constant126"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1266 = torch.prim.GetAttr %arg0["_param_constant127"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1267 = torch.aten.addmm %1265, %1264, %1266, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1268 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1269 = torch.aten.view %1267, %1268 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1270 = torch.aten.add.Tensor %1269, %1196, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1271 = torch.prim.GetAttr %arg0["_param_constant128"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1272 = torch.prim.GetAttr %arg0["_param_constant129"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1273 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_61, %result1_62, %result2_63 = torch.aten.native_layer_norm %1270, %1273, %1271, %1272, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1274 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1275 = torch.aten.view %result0_61, %1274 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1276 = torch.prim.GetAttr %arg0["_param_constant130"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1277 = torch.prim.GetAttr %arg0["_param_constant131"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1278 = torch.aten.addmm %1276, %1275, %1277, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1279 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1280 = torch.aten.view %1278, %1279 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1281 = torch.aten.mul.Scalar %1280, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1282 = torch.aten.pow.Tensor_Scalar %1280, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%1283 = torch.aten.mul.Scalar %1282, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%1284 = torch.aten.add.Tensor %1280, %1283, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1285 = torch.aten.mul.Scalar %1284, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1286 = torch.aten.tanh %1285 : !torch.tensor -> !torch.tensor
%1287 = torch.aten.add.Scalar %1286, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1288 = torch.aten.mul.Tensor %1281, %1287 : !torch.tensor, !torch.tensor -> !torch.tensor
%1289 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%1290 = torch.aten.view %1288, %1289 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1291 = torch.prim.GetAttr %arg0["_param_constant132"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1292 = torch.prim.GetAttr %arg0["_param_constant133"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1293 = torch.aten.addmm %1291, %1290, %1292, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1294 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1295 = torch.aten.view %1293, %1294 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1296 = torch.aten.add.Tensor %1270, %1295, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1297 = torch.prim.GetAttr %arg0["_param_constant134"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1298 = torch.prim.GetAttr %arg0["_param_constant135"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1299 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_64, %result1_65, %result2_66 = torch.aten.native_layer_norm %1296, %1299, %1297, %1298, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1300 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1301 = torch.aten.view %result0_64, %1300 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1302 = torch.prim.GetAttr %arg0["_param_constant136"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1303 = torch.prim.GetAttr %arg0["_param_constant137"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1304 = torch.aten.addmm %1302, %1301, %1303, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1305 = torch.prim.ListConstruct %int1, %int5, %int2304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1306 = torch.aten.view %1304, %1305 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1307 = torch.aten.slice.Tensor %1306, %int2, %int0, %int768, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1308 = torch.aten.slice.Tensor %1306, %int2, %int768, %int1536, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1309 = torch.aten.slice.Tensor %1306, %int2, %int1536, %int2304, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1310 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1311 = torch.aten.view %1307, %1310 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1312 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1313 = torch.aten.permute %1311, %1312 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1314 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1315 = torch.aten.view %1308, %1314 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1316 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1317 = torch.aten.permute %1315, %1316 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1318 = torch.prim.ListConstruct %int1, %int5, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1319 = torch.aten.view %1309, %1318 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1320 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1321 = torch.aten.permute %1319, %1320 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1322 = torch.aten.transpose.int %1317, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1323 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1324 = torch.aten.expand %1313, %1323, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1325 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1326 = torch.aten.view %1324, %1325 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1327 = torch.prim.ListConstruct %int1, %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1328 = torch.aten.expand %1322, %1327, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1329 = torch.prim.ListConstruct %int12, %int64, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1330 = torch.aten.view %1328, %1329 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1331 = torch.aten.bmm %1326, %1330 : !torch.tensor, !torch.tensor -> !torch.tensor
%1332 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1333 = torch.aten._unsafe_view %1331, %1332 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1334 = torch.prim.GetAttr %arg0["_tensor_constant33"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1335 = torch.aten.lift_fresh_copy %1334 : !torch.tensor -> !torch.tensor
%1336 = torch.aten.div.Tensor %1333, %1335 : !torch.tensor, !torch.tensor -> !torch.tensor
%1337 = torch.prim.GetAttr %arg0["_tensor_constant34"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1338 = torch.aten.slice.Tensor %1337, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1339 = torch.aten.slice.Tensor %1338, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1340 = torch.aten.slice.Tensor %1339, %int2, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1341 = torch.aten.slice.Tensor %1340, %int3, %int0, %int5, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%1342 = torch.aten._to_copy %1341, %int11, %none_0, %none_0, %none_0, %false, %none_0 : !torch.tensor, !torch.int, !torch.none, !torch.none, !torch.none, !torch.bool, !torch.none -> !torch.tensor
%1343 = torch.prim.GetAttr %arg0["_tensor_constant35"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1344 = torch.aten.lift_fresh_copy %1343 : !torch.tensor -> !torch.tensor
%1345 = torch.aten.where.self %1342, %1336, %1344 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%1346 = torch.aten._softmax %1345, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor
%1347 = torch.prim.ListConstruct %int1, %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1348 = torch.aten.expand %1346, %1347, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1349 = torch.prim.ListConstruct %int12, %int5, %int5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1350 = torch.aten.view %1348, %1349 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1351 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1352 = torch.aten.expand %1321, %1351, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1353 = torch.prim.ListConstruct %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1354 = torch.aten.view %1352, %1353 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1355 = torch.aten.bmm %1350, %1354 : !torch.tensor, !torch.tensor -> !torch.tensor
%1356 = torch.prim.ListConstruct %int1, %int12, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1357 = torch.aten._unsafe_view %1355, %1356 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1358 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1359 = torch.aten.permute %1357, %1358 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1360 = torch.aten.clone %1359, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1361 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1362 = torch.aten.view %1360, %1361 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1363 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1364 = torch.aten.view %1362, %1363 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1365 = torch.prim.GetAttr %arg0["_param_constant138"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1366 = torch.prim.GetAttr %arg0["_param_constant139"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1367 = torch.aten.addmm %1365, %1364, %1366, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1368 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1369 = torch.aten.view %1367, %1368 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1370 = torch.aten.add.Tensor %1369, %1296, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1371 = torch.prim.GetAttr %arg0["_param_constant140"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1372 = torch.prim.GetAttr %arg0["_param_constant141"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1373 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_67, %result1_68, %result2_69 = torch.aten.native_layer_norm %1370, %1373, %1371, %1372, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1374 = torch.prim.ListConstruct %int-1, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1375 = torch.aten.view %result0_67, %1374 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1376 = torch.prim.GetAttr %arg0["_param_constant142"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1377 = torch.prim.GetAttr %arg0["_param_constant143"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1378 = torch.aten.addmm %1376, %1375, %1377, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1379 = torch.prim.ListConstruct %int1, %int5, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1380 = torch.aten.view %1378, %1379 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1381 = torch.aten.mul.Scalar %1380, %float5.000000e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1382 = torch.aten.pow.Tensor_Scalar %1380, %float3.000000e00 : !torch.tensor, !torch.float -> !torch.tensor
%1383 = torch.aten.mul.Scalar %1382, %float4.471500e-02 : !torch.tensor, !torch.float -> !torch.tensor
%1384 = torch.aten.add.Tensor %1380, %1383, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1385 = torch.aten.mul.Scalar %1384, %float7.978850e-01 : !torch.tensor, !torch.float -> !torch.tensor
%1386 = torch.aten.tanh %1385 : !torch.tensor -> !torch.tensor
%1387 = torch.aten.add.Scalar %1386, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1388 = torch.aten.mul.Tensor %1381, %1387 : !torch.tensor, !torch.tensor -> !torch.tensor
%1389 = torch.prim.ListConstruct %int-1, %int3072 : (!torch.int, !torch.int) -> !torch.list<int>
%1390 = torch.aten.view %1388, %1389 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1391 = torch.prim.GetAttr %arg0["_param_constant144"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1392 = torch.prim.GetAttr %arg0["_param_constant145"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1393 = torch.aten.addmm %1391, %1390, %1392, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1394 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1395 = torch.aten.view %1393, %1394 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1396 = torch.aten.add.Tensor %1370, %1395, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1397 = torch.prim.GetAttr %arg0["_param_constant146"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1398 = torch.prim.GetAttr %arg0["_param_constant147"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1399 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int>
%result0_70, %result1_71, %result2_72 = torch.aten.native_layer_norm %1396, %1399, %1397, %1398, %float1.000000e-05 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor
%1400 = torch.prim.ListConstruct %int1, %int5, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1401 = torch.aten.view %result0_70, %1400 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1402 = torch.prim.GetAttr %arg0["_param_constant148"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1403 = torch.aten.t %1402 : !torch.tensor -> !torch.tensor
%1404 = torch.prim.ListConstruct %int5, %int768 : (!torch.int, !torch.int) -> !torch.list<int>
%1405 = torch.aten.view %1401, %1404 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1406 = torch.aten.mm %1405, %1403 : !torch.tensor, !torch.tensor -> !torch.tensor
%1407 = torch.prim.ListConstruct %int1, %int5, %int50257 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1408 = torch.aten._unsafe_view %1406, %1407 : !torch.tensor, !torch.list<int> -> !torch.tensor
return %1408 : !torch.tensor
}
// Class-type declaration for the imported FX graph module `_lambda`.
// Emitted by the torch-mlir module importer: every parameter and captured
// constant of the traced Python module becomes a private !torch.tensor
// attribute here, read in `forward` via torch.prim.GetAttr. The parameter
// count and the 768/3072/2304 shapes used in `forward` match a 12-layer
// GPT-2 (hidden=768, heads=12, vocab=50257) -- NOTE(review): model identity
// inferred from shapes, confirm against the exporting script.
torch.class_type @__torch__.torch.fx.graph_module._lambda {
// _param_constant0.._param_constant148: learned parameters lifted to module
// attributes by FX tracing (embeddings, per-block layer norms, attention and
// MLP projections, final LM-head weight). Declared untyped (!torch.tensor);
// concrete shapes come from the module-level torch.tensor.literal values
// that populate these slots.
torch.attr private "_param_constant0" : !torch.tensor
torch.attr private "_param_constant1" : !torch.tensor
torch.attr private "_param_constant2" : !torch.tensor
torch.attr private "_param_constant3" : !torch.tensor
torch.attr private "_param_constant4" : !torch.tensor
torch.attr private "_param_constant5" : !torch.tensor
torch.attr private "_param_constant6" : !torch.tensor
torch.attr private "_param_constant7" : !torch.tensor
torch.attr private "_param_constant8" : !torch.tensor
torch.attr private "_param_constant9" : !torch.tensor
torch.attr private "_param_constant10" : !torch.tensor
torch.attr private "_param_constant11" : !torch.tensor
torch.attr private "_param_constant12" : !torch.tensor
torch.attr private "_param_constant13" : !torch.tensor
torch.attr private "_param_constant14" : !torch.tensor
torch.attr private "_param_constant15" : !torch.tensor
torch.attr private "_param_constant16" : !torch.tensor
torch.attr private "_param_constant17" : !torch.tensor
torch.attr private "_param_constant18" : !torch.tensor
torch.attr private "_param_constant19" : !torch.tensor
torch.attr private "_param_constant20" : !torch.tensor
torch.attr private "_param_constant21" : !torch.tensor
torch.attr private "_param_constant22" : !torch.tensor
torch.attr private "_param_constant23" : !torch.tensor
torch.attr private "_param_constant24" : !torch.tensor
torch.attr private "_param_constant25" : !torch.tensor
torch.attr private "_param_constant26" : !torch.tensor
torch.attr private "_param_constant27" : !torch.tensor
torch.attr private "_param_constant28" : !torch.tensor
torch.attr private "_param_constant29" : !torch.tensor
torch.attr private "_param_constant30" : !torch.tensor
torch.attr private "_param_constant31" : !torch.tensor
torch.attr private "_param_constant32" : !torch.tensor
torch.attr private "_param_constant33" : !torch.tensor
torch.attr private "_param_constant34" : !torch.tensor
torch.attr private "_param_constant35" : !torch.tensor
torch.attr private "_param_constant36" : !torch.tensor
torch.attr private "_param_constant37" : !torch.tensor
torch.attr private "_param_constant38" : !torch.tensor
torch.attr private "_param_constant39" : !torch.tensor
torch.attr private "_param_constant40" : !torch.tensor
torch.attr private "_param_constant41" : !torch.tensor
torch.attr private "_param_constant42" : !torch.tensor
torch.attr private "_param_constant43" : !torch.tensor
torch.attr private "_param_constant44" : !torch.tensor
torch.attr private "_param_constant45" : !torch.tensor
torch.attr private "_param_constant46" : !torch.tensor
torch.attr private "_param_constant47" : !torch.tensor
torch.attr private "_param_constant48" : !torch.tensor
torch.attr private "_param_constant49" : !torch.tensor
torch.attr private "_param_constant50" : !torch.tensor
torch.attr private "_param_constant51" : !torch.tensor
torch.attr private "_param_constant52" : !torch.tensor
torch.attr private "_param_constant53" : !torch.tensor
torch.attr private "_param_constant54" : !torch.tensor
torch.attr private "_param_constant55" : !torch.tensor
torch.attr private "_param_constant56" : !torch.tensor
torch.attr private "_param_constant57" : !torch.tensor
torch.attr private "_param_constant58" : !torch.tensor
torch.attr private "_param_constant59" : !torch.tensor
torch.attr private "_param_constant60" : !torch.tensor
torch.attr private "_param_constant61" : !torch.tensor
torch.attr private "_param_constant62" : !torch.tensor
torch.attr private "_param_constant63" : !torch.tensor
torch.attr private "_param_constant64" : !torch.tensor
torch.attr private "_param_constant65" : !torch.tensor
torch.attr private "_param_constant66" : !torch.tensor
torch.attr private "_param_constant67" : !torch.tensor
torch.attr private "_param_constant68" : !torch.tensor
torch.attr private "_param_constant69" : !torch.tensor
torch.attr private "_param_constant70" : !torch.tensor
torch.attr private "_param_constant71" : !torch.tensor
torch.attr private "_param_constant72" : !torch.tensor
torch.attr private "_param_constant73" : !torch.tensor
torch.attr private "_param_constant74" : !torch.tensor
torch.attr private "_param_constant75" : !torch.tensor
torch.attr private "_param_constant76" : !torch.tensor
torch.attr private "_param_constant77" : !torch.tensor
torch.attr private "_param_constant78" : !torch.tensor
torch.attr private "_param_constant79" : !torch.tensor
torch.attr private "_param_constant80" : !torch.tensor
torch.attr private "_param_constant81" : !torch.tensor
torch.attr private "_param_constant82" : !torch.tensor
torch.attr private "_param_constant83" : !torch.tensor
torch.attr private "_param_constant84" : !torch.tensor
torch.attr private "_param_constant85" : !torch.tensor
torch.attr private "_param_constant86" : !torch.tensor
torch.attr private "_param_constant87" : !torch.tensor
torch.attr private "_param_constant88" : !torch.tensor
torch.attr private "_param_constant89" : !torch.tensor
torch.attr private "_param_constant90" : !torch.tensor
torch.attr private "_param_constant91" : !torch.tensor
torch.attr private "_param_constant92" : !torch.tensor
torch.attr private "_param_constant93" : !torch.tensor
torch.attr private "_param_constant94" : !torch.tensor
torch.attr private "_param_constant95" : !torch.tensor
torch.attr private "_param_constant96" : !torch.tensor
torch.attr private "_param_constant97" : !torch.tensor
torch.attr private "_param_constant98" : !torch.tensor
torch.attr private "_param_constant99" : !torch.tensor
torch.attr private "_param_constant100" : !torch.tensor
torch.attr private "_param_constant101" : !torch.tensor
torch.attr private "_param_constant102" : !torch.tensor
torch.attr private "_param_constant103" : !torch.tensor
torch.attr private "_param_constant104" : !torch.tensor
torch.attr private "_param_constant105" : !torch.tensor
torch.attr private "_param_constant106" : !torch.tensor
torch.attr private "_param_constant107" : !torch.tensor
torch.attr private "_param_constant108" : !torch.tensor
torch.attr private "_param_constant109" : !torch.tensor
torch.attr private "_param_constant110" : !torch.tensor
torch.attr private "_param_constant111" : !torch.tensor
torch.attr private "_param_constant112" : !torch.tensor
torch.attr private "_param_constant113" : !torch.tensor
torch.attr private "_param_constant114" : !torch.tensor
torch.attr private "_param_constant115" : !torch.tensor
torch.attr private "_param_constant116" : !torch.tensor
torch.attr private "_param_constant117" : !torch.tensor
torch.attr private "_param_constant118" : !torch.tensor
torch.attr private "_param_constant119" : !torch.tensor
torch.attr private "_param_constant120" : !torch.tensor
torch.attr private "_param_constant121" : !torch.tensor
torch.attr private "_param_constant122" : !torch.tensor
torch.attr private "_param_constant123" : !torch.tensor
torch.attr private "_param_constant124" : !torch.tensor
torch.attr private "_param_constant125" : !torch.tensor
torch.attr private "_param_constant126" : !torch.tensor
torch.attr private "_param_constant127" : !torch.tensor
torch.attr private "_param_constant128" : !torch.tensor
torch.attr private "_param_constant129" : !torch.tensor
torch.attr private "_param_constant130" : !torch.tensor
torch.attr private "_param_constant131" : !torch.tensor
torch.attr private "_param_constant132" : !torch.tensor
torch.attr private "_param_constant133" : !torch.tensor
torch.attr private "_param_constant134" : !torch.tensor
torch.attr private "_param_constant135" : !torch.tensor
torch.attr private "_param_constant136" : !torch.tensor
torch.attr private "_param_constant137" : !torch.tensor
torch.attr private "_param_constant138" : !torch.tensor
torch.attr private "_param_constant139" : !torch.tensor
torch.attr private "_param_constant140" : !torch.tensor
torch.attr private "_param_constant141" : !torch.tensor
torch.attr private "_param_constant142" : !torch.tensor
torch.attr private "_param_constant143" : !torch.tensor
torch.attr private "_param_constant144" : !torch.tensor
torch.attr private "_param_constant145" : !torch.tensor
torch.attr private "_param_constant146" : !torch.tensor
torch.attr private "_param_constant147" : !torch.tensor
torch.attr private "_param_constant148" : !torch.tensor
// _tensor_constant0.._tensor_constant35: non-parameter tensors captured by
// the trace. In `forward` these are used in triplets per attention block
// (e.g. _tensor_constant33/34/35: scaling divisor, causal-mask source that
// is sliced and cast to bool, and the masked-fill value for
// torch.aten.where.self) -- 36 constants / 3 per block = 12 blocks.
torch.attr private "_tensor_constant0" : !torch.tensor
torch.attr private "_tensor_constant1" : !torch.tensor
torch.attr private "_tensor_constant2" : !torch.tensor
torch.attr private "_tensor_constant3" : !torch.tensor
torch.attr private "_tensor_constant4" : !torch.tensor
torch.attr private "_tensor_constant5" : !torch.tensor
torch.attr private "_tensor_constant6" : !torch.tensor
torch.attr private "_tensor_constant7" : !torch.tensor
torch.attr private "_tensor_constant8" : !torch.tensor
torch.attr private "_tensor_constant9" : !torch.tensor
torch.attr private "_tensor_constant10" : !torch.tensor
torch.attr private "_tensor_constant11" : !torch.tensor
torch.attr private "_tensor_constant12" : !torch.tensor
torch.attr private "_tensor_constant13" : !torch.tensor
torch.attr private "_tensor_constant14" : !torch.tensor
torch.attr private "_tensor_constant15" : !torch.tensor
torch.attr private "_tensor_constant16" : !torch.tensor
torch.attr private "_tensor_constant17" : !torch.tensor
torch.attr private "_tensor_constant18" : !torch.tensor
torch.attr private "_tensor_constant19" : !torch.tensor
torch.attr private "_tensor_constant20" : !torch.tensor
torch.attr private "_tensor_constant21" : !torch.tensor
torch.attr private "_tensor_constant22" : !torch.tensor
torch.attr private "_tensor_constant23" : !torch.tensor
torch.attr private "_tensor_constant24" : !torch.tensor
torch.attr private "_tensor_constant25" : !torch.tensor
torch.attr private "_tensor_constant26" : !torch.tensor
torch.attr private "_tensor_constant27" : !torch.tensor
torch.attr private "_tensor_constant28" : !torch.tensor
torch.attr private "_tensor_constant29" : !torch.tensor
torch.attr private "_tensor_constant30" : !torch.tensor
torch.attr private "_tensor_constant31" : !torch.tensor
torch.attr private "_tensor_constant32" : !torch.tensor
torch.attr private "_tensor_constant33" : !torch.tensor
torch.attr private "_tensor_constant34" : !torch.tensor
torch.attr private "_tensor_constant35" : !torch.tensor
// Standard nn.Module bookkeeping carried over by the importer.
torch.attr private "training" : !torch.bool
torch.attr private "_is_full_backward_hook" : !torch.optional<bool>
// Python source text of the FX graph, exposed through __code_getter.
torch.attr private "_code" : !torch.str
// Methods bound to this class type; bodies are the func.func private
// definitions earlier in the file.
torch.method private "__code_getter", @__torch__.torch.fx.graph_module._lambda.__code_getter
torch.method "forward", @__torch__.torch.fx.graph_module._lambda.forward
}
// Module-level weight literals; the actual data is elided as external
// dense_resource blobs, only shapes/dtypes remain. These values populate the
// `_param_constant*` attributes of the class type (presumably in declaration
// order, via torch.nn_module later in the file -- NOTE(review): the
// nn_module initializer is outside this view, confirm the pairing).
// Shapes follow a GPT-2 layout: per transformer block, 12 literals in the
// order ln_1 {weight,bias}, attn qkv {bias[2304], weight[768x2304]},
// attn proj {bias[768], weight[768x768]}, ln_2 {weight,bias},
// mlp fc {bias[3072], weight[768x3072]}, mlp proj {bias[768],
// weight[3072x768]} -- inferred from shapes, TODO confirm.
// %0: presumably the token embedding table (vocab 50257 x hidden 768).
%0 = torch.tensor.literal(dense_resource<__elided__> : tensor<50257x768xf32>) : !torch.tensor<[50257,768],f32>
// %1: presumably the learned positional embedding (max positions 1024).
%1 = torch.tensor.literal(dense_resource<__elided__> : tensor<1024x768xf32>) : !torch.tensor<[1024,768],f32>
// Block 0 parameters (%2-%13).
%2 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%3 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%4 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%5 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%6 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%7 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%8 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%9 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%10 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%11 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%12 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%13 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
// Block 1 parameters (%14-%25).
%14 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%15 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%16 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%17 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%18 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%19 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%20 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%21 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%22 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%23 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%24 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%25 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
// Block 2 parameters (%26-%37).
%26 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%27 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%28 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%29 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%30 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%31 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%32 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%33 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%34 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%35 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%36 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%37 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
// Block 3 parameters (%38-%49).
%38 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%39 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%40 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%41 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%42 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%43 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%44 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%45 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%46 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%47 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%48 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%49 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
// Block 4 parameters (%50-...; the run continues past this view).
%50 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%51 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%52 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%53 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%54 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%55 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%56 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%57 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%58 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%59 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%60 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%61 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
%62 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%63 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%64 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%65 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%66 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%67 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%68 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%69 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%70 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%71 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%72 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%73 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
%74 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%75 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%76 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%77 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%78 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%79 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%80 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%81 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%82 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%83 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%84 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%85 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
%86 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%87 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%88 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%89 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%90 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%91 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%92 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%93 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%94 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%95 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%96 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%97 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
%98 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%99 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%100 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%101 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%102 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%103 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%104 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%105 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%106 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%107 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%108 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%109 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
%110 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%111 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%112 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%113 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%114 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%115 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%116 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%117 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%118 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%119 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%120 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%121 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
%122 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%123 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%124 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%125 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%126 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%127 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%128 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%129 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%130 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%131 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%132 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%133 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
%134 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%135 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%136 = torch.tensor.literal(dense_resource<__elided__> : tensor<2304xf32>) : !torch.tensor<[2304],f32>
%137 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x2304xf32>) : !torch.tensor<[768,2304],f32>
%138 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%139 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x768xf32>) : !torch.tensor<[768,768],f32>
%140 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%141 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%142 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072xf32>) : !torch.tensor<[3072],f32>
%143 = torch.tensor.literal(dense_resource<__elided__> : tensor<768x3072xf32>) : !torch.tensor<[768,3072],f32>
%144 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%145 = torch.tensor.literal(dense_resource<__elided__> : tensor<3072x768xf32>) : !torch.tensor<[3072,768],f32>
%146 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%147 = torch.tensor.literal(dense_resource<__elided__> : tensor<768xf32>) : !torch.tensor<[768],f32>
%148 = torch.tensor.literal(dense_resource<__elided__> : tensor<50257x768xf32>) : !torch.tensor<[50257,768],f32>
%149 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%150 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%151 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%152 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%153 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%154 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%155 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%156 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%157 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%158 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%159 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%160 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%161 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%162 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%163 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%164 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%165 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%166 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%167 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%168 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%169 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%170 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%171 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%172 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%173 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%174 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%175 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%176 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%177 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%178 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%179 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%180 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%181 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%182 = torch.tensor.literal(dense<8.000000e+00> : tensor<f32>) : !torch.tensor<[],f32>
%183 = torch.tensor.literal(dense_resource<__elided__> : tensor<1x1x1024x1024xui8>) : !torch.tensor<[1,1,1024,1024],ui8>
%184 = torch.tensor.literal(dense<-3.40282347E+38> : tensor<f32>) : !torch.tensor<[],f32>
%true = torch.constant.bool true
%none = torch.constant.none
%str = torch.constant.str "\0A\0A\0Adef forward(self, arg0_1):\0A view = torch.ops.aten.view(arg0_1, [-1, 5]); arg0_1 = None\0A arange = torch.ops.aten.arange(0, 5, dtype = torch.int64, device = device(type='cpu'), pin_memory = False)\0A unsqueeze = torch.ops.aten.unsqueeze(arange, 0); arange = None\0A view_1 = torch.ops.aten.view(unsqueeze, [-1, 5]); unsqueeze = None\0A _param_constant0 = self._param_constant0\0A embedding = torch.ops.aten.embedding(_param_constant0, view); _param_constant0 = view = None\0A _param_constant1 = self._param_constant1\0A embedding_1 = torch.ops.aten.embedding(_param_constant1, view_1); _param_constant1 = view_1 = None\0A add = torch.ops.aten.add(embedding, embedding_1); embedding = embedding_1 = None\0A _param_constant2 = self._param_constant2\0A _param_constant3 = self._param_constant3\0A native_layer_norm = torch.ops.aten.native_layer_norm(add, [768], _param_constant2, _param_constant3, 1e-05); _param_constant2 = _param_constant3 = None\0A getitem = native_layer_norm[0]\0A getitem_1 = native_layer_norm[1]\0A getitem_2 = native_layer_norm[2]; native_layer_norm = None\0A view_2 = torch.ops.aten.view(getitem, [-1, 768]); getitem = None\0A _param_constant4 = self._param_constant4\0A _param_constant5 = self._param_constant5\0A addmm = torch.ops.aten.addmm(_param_constant4, view_2, _param_constant5); _param_constant4 = view_2 = _param_constant5 = None\0A view_3 = torch.ops.aten.view(addmm, [1, 5, 2304]); addmm = None\0A slice_1 = torch.ops.aten.slice(view_3, 2, 0, 768)\0A slice_2 = torch.ops.aten.slice(view_3, 2, 768, 1536)\0A slice_3 = torch.ops.aten.slice(view_3, 2, 1536, 2304); view_3 = None\0A view_4 = torch.ops.aten.view(slice_1, [1, 5, 12, 64]); slice_1 = None\0A permute = torch.ops.aten.permute(view_4, [0, 2, 1, 3]); view_4 = None\0A view_5 = torch.ops.aten.view(slice_2, [1, 5, 12, 64]); slice_2 = None\0A permute_1 = torch.ops.aten.permute(view_5, [0, 2, 1, 3]); view_5 = None\0A view_6 = torch.ops.aten.view(slice_3, [1, 5, 12, 
64]); slice_3 = None\0A permute_2 = torch.ops.aten.permute(view_6, [0, 2, 1, 3]); view_6 = None\0A transpose = torch.ops.aten.transpose(permute_1, -1, -2); permute_1 = None\0A expand = torch.ops.aten.expand(permute, [1, 12, 5, 64]); permute = None\0A view_7 = torch.ops.aten.view(expand, [12, 5, 64]); expand = None\0A expand_1 = torch.ops.aten.expand(transpose, [1, 12, 64, 5]); transpose = None\0A view_8 = torch.ops.aten.view(expand_1, [12, 64, 5]); expand_1 = None\0A bmm = torch.ops.aten.bmm(view_7, view_8); view_7 = view_8 = None\0A _unsafe_view = torch.ops.aten._unsafe_view(bmm, [1, 12, 5, 5]); bmm = None\0A _tensor_constant0 = self._tensor_constant0\0A lift_fresh_copy = torch.ops.aten.lift_fresh_copy(_tensor_constant0); _tensor_constant0 = None\0A div = torch.ops.aten.div(_unsafe_view, lift_fresh_copy); _unsafe_view = lift_fresh_copy = None\0A _tensor_constant1 = self._tensor_constant1\0A slice_4 = torch.ops.aten.slice(_tensor_constant1, 0, 0, 9223372036854775807); _tensor_constant1 = None\0A slice_5 = torch.ops.aten.slice(slice_4, 1, 0, 9223372036854775807); slice_4 = None\0A slice_6 = torch.ops.aten.slice(slice_5, 2, 0, 5); slice_5 = None\0A slice_7 = torch.ops.aten.slice(slice_6, 3, 0, 5); slice_6 = None\0A _to_copy = torch.ops.aten._to_copy(slice_7, dtype = torch.bool); slice_7 = None\0A _tensor_constant2 = self._tensor_constant2\0A lift_fresh_copy_1 = torch.ops.aten.lift_fresh_copy(_tensor_constant2); _tensor_constant2 = None\0A where = torch.ops.aten.where(_to_copy, div, lift_fresh_copy_1); _to_copy = div = lift_fresh_copy_1 = None\0A _softmax = torch.ops.aten._softmax(where, -1, False); where = None\0A detach = torch.ops.aten.detach(_softmax)\0A expand_2 = torch.ops.aten.expand(_softmax, [1, 12, 5, 5]); _softmax = None\0A view_9 = torch.ops.aten.view(expand_2, [12, 5, 5]); expand_2 = None\0A expand_3 = torch.ops.aten.expand(permute_2, [1, 12, 5, 64]); permute_2 = None\0A view_10 = torch.ops.aten.view(expand_3, [12, 5, 64]); expand_3 = None\0A bmm_1 = 
torch.ops.aten.bmm(view_9, view_10); view_9 = view_10 = None\0A _unsafe_view_1 = torch.ops.aten._unsafe_view(bmm_1, [1, 12, 5, 64]); bmm_1 = None\0A permute_3 = torch.ops.aten.permute(_unsafe_view_1, [0, 2, 1, 3]); _unsafe_view_1 = None\0A clone = torch.ops.aten.clone(permute_3, memory_format = torch.contiguous_format); permute_3 = None\0A view_11 = torch.ops.aten.view(clone, [1, 5, 768]); clone = None\0A view_12 = torch.ops.aten.view(view_11, [-1, 768]); view_11 = None\0A _param_constant6 = self._param_constant6\0A _param_constant7 = self._param_constant7\0A addmm_1 = torch.ops.aten.addmm(_param_constant6, view_12, _param_constant7); _param_constant6 = view_12 = _param_constant7 = None\0A view_13 = torch.ops.aten.view(addmm_1, [1, 5, 768]); addmm_1 = None\0A add_1 = torch.ops.aten.add(view_13, add); view_13 = add = None\0A _param_constant8 = self._param_constant8\0A _param_constant9 = self._param_constant9\0A native_layer_norm_1 = torch.ops.aten.native_layer_norm(add_1, [768], _param_constant8, _param_constant9, 1e-05); _param_constant8 = _param_constant9 = None\0A getitem_3 = native_layer_norm_1[0]\0A getitem_4 = native_layer_norm_1[1]\0A getitem_5 = native_layer_norm_1[2]; native_layer_norm_1 = None\0A view_14 = torch.ops.aten.view(getitem_3, [-1, 768]); getitem_3 = None\0A _param_constant10 = self._param_constant10\0A _param_constant11 = self._param_constant11\0A addmm_2 = torch.ops.aten.addmm(_param_constant10, view_14, _param_constant11); _param_constant10 = view_14 = _param_constant11 = None\0A view_15 = torch.ops.aten.view(addmm_2, [1, 5, 3072]); addmm_2 = None\0A mul = torch.ops.aten.mul(view_15, 0.5)\0A pow_1 = torch.ops.aten.pow(view_15, 3.0)\0A mul_1 = torch.ops.aten.mul(pow_1, 0.044715); pow_1 = None\0A add_2 = torch.ops.aten.add(view_15, mul_1); view_15 = mul_1 = None\0A mul_2 = torch.ops.aten.mul(add_2, 0.7978845608028654); add_2 = None\0A tanh = torch.ops.aten.tanh(mul_2); mul_2 = None\0A detach_1 = torch.ops.aten.detach(tanh)\0A add_3 = 
torch.ops.aten.add(tanh, 1.0); tanh = None\0A mul_3 = torch.ops.aten.mul(mul, add_3); mul = add_3 = None\0A view_16 = torch.ops.aten.view(mul_3, [-1, 3072]); mul_3 = None\0A _param_constant12 = self._param_constant12\0A _param_constant13 = self._param_constant13\0A addmm_3 = torch.ops.aten.addmm(_param_constant12, view_16, _param_constant13); _param_constant12 = view_16 = _param_constant13 = None\0A view_17 = torch.ops.aten.view(addmm_3, [1, 5, 768]); addmm_3 = None\0A add_4 = torch.ops.aten.add(add_1, view_17); add_1 = view_17 = None\0A _param_constant14 = self._param_constant14\0A _param_constant15 = self._param_constant15\0A native_layer_norm_2 = torch.ops.aten.native_layer_norm(add_4, [768], _param_constant14, _param_constant15, 1e-05); _param_constant14 = _param_constant15 = None\0A getitem_6 = native_layer_norm_2[0]\0A getitem_7 = native_layer_norm_2[1]\0A getitem_8 = native_layer_norm_2[2]; native_layer_norm_2 = None\0A view_18 = torch.ops.aten.view(getitem_6, [-1, 768]); getitem_6 = None\0A _param_constant16 = self._param_constant16\0A _param_constant17 = self._param_constant17\0A addmm_4 = torch.ops.aten.addmm(_param_constant16, view_18, _param_constant17); _param_constant16 = view_18 = _param_constant17 = None\0A view_19 = torch.ops.aten.view(addmm_4, [1, 5, 2304]); addmm_4 = None\0A slice_8 = torch.ops.aten.slice(view_19, 2, 0, 768)\0A slice_9 = torch.ops.aten.slice(view_19, 2, 768, 1536)\0A slice_10 = torch.ops.aten.slice(view_19, 2, 1536, 2304); view_19 = None\0A view_20 = torch.ops.aten.view(slice_8, [1, 5, 12, 64]); slice_8 = None\0A permute_4 = torch.ops.aten.permute(view_20, [0, 2, 1, 3]); view_20 = None\0A view_21 = torch.ops.aten.view(slice_9, [1, 5, 12, 64]); slice_9 = None\0A permute_5 = torch.ops.aten.permute(view_21, [0, 2, 1, 3]); view_21 = None\0A view_22 = torch.ops.aten.view(slice_10, [1, 5, 12, 64]); slice_10 = None\0A permute_6 = torch.ops.aten.permute(view_22, [0, 2, 1, 3]); view_22 = None\0A transpose_1 = 
torch.ops.aten.transpose(permute_5, -1, -2); permute_5 = None\0A expand_4 = torch.ops.aten.expand(permute_4, [1, 12, 5, 64]); permute_4 = None\0A view_23 = torch.ops.aten.view(expand_4, [12, 5, 64]); expand_4 = None\0A expand_5 = torch.ops.aten.expand(transpose_1, [1, 12, 64, 5]); transpose_1 = None\0A view_24 = torch.ops.aten.view(expand_5, [12, 64, 5]); expand_5 = None\0A bmm_2 = torch.ops.aten.bmm(view_23, view_24); view_23 = view_24 = None\0A _unsafe_view_2 = torch.ops.aten._unsafe_view(bmm_2, [1, 12, 5, 5]); bmm_2 = None\0A _tensor_constant3 = self._tensor_constant3\0A lift_fresh_copy_2 = torch.ops.aten.lift_fresh_copy(_tensor_constant3); _tensor_constant3 = None\0A div_1 = torch.ops.aten.div(_unsafe_view_2, lift_fresh_copy_2); _unsafe_view_2 = lift_fresh_copy_2 = None\0A _tensor_constant4 = self._tensor_constant4\0A slice_11 = torch.ops.aten.slice(_tensor_constant4, 0, 0, 9223372036854775807); _tensor_constant4 = None\0A slice_12 = torch.ops.aten.slice(slice_11, 1, 0, 9223372036854775807); slice_11 = None\0A slice_13 = torch.ops.aten.slice(slice_12, 2, 0, 5); slice_12 = None\0A slice_14 = torch.ops.aten.slice(slice_13, 3, 0, 5); slice_13 = None\0A _to_copy_1 = torch.ops.aten._to_copy(slice_14, dtype = torch.bool); slice_14 = None\0A _tensor_constant5 = self._tensor_constant5\0A lift_fresh_copy_3 = torch.ops.aten.lift_fresh_copy(_tensor_constant5); _tensor_constant5 = None\0A where_1 = torch.ops.aten.where(_to_copy_1, div_1, lift_fresh_copy_3); _to_copy_1 = div_1 = lift_fresh_copy_3 = None\0A _softmax_1 = torch.ops.aten._softmax(where_1, -1, False); where_1 = None\0A detach_2 = torch.ops.aten.detach(_softmax_1)\0A expand_6 = torch.ops.aten.expand(_softmax_1, [1, 12, 5, 5]); _softmax_1 = None\0A view_25 = torch.ops.aten.view(expand_6, [12, 5, 5]); expand_6 = None\0A expand_7 = torch.ops.aten.expand(permute_6, [1, 12, 5, 64]); permute_6 = None\0A view_26 = torch.ops.aten.view(expand_7, [12, 5, 64]); expand_7 = None\0A bmm_3 = torch.ops.aten.bmm(view_25, 
view_26); view_25 = view_26 = None\0A _unsafe_view_3 = torch.ops.aten._unsafe_view(bmm_3, [1, 12, 5, 64]); bmm_3 = None\0A permute_7 = torch.ops.aten.permute(_unsafe_view_3, [0, 2, 1, 3]); _unsafe_view_3 = None\0A clone_1 = torch.ops.aten.clone(permute_7, memory_format = torch.contiguous_format); permute_7 = None\0A view_27 = torch.ops.aten.view(clone_1, [1, 5, 768]); clone_1 = None\0A view_28 = torch.ops.aten.view(view_27, [-1, 768]); view_27 = None\0A _param_constant18 = self._param_constant18\0A _param_constant19 = self._param_constant19\0A addmm_5 = torch.ops.aten.addmm(_param_constant18, view_28, _param_constant19); _param_constant18 = view_28 = _param_constant19 = None\0A view_29 = torch.ops.aten.view(addmm_5, [1, 5, 768]); addmm_5 = None\0A add_5 = torch.ops.aten.add(view_29, add_4); view_29 = add_4 = None\0A _param_constant20 = self._param_constant20\0A _param_constant21 = self._param_constant21\0A native_layer_norm_3 = torch.ops.aten.native_layer_norm(add_5, [768], _param_constant20, _param_constant21, 1e-05); _param_constant20 = _param_constant21 = None\0A getitem_9 = native_layer_norm_3[0]\0A getitem_10 = native_layer_norm_3[1]\0A getitem_11 = native_layer_norm_3[2]; native_layer_norm_3 = None\0A view_30 = torch.ops.aten.view(getitem_9, [-1, 768]); getitem_9 = None\0A _param_constant22 = self._param_constant22\0A _param_constant23 = self._param_constant23\0A addmm_6 = torch.ops.aten.addmm(_param_constant22, view_30, _param_constant23); _param_constant22 = view_30 = _param_constant23 = None\0A view_31 = torch.ops.aten.view(addmm_6, [1, 5, 3072]); addmm_6 = None\0A mul_4 = torch.ops.aten.mul(view_31, 0.5)\0A pow_2 = torch.ops.aten.pow(view_31, 3.0)\0A mul_5 = torch.ops.aten.mul(pow_2, 0.044715); pow_2 = None\0A add_6 = torch.ops.aten.add(view_31, mul_5); view_31 = mul_5 = None\0A mul_6 = torch.ops.aten.mul(add_6, 0.7978845608028654); add_6 = None\0A tanh_1 = torch.ops.aten.tanh(mul_6); mul_6 = None\0A detach_3 = torch.ops.aten.detach(tanh_1)\0A add_7 = 
torch.ops.aten.add(tanh_1, 1.0); tanh_1 = None\0A mul_7 = torch.ops.aten.mul(mul_4, add_7); mul_4 = add_7 = None\0A view_32 = torch.ops.aten.view(mul_7, [-1, 3072]); mul_7 = None\0A _param_constant24 = self._param_constant24\0A _param_constant25 = self._param_constant25\0A addmm_7 = torch.ops.aten.addmm(_param_constant24, view_32, _param_constant25); _param_constant24 = view_32 = _param_constant25 = None\0A view_33 = torch.ops.aten.view(addmm_7, [1, 5, 768]); addmm_7 = None\0A add_8 = torch.ops.aten.add(add_5, view_33); add_5 = view_33 = None\0A _param_constant26 = self._param_constant26\0A _param_constant27 = self._param_constant27\0A native_layer_norm_4 = torch.ops.aten.native_layer_norm(add_8, [768], _param_constant26, _param_constant27, 1e-05); _param_constant26 = _param_constant27 = None\0A getitem_12 = native_layer_norm_4[0]\0A getitem_13 = native_layer_norm_4[1]\0A getitem_14 = native_layer_norm_4[2]; native_layer_norm_4 = None\0A view_34 = torch.ops.aten.view(getitem_12, [-1, 768]); getitem_12 = None\0A _param_constant28 = self._param_constant28\0A _param_constant29 = self._param_constant29\0A addmm_8 = torch.ops.aten.addmm(_param_constant28, view_34, _param_constant29); _param_constant28 = view_34 = _param_constant29 = None\0A view_35 = torch.ops.aten.view(addmm_8, [1, 5, 2304]); addmm_8 = None\0A slice_15 = torch.ops.aten.slice(view_35, 2, 0, 768)\0A slice_16 = torch.ops.aten.slice(view_35, 2, 768, 1536)\0A slice_17 = torch.ops.aten.slice(view_35, 2, 1536, 2304); view_35 = None\0A view_36 = torch.ops.aten.view(slice_15, [1, 5, 12, 64]); slice_15 = None\0A permute_8 = torch.ops.aten.permute(view_36, [0, 2, 1, 3]); view_36 = None\0A view_37 = torch.ops.aten.view(slice_16, [1, 5, 12, 64]); slice_16 = None\0A permute_9 = torch.ops.aten.permute(view_37, [0, 2, 1, 3]); view_37 = None\0A view_38 = torch.ops.aten.view(slice_17, [1, 5, 12, 64]); slice_17 = None\0A permute_10 = torch.ops.aten.permute(view_38, [0, 2, 1, 3]); view_38 = None\0A transpose_2 = 
torch.ops.aten.transpose(permute_9, -1, -2); permute_9 = None\0A expand_8 = torch.ops.aten.expand(permute_8, [1, 12, 5, 64]); permute_8 = None\0A view_39 = torch.ops.aten.view(expand_8, [12, 5, 64]); expand_8 = None\0A expand_9 = torch.ops.aten.expand(transpose_2, [1, 12, 64, 5]); transpose_2 = None\0A view_40 = torch.ops.aten.view(expand_9, [12, 64, 5]); expand_9 = None\0A bmm_4 = torch.ops.aten.bmm(view_39, view_40); view_39 = view_40 = None\0A _unsafe_view_4 = torch.ops.aten._unsafe_view(bmm_4, [1, 12, 5, 5]); bmm_4 = None\0A _tensor_constant6 = self._tensor_constant6\0A lift_fresh_copy_4 = torch.ops.aten.lift_fresh_copy(_tensor_constant6); _tensor_constant6 = None\0A div_2 = torch.ops.aten.div(_unsafe_view_4, lift_fresh_copy_4); _unsafe_view_4 = lift_fresh_copy_4 = None\0A _tensor_constant7 = self._tensor_constant7\0A slice_18 = torch.ops.aten.slice(_tensor_constant7, 0, 0, 9223372036854775807); _tensor_constant7 = None\0A slice_19 = torch.ops.aten.slice(slice_18, 1, 0, 9223372036854775807); slice_18 = None\0A slice_20 = torch.ops.aten.slice(slice_19, 2, 0, 5); slice_19 = None\0A slice_21 = torch.ops.aten.slice(slice_20, 3, 0, 5); slice_20 = None\0A _to_copy_2 = torch.ops.aten._to_copy(slice_21, dtype = torch.bool); slice_21 = None\0A _tensor_constant8 = self._tensor_constant8\0A lift_fresh_copy_5 = torch.ops.aten.lift_fresh_copy(_tensor_constant8); _tensor_constant8 = None\0A where_2 = torch.ops.aten.where(_to_copy_2, div_2, lift_fresh_copy_5); _to_copy_2 = div_2 = lift_fresh_copy_5 = None\0A _softmax_2 = torch.ops.aten._softmax(where_2, -1, False); where_2 = None\0A detach_4 = torch.ops.aten.detach(_softmax_2)\0A expand_10 = torch.ops.aten.expand(_softmax_2, [1, 12, 5, 5]); _softmax_2 = None\0A view_41 = torch.ops.aten.view(expand_10, [12, 5, 5]); expand_10 = None\0A expand_11 = torch.ops.aten.expand(permute_10, [1, 12, 5, 64]); permute_10 = None\0A view_42 = torch.ops.aten.view(expand_11, [12, 5, 64]); expand_11 = None\0A bmm_5 = torch.ops.aten.bmm(view_41, 
view_42); view_41 = view_42 = None\0A _unsafe_view_5 = torch.ops.aten._unsafe_view(bmm_5, [1, 12, 5, 64]); bmm_5 = None\0A permute_11 = torch.ops.aten.permute(_unsafe_view_5, [0, 2, 1, 3]); _unsafe_view_5 = None\0A clone_2 = torch.ops.aten.clone(permute_11, memory_format = torch.contiguous_format); permute_11 = None\0A view_43 = torch.ops.aten.view(clone_2, [1, 5, 768]); clone_2 = None\0A view_44 = torch.ops.aten.view(view_43, [-1, 768]); view_43 = None\0A _param_constant30 = self._param_constant30\0A _param_constant31 = self._param_constant31\0A addmm_9 = torch.ops.aten.addmm(_param_constant30, view_44, _param_constant31); _param_constant30 = view_44 = _param_constant31 = None\0A view_45 = torch.ops.aten.view(addmm_9, [1, 5, 768]); addmm_9 = None\0A add_9 = torch.ops.aten.add(view_45, add_8); view_45 = add_8 = None\0A _param_constant32 = self._param_constant32\0A _param_constant33 = self._param_constant33\0A native_layer_norm_5 = torch.ops.aten.native_layer_norm(add_9, [768], _param_constant32, _param_constant33, 1e-05); _param_constant32 = _param_constant33 = None\0A getitem_15 = native_layer_norm_5[0]\0A getitem_16 = native_layer_norm_5[1]\0A getitem_17 = native_layer_norm_5[2]; native_layer_norm_5 = None\0A view_46 = torch.ops.aten.view(getitem_15, [-1, 768]); getitem_15 = None\0A _param_constant34 = self._param_constant34\0A _param_constant35 = self._param_constant35\0A addmm_10 = torch.ops.aten.addmm(_param_constant34, view_46, _param_constant35); _param_constant34 = view_46 = _param_constant35 = None\0A view_47 = torch.ops.aten.view(addmm_10, [1, 5, 3072]); addmm_10 = None\0A mul_8 = torch.ops.aten.mul(view_47, 0.5)\0A pow_3 = torch.ops.aten.pow(view_47, 3.0)\0A mul_9 = torch.ops.aten.mul(pow_3, 0.044715); pow_3 = None\0A add_10 = torch.ops.aten.add(view_47, mul_9); view_47 = mul_9 = None\0A mul_10 = torch.ops.aten.mul(add_10, 0.7978845608028654); add_10 = None\0A tanh_2 = torch.ops.aten.tanh(mul_10); mul_10 = None\0A detach_5 = 
torch.ops.aten.detach(tanh_2)\0A add_11 = torch.ops.aten.add(tanh_2, 1.0); tanh_2 = None\0A mul_11 = torch.ops.aten.mul(mul_8, add_11); mul_8 = add_11 = None\0A view_48 = torch.ops.aten.view(mul_11, [-1, 3072]); mul_11 = None\0A _param_constant36 = self._param_constant36\0A _param_constant37 = self._param_constant37\0A addmm_11 = torch.ops.aten.addmm(_param_constant36, view_48, _param_constant37); _param_constant36 = view_48 = _param_constant37 = None\0A view_49 = torch.ops.aten.view(addmm_11, [1, 5, 768]); addmm_11 = None\0A add_12 = torch.ops.aten.add(add_9, view_49); add_9 = view_49 = None\0A _param_constant38 = self._param_constant38\0A _param_constant39 = self._param_constant39\0A native_layer_norm_6 = torch.ops.aten.native_layer_norm(add_12, [768], _param_constant38, _param_constant39, 1e-05); _param_constant38 = _param_constant39 = None\0A getitem_18 = native_layer_norm_6[0]\0A getitem_19 = native_layer_norm_6[1]\0A getitem_20 = native_layer_norm_6[2]; native_layer_norm_6 = None\0A view_50 = torch.ops.aten.view(getitem_18, [-1, 768]); getitem_18 = None\0A _param_constant40 = self._param_constant40\0A _param_constant41 = self._param_constant41\0A addmm_12 = torch.ops.aten.addmm(_param_constant40, view_50, _param_constant41); _param_constant40 = view_50 = _param_constant41 = None\0A view_51 = torch.ops.aten.view(addmm_12, [1, 5, 2304]); addmm_12 = None\0A slice_22 = torch.ops.aten.slice(view_51, 2, 0, 768)\0A slice_23 = torch.ops.aten.slice(view_51, 2, 768, 1536)\0A slice_24 = torch.ops.aten.slice(view_51, 2, 1536, 2304); view_51 = None\0A view_52 = torch.ops.aten.view(slice_22, [1, 5, 12, 64]); slice_22 = None\0A permute_12 = torch.ops.aten.permute(view_52, [0, 2, 1, 3]); view_52 = None\0A view_53 = torch.ops.aten.view(slice_23, [1, 5, 12, 64]); slice_23 = None\0A permute_13 = torch.ops.aten.permute(view_53, [0, 2, 1, 3]); view_53 = None\0A view_54 = torch.ops.aten.view(slice_24, [1, 5, 12, 64]); slice_24 = None\0A permute_14 = torch.ops.aten.permute(view_54, 
[0, 2, 1, 3]); view_54 = None\0A transpose_3 = torch.ops.aten.transpose(permute_13, -1, -2); permute_13 = None\0A expand_12 = torch.ops.aten.expand(permute_12, [1, 12, 5, 64]); permute_12 = None\0A view_55 = torch.ops.aten.view(expand_12, [12, 5, 64]); expand_12 = None\0A expand_13 = torch.ops.aten.expand(transpose_3, [1, 12, 64, 5]); transpose_3 = None\0A view_56 = torch.ops.aten.view(expand_13, [12, 64, 5]); expand_13 = None\0A bmm_6 = torch.ops.aten.bmm(view_55, view_56); view_55 = view_56 = None\0A _unsafe_view_6 = torch.ops.aten._unsafe_view(bmm_6, [1, 12, 5, 5]); bmm_6 = None\0A _tensor_constant9 = self._tensor_constant9\0A lift_fresh_copy_6 = torch.ops.aten.lift_fresh_copy(_tensor_constant9); _tensor_constant9 = None\0A div_3 = torch.ops.aten.div(_unsafe_view_6, lift_fresh_copy_6); _unsafe_view_6 = lift_fresh_copy_6 = None\0A _tensor_constant10 = self._tensor_constant10\0A slice_25 = torch.ops.aten.slice(_tensor_constant10, 0, 0, 9223372036854775807); _tensor_constant10 = None\0A slice_26 = torch.ops.aten.slice(slice_25, 1, 0, 9223372036854775807); slice_25 = None\0A slice_27 = torch.ops.aten.slice(slice_26, 2, 0, 5); slice_26 = None\0A slice_28 = torch.ops.aten.slice(slice_27, 3, 0, 5); slice_27 = None\0A _to_copy_3 = torch.ops.aten._to_copy(slice_28, dtype = torch.bool); slice_28 = None\0A _tensor_constant11 = self._tensor_constant11\0A lift_fresh_copy_7 = torch.ops.aten.lift_fresh_copy(_tensor_constant11); _tensor_constant11 = None\0A where_3 = torch.ops.aten.where(_to_copy_3, div_3, lift_fresh_copy_7); _to_copy_3 = div_3 = lift_fresh_copy_7 = None\0A _softmax_3 = torch.ops.aten._softmax(where_3, -1, False); where_3 = None\0A detach_6 = torch.ops.aten.detach(_softmax_3)\0A expand_14 = torch.ops.aten.expand(_softmax_3, [1, 12, 5, 5]); _softmax_3 = None\0A view_57 = torch.ops.aten.view(expand_14, [12, 5, 5]); expand_14 = None\0A expand_15 = torch.ops.aten.expand(permute_14, [1, 12, 5, 64]); permute_14 = None\0A view_58 = torch.ops.aten.view(expand_15, [12, 
5, 64]); expand_15 = None\0A bmm_7 = torch.ops.aten.bmm(view_57, view_58); view_57 = view_58 = None\0A _unsafe_view_7 = torch.ops.aten._unsafe_view(bmm_7, [1, 12, 5, 64]); bmm_7 = None\0A permute_15 = torch.ops.aten.permute(_unsafe_view_7, [0, 2, 1, 3]); _unsafe_view_7 = None\0A clone_3 = torch.ops.aten.clone(permute_15, memory_format = torch.contiguous_format); permute_15 = None\0A view_59 = torch.ops.aten.view(clone_3, [1, 5, 768]); clone_3 = None\0A view_60 = torch.ops.aten.view(view_59, [-1, 768]); view_59 = None\0A _param_constant42 = self._param_constant42\0A _param_constant43 = self._param_constant43\0A addmm_13 = torch.ops.aten.addmm(_param_constant42, view_60, _param_constant43); _param_constant42 = view_60 = _param_constant43 = None\0A view_61 = torch.ops.aten.view(addmm_13, [1, 5, 768]); addmm_13 = None\0A add_13 = torch.ops.aten.add(view_61, add_12); view_61 = add_12 = None\0A _param_constant44 = self._param_constant44\0A _param_constant45 = self._param_constant45\0A native_layer_norm_7 = torch.ops.aten.native_layer_norm(add_13, [768], _param_constant44, _param_constant45, 1e-05); _param_constant44 = _param_constant45 = None\0A getitem_21 = native_layer_norm_7[0]\0A getitem_22 = native_layer_norm_7[1]\0A getitem_23 = native_layer_norm_7[2]; native_layer_norm_7 = None\0A view_62 = torch.ops.aten.view(getitem_21, [-1, 768]); getitem_21 = None\0A _param_constant46 = self._param_constant46\0A _param_constant47 = self._param_constant47\0A addmm_14 = torch.ops.aten.addmm(_param_constant46, view_62, _param_constant47); _param_constant46 = view_62 = _param_constant47 = None\0A view_63 = torch.ops.aten.view(addmm_14, [1, 5, 3072]); addmm_14 = None\0A mul_12 = torch.ops.aten.mul(view_63, 0.5)\0A pow_4 = torch.ops.aten.pow(view_63, 3.0)\0A mul_13 = torch.ops.aten.mul(pow_4, 0.044715); pow_4 = None\0A add_14 = torch.ops.aten.add(view_63, mul_13); view_63 = mul_13 = None\0A mul_14 = torch.ops.aten.mul(add_14, 0.7978845608028654); add_14 = None\0A tanh_3 = 
torch.ops.aten.tanh(mul_14); mul_14 = None\0A detach_7 = torch.ops.aten.detach(tanh_3)\0A add_15 = torch.ops.aten.add(tanh_3, 1.0); tanh_3 = None\0A mul_15 = torch.ops.aten.mul(mul_12, add_15); mul_12 = add_15 = None\0A view_64 = torch.ops.aten.view(mul_15, [-1, 3072]); mul_15 = None\0A _param_constant48 = self._param_constant48\0A _param_constant49 = self._param_constant49\0A addmm_15 = torch.ops.aten.addmm(_param_constant48, view_64, _param_constant49); _param_constant48 = view_64 = _param_constant49 = None\0A view_65 = torch.ops.aten.view(addmm_15, [1, 5, 768]); addmm_15 = None\0A add_16 = torch.ops.aten.add(add_13, view_65); add_13 = view_65 = None\0A _param_constant50 = self._param_constant50\0A _param_constant51 = self._param_constant51\0A native_layer_norm_8 = torch.ops.aten.native_layer_norm(add_16, [768], _param_constant50, _param_constant51, 1e-05); _param_constant50 = _param_constant51 = None\0A getitem_24 = native_layer_norm_8[0]\0A getitem_25 = native_layer_norm_8[1]\0A getitem_26 = native_layer_norm_8[2]; native_layer_norm_8 = None\0A view_66 = torch.ops.aten.view(getitem_24, [-1, 768]); getitem_24 = None\0A _param_constant52 = self._param_constant52\0A _param_constant53 = self._param_constant53\0A addmm_16 = torch.ops.aten.addmm(_param_constant52, view_66, _param_constant53); _param_constant52 = view_66 = _param_constant53 = None\0A view_67 = torch.ops.aten.view(addmm_16, [1, 5, 2304]); addmm_16 = None\0A slice_29 = torch.ops.aten.slice(view_67, 2, 0, 768)\0A slice_30 = torch.ops.aten.slice(view_67, 2, 768, 1536)\0A slice_31 = torch.ops.aten.slice(view_67, 2, 1536, 2304); view_67 = None\0A view_68 = torch.ops.aten.view(slice_29, [1, 5, 12, 64]); slice_29 = None\0A permute_16 = torch.ops.aten.permute(view_68, [0, 2, 1, 3]); view_68 = None\0A view_69 = torch.ops.aten.view(slice_30, [1, 5, 12, 64]); slice_30 = None\0A permute_17 = torch.ops.aten.permute(view_69, [0, 2, 1, 3]); view_69 = None\0A view_70 = torch.ops.aten.view(slice_31, [1, 5, 12, 64]); 
slice_31 = None\0A permute_18 = torch.ops.aten.permute(view_70, [0, 2, 1, 3]); view_70 = None\0A transpose_4 = torch.ops.aten.transpose(permute_17, -1, -2); permute_17 = None\0A expand_16 = torch.ops.aten.expand(permute_16, [1, 12, 5, 64]); permute_16 = None\0A view_71 = torch.ops.aten.view(expand_16, [12, 5, 64]); expand_16 = None\0A expand_17 = torch.ops.aten.expand(transpose_4, [1, 12, 64, 5]); transpose_4 = None\0A view_72 = torch.ops.aten.view(expand_17, [12, 64, 5]); expand_17 = None\0A bmm_8 = torch.ops.aten.bmm(view_71, view_72); view_71 = view_72 = None\0A _unsafe_view_8 = torch.ops.aten._unsafe_view(bmm_8, [1, 12, 5, 5]); bmm_8 = None\0A _tensor_constant12 = self._tensor_constant12\0A lift_fresh_copy_8 = torch.ops.aten.lift_fresh_copy(_tensor_constant12); _tensor_constant12 = None\0A div_4 = torch.ops.aten.div(_unsafe_view_8, lift_fresh_copy_8); _unsafe_view_8 = lift_fresh_copy_8 = None\0A _tensor_constant13 = self._tensor_constant13\0A slice_32 = torch.ops.aten.slice(_tensor_constant13, 0, 0, 9223372036854775807); _tensor_constant13 = None\0A slice_33 = torch.ops.aten.slice(slice_32, 1, 0, 9223372036854775807); slice_32 = None\0A slice_34 = torch.ops.aten.slice(slice_33, 2, 0, 5); slice_33 = None\0A slice_35 = torch.ops.aten.slice(slice_34, 3, 0, 5); slice_34 = None\0A _to_copy_4 = torch.ops.aten._to_copy(slice_35, dtype = torch.bool); slice_35 = None\0A _tensor_constant14 = self._tensor_constant14\0A lift_fresh_copy_9 = torch.ops.aten.lift_fresh_copy(_tensor_constant14); _tensor_constant14 = None\0A where_4 = torch.ops.aten.where(_to_copy_4, div_4, lift_fresh_copy_9); _to_copy_4 = div_4 = lift_fresh_copy_9 = None\0A _softmax_4 = torch.ops.aten._softmax(where_4, -1, False); where_4 = None\0A detach_8 = torch.ops.aten.detach(_softmax_4)\0A expand_18 = torch.ops.aten.expand(_softmax_4, [1, 12, 5, 5]); _softmax_4 = None\0A view_73 = torch.ops.aten.view(expand_18, [12, 5, 5]); expand_18 = None\0A expand_19 = torch.ops.aten.expand(permute_18, [1, 12, 5, 64]); 
permute_18 = None\0A view_74 = torch.ops.aten.view(expand_19, [12, 5, 64]); expand_19 = None\0A bmm_9 = torch.ops.aten.bmm(view_73, view_74); view_73 = view_74 = None\0A _unsafe_view_9 = torch.ops.aten._unsafe_view(bmm_9, [1, 12, 5, 64]); bmm_9 = None\0A permute_19 = torch.ops.aten.permute(_unsafe_view_9, [0, 2, 1, 3]); _unsafe_view_9 = None\0A clone_4 = torch.ops.aten.clone(permute_19, memory_format = torch.contiguous_format); permute_19 = None\0A view_75 = torch.ops.aten.view(clone_4, [1, 5, 768]); clone_4 = None\0A view_76 = torch.ops.aten.view(view_75, [-1, 768]); view_75 = None\0A _param_constant54 = self._param_constant54\0A _param_constant55 = self._param_constant55\0A addmm_17 = torch.ops.aten.addmm(_param_constant54, view_76, _param_constant55); _param_constant54 = view_76 = _param_constant55 = None\0A view_77 = torch.ops.aten.view(addmm_17, [1, 5, 768]); addmm_17 = None\0A add_17 = torch.ops.aten.add(view_77, add_16); view_77 = add_16 = None\0A _param_constant56 = self._param_constant56\0A _param_constant57 = self._param_constant57\0A native_layer_norm_9 = torch.ops.aten.native_layer_norm(add_17, [768], _param_constant56, _param_constant57, 1e-05); _param_constant56 = _param_constant57 = None\0A getitem_27 = native_layer_norm_9[0]\0A getitem_28 = native_layer_norm_9[1]\0A getitem_29 = native_layer_norm_9[2]; native_layer_norm_9 = None\0A view_78 = torch.ops.aten.view(getitem_27, [-1, 768]); getitem_27 = None\0A _param_constant58 = self._param_constant58\0A _param_constant59 = self._param_constant59\0A addmm_18 = torch.ops.aten.addmm(_param_constant58, view_78, _param_constant59); _param_constant58 = view_78 = _param_constant59 = None\0A view_79 = torch.ops.aten.view(addmm_18, [1, 5, 3072]); addmm_18 = None\0A mul_16 = torch.ops.aten.mul(view_79, 0.5)\0A pow_5 = torch.ops.aten.pow(view_79, 3.0)\0A mul_17 = torch.ops.aten.mul(pow_5, 0.044715); pow_5 = None\0A add_18 = torch.ops.aten.add(view_79, mul_17); view_79 = mul_17 = None\0A mul_18 = 
torch.ops.aten.mul(add_18, 0.7978845608028654); add_18 = None\0A tanh_4 = torch.ops.aten.tanh(mul_18); mul_18 = None\0A detach_9 = torch.ops.aten.detach(tanh_4)\0A add_19 = torch.ops.aten.add(tanh_4, 1.0); tanh_4 = None\0A mul_19 = torch.ops.aten.mul(mul_16, add_19); mul_16 = add_19 = None\0A view_80 = torch.ops.aten.view(mul_19, [-1, 3072]); mul_19 = None\0A _param_constant60 = self._param_constant60\0A _param_constant61 = self._param_constant61\0A addmm_19 = torch.ops.aten.addmm(_param_constant60, view_80, _param_constant61); _param_constant60 = view_80 = _param_constant61 = None\0A view_81 = torch.ops.aten.view(addmm_19, [1, 5, 768]); addmm_19 = None\0A add_20 = torch.ops.aten.add(add_17, view_81); add_17 = view_81 = None\0A _param_constant62 = self._param_constant62\0A _param_constant63 = self._param_constant63\0A native_layer_norm_10 = torch.ops.aten.native_layer_norm(add_20, [768], _param_constant62, _param_constant63, 1e-05); _param_constant62 = _param_constant63 = None\0A getitem_30 = native_layer_norm_10[0]\0A getitem_31 = native_layer_norm_10[1]\0A getitem_32 = native_layer_norm_10[2]; native_layer_norm_10 = None\0A view_82 = torch.ops.aten.view(getitem_30, [-1, 768]); getitem_30 = None\0A _param_constant64 = self._param_constant64\0A _param_constant65 = self._param_constant65\0A addmm_20 = torch.ops.aten.addmm(_param_constant64, view_82, _param_constant65); _param_constant64 = view_82 = _param_constant65 = None\0A view_83 = torch.ops.aten.view(addmm_20, [1, 5, 2304]); addmm_20 = None\0A slice_36 = torch.ops.aten.slice(view_83, 2, 0, 768)\0A slice_37 = torch.ops.aten.slice(view_83, 2, 768, 1536)\0A slice_38 = torch.ops.aten.slice(view_83, 2, 1536, 2304); view_83 = None\0A view_84 = torch.ops.aten.view(slice_36, [1, 5, 12, 64]); slice_36 = None\0A permute_20 = torch.ops.aten.permute(view_84, [0, 2, 1, 3]); view_84 = None\0A view_85 = torch.ops.aten.view(slice_37, [1, 5, 12, 64]); slice_37 = None\0A permute_21 = torch.ops.aten.permute(view_85, [0, 2, 1, 
3]); view_85 = None\0A view_86 = torch.ops.aten.view(slice_38, [1, 5, 12, 64]); slice_38 = None\0A permute_22 = torch.ops.aten.permute(view_86, [0, 2, 1, 3]); view_86 = None\0A transpose_5 = torch.ops.aten.transpose(permute_21, -1, -2); permute_21 = None\0A expand_20 = torch.ops.aten.expand(permute_20, [1, 12, 5, 64]); permute_20 = None\0A view_87 = torch.ops.aten.view(expand_20, [12, 5, 64]); expand_20 = None\0A expand_21 = torch.ops.aten.expand(transpose_5, [1, 12, 64, 5]); transpose_5 = None\0A view_88 = torch.ops.aten.view(expand_21, [12, 64, 5]); expand_21 = None\0A bmm_10 = torch.ops.aten.bmm(view_87, view_88); view_87 = view_88 = None\0A _unsafe_view_10 = torch.ops.aten._unsafe_view(bmm_10, [1, 12, 5, 5]); bmm_10 = None\0A _tensor_constant15 = self._tensor_constant15\0A lift_fresh_copy_10 = torch.ops.aten.lift_fresh_copy(_tensor_constant15); _tensor_constant15 = None\0A div_5 = torch.ops.aten.div(_unsafe_view_10, lift_fresh_copy_10); _unsafe_view_10 = lift_fresh_copy_10 = None\0A _tensor_constant16 = self._tensor_constant16\0A slice_39 = torch.ops.aten.slice(_tensor_constant16, 0, 0, 9223372036854775807); _tensor_constant16 = None\0A slice_40 = torch.ops.aten.slice(slice_39, 1, 0, 9223372036854775807); slice_39 = None\0A slice_41 = torch.ops.aten.slice(slice_40, 2, 0, 5); slice_40 = None\0A slice_42 = torch.ops.aten.slice(slice_41, 3, 0, 5); slice_41 = None\0A _to_copy_5 = torch.ops.aten._to_copy(slice_42, dtype = torch.bool); slice_42 = None\0A _tensor_constant17 = self._tensor_constant17\0A lift_fresh_copy_11 = torch.ops.aten.lift_fresh_copy(_tensor_constant17); _tensor_constant17 = None\0A where_5 = torch.ops.aten.where(_to_copy_5, div_5, lift_fresh_copy_11); _to_copy_5 = div_5 = lift_fresh_copy_11 = None\0A _softmax_5 = torch.ops.aten._softmax(where_5, -1, False); where_5 = None\0A detach_10 = torch.ops.aten.detach(_softmax_5)\0A expand_22 = torch.ops.aten.expand(_softmax_5, [1, 12, 5, 5]); _softmax_5 = None\0A view_89 = torch.ops.aten.view(expand_22, 
[12, 5, 5]); expand_22 = None\0A expand_23 = torch.ops.aten.expand(permute_22, [1, 12, 5, 64]); permute_22 = None\0A view_90 = torch.ops.aten.view(expand_23, [12, 5, 64]); expand_23 = None\0A bmm_11 = torch.ops.aten.bmm(view_89, view_90); view_89 = view_90 = None\0A _unsafe_view_11 = torch.ops.aten._unsafe_view(bmm_11, [1, 12, 5, 64]); bmm_11 = None\0A permute_23 = torch.ops.aten.permute(_unsafe_view_11, [0, 2, 1, 3]); _unsafe_view_11 = None\0A clone_5 = torch.ops.aten.clone(permute_23, memory_format = torch.contiguous_format); permute_23 = None\0A view_91 = torch.ops.aten.view(clone_5, [1, 5, 768]); clone_5 = None\0A view_92 = torch.ops.aten.view(view_91, [-1, 768]); view_91 = None\0A _param_constant66 = self._param_constant66\0A _param_constant67 = self._param_constant67\0A addmm_21 = torch.ops.aten.addmm(_param_constant66, view_92, _param_constant67); _param_constant66 = view_92 = _param_constant67 = None\0A view_93 = torch.ops.aten.view(addmm_21, [1, 5, 768]); addmm_21 = None\0A add_21 = torch.ops.aten.add(view_93, add_20); view_93 = add_20 = None\0A _param_constant68 = self._param_constant68\0A _param_constant69 = self._param_constant69\0A native_layer_norm_11 = torch.ops.aten.native_layer_norm(add_21, [768], _param_constant68, _param_constant69, 1e-05); _param_constant68 = _param_constant69 = None\0A getitem_33 = native_layer_norm_11[0]\0A getitem_34 = native_layer_norm_11[1]\0A getitem_35 = native_layer_norm_11[2]; native_layer_norm_11 = None\0A view_94 = torch.ops.aten.view(getitem_33, [-1, 768]); getitem_33 = None\0A _param_constant70 = self._param_constant70\0A _param_constant71 = self._param_constant71\0A addmm_22 = torch.ops.aten.addmm(_param_constant70, view_94, _param_constant71); _param_constant70 = view_94 = _param_constant71 = None\0A view_95 = torch.ops.aten.view(addmm_22, [1, 5, 3072]); addmm_22 = None\0A mul_20 = torch.ops.aten.mul(view_95, 0.5)\0A pow_6 = torch.ops.aten.pow(view_95, 3.0)\0A mul_21 = torch.ops.aten.mul(pow_6, 0.044715); pow_6 = 
None\0A add_22 = torch.ops.aten.add(view_95, mul_21); view_95 = mul_21 = None\0A mul_22 = torch.ops.aten.mul(add_22, 0.7978845608028654); add_22 = None\0A tanh_5 = torch.ops.aten.tanh(mul_22); mul_22 = None\0A detach_11 = torch.ops.aten.detach(tanh_5)\0A add_23 = torch.ops.aten.add(tanh_5, 1.0); tanh_5 = None\0A mul_23 = torch.ops.aten.mul(mul_20, add_23); mul_20 = add_23 = None\0A view_96 = torch.ops.aten.view(mul_23, [-1, 3072]); mul_23 = None\0A _param_constant72 = self._param_constant72\0A _param_constant73 = self._param_constant73\0A addmm_23 = torch.ops.aten.addmm(_param_constant72, view_96, _param_constant73); _param_constant72 = view_96 = _param_constant73 = None\0A view_97 = torch.ops.aten.view(addmm_23, [1, 5, 768]); addmm_23 = None\0A add_24 = torch.ops.aten.add(add_21, view_97); add_21 = view_97 = None\0A _param_constant74 = self._param_constant74\0A _param_constant75 = self._param_constant75\0A native_layer_norm_12 = torch.ops.aten.native_layer_norm(add_24, [768], _param_constant74, _param_constant75, 1e-05); _param_constant74 = _param_constant75 = None\0A getitem_36 = native_layer_norm_12[0]\0A getitem_37 = native_layer_norm_12[1]\0A getitem_38 = native_layer_norm_12[2]; native_layer_norm_12 = None\0A view_98 = torch.ops.aten.view(getitem_36, [-1, 768]); getitem_36 = None\0A _param_constant76 = self._param_constant76\0A _param_constant77 = self._param_constant77\0A addmm_24 = torch.ops.aten.addmm(_param_constant76, view_98, _param_constant77); _param_constant76 = view_98 = _param_constant77 = None\0A view_99 = torch.ops.aten.view(addmm_24, [1, 5, 2304]); addmm_24 = None\0A slice_43 = torch.ops.aten.slice(view_99, 2, 0, 768)\0A slice_44 = torch.ops.aten.slice(view_99, 2, 768, 1536)\0A slice_45 = torch.ops.aten.slice(view_99, 2, 1536, 2304); view_99 = None\0A view_100 = torch.ops.aten.view(slice_43, [1, 5, 12, 64]); slice_43 = None\0A permute_24 = torch.ops.aten.permute(view_100, [0, 2, 1, 3]); view_100 = None\0A view_101 = torch.ops.aten.view(slice_44, 
[1, 5, 12, 64]); slice_44 = None\0A permute_25 = torch.ops.aten.permute(view_101, [0, 2, 1, 3]); view_101 = None\0A view_102 = torch.ops.aten.view(slice_45, [1, 5, 12, 64]); slice_45 = None\0A permute_26 = torch.ops.aten.permute(view_102, [0, 2, 1, 3]); view_102 = None\0A transpose_6 = torch.ops.aten.transpose(permute_25, -1, -2); permute_25 = None\0A expand_24 = torch.ops.aten.expand(permute_24, [1, 12, 5, 64]); permute_24 = None\0A view_103 = torch.ops.aten.view(expand_24, [12, 5, 64]); expand_24 = None\0A expand_25 = torch.ops.aten.expand(transpose_6, [1, 12, 64, 5]); transpose_6 = None\0A view_104 = torch.ops.aten.view(expand_25, [12, 64, 5]); expand_25 = None\0A bmm_12 = torch.ops.aten.bmm(view_103, view_104); view_103 = view_104 = None\0A _unsafe_view_12 = torch.ops.aten._unsafe_view(bmm_12, [1, 12, 5, 5]); bmm_12 = None\0A _tensor_constant18 = self._tensor_constant18\0A lift_fresh_copy_12 = torch.ops.aten.lift_fresh_copy(_tensor_constant18); _tensor_constant18 = None\0A div_6 = torch.ops.aten.div(_unsafe_view_12, lift_fresh_copy_12); _unsafe_view_12 = lift_fresh_copy_12 = None\0A _tensor_constant19 = self._tensor_constant19\0A slice_46 = torch.ops.aten.slice(_tensor_constant19, 0, 0, 9223372036854775807); _tensor_constant19 = None\0A slice_47 = torch.ops.aten.slice(slice_46, 1, 0, 9223372036854775807); slice_46 = None\0A slice_48 = torch.ops.aten.slice(slice_47, 2, 0, 5); slice_47 = None\0A slice_49 = torch.ops.aten.slice(slice_48, 3, 0, 5); slice_48 = None\0A _to_copy_6 = torch.ops.aten._to_copy(slice_49, dtype = torch.bool); slice_49 = None\0A _tensor_constant20 = self._tensor_constant20\0A lift_fresh_copy_13 = torch.ops.aten.lift_fresh_copy(_tensor_constant20); _tensor_constant20 = None\0A where_6 = torch.ops.aten.where(_to_copy_6, div_6, lift_fresh_copy_13); _to_copy_6 = div_6 = lift_fresh_copy_13 = None\0A _softmax_6 = torch.ops.aten._softmax(where_6, -1, False); where_6 = None\0A detach_12 = torch.ops.aten.detach(_softmax_6)\0A expand_26 = 
torch.ops.aten.expand(_softmax_6, [1, 12, 5, 5]); _softmax_6 = None\0A view_105 = torch.ops.aten.view(expand_26, [12, 5, 5]); expand_26 = None\0A expand_27 = torch.ops.aten.expand(permute_26, [1, 12, 5, 64]); permute_26 = None\0A view_106 = torch.ops.aten.view(expand_27, [12, 5, 64]); expand_27 = None\0A bmm_13 = torch.ops.aten.bmm(view_105, view_106); view_105 = view_106 = None\0A _unsafe_view_13 = torch.ops.aten._unsafe_view(bmm_13, [1, 12, 5, 64]); bmm_13 = None\0A permute_27 = torch.ops.aten.permute(_unsafe_view_13, [0, 2, 1, 3]); _unsafe_view_13 = None\0A clone_6 = torch.ops.aten.clone(permute_27, memory_format = torch.contiguous_format); permute_27 = None\0A view_107 = torch.ops.aten.view(clone_6, [1, 5, 768]); clone_6 = None\0A view_108 = torch.ops.aten.view(view_107, [-1, 768]); view_107 = None\0A _param_constant78 = self._param_constant78\0A _param_constant79 = self._param_constant79\0A addmm_25 = torch.ops.aten.addmm(_param_constant78, view_108, _param_constant79); _param_constant78 = view_108 = _param_constant79 = None\0A view_109 = torch.ops.aten.view(addmm_25, [1, 5, 768]); addmm_25 = None\0A add_25 = torch.ops.aten.add(view_109, add_24); view_109 = add_24 = None\0A _param_constant80 = self._param_constant80\0A _param_constant81 = self._param_constant81\0A native_layer_norm_13 = torch.ops.aten.native_layer_norm(add_25, [768], _param_constant80, _param_constant81, 1e-05); _param_constant80 = _param_constant81 = None\0A getitem_39 = native_layer_norm_13[0]\0A getitem_40 = native_layer_norm_13[1]\0A getitem_41 = native_layer_norm_13[2]; native_layer_norm_13 = None\0A view_110 = torch.ops.aten.view(getitem_39, [-1, 768]); getitem_39 = None\0A _param_constant82 = self._param_constant82\0A _param_constant83 = self._param_constant83\0A addmm_26 = torch.ops.aten.addmm(_param_constant82, view_110, _param_constant83); _param_constant82 = view_110 = _param_constant83 = None\0A view_111 = torch.ops.aten.view(addmm_26, [1, 5, 3072]); addmm_26 = None\0A mul_24 = 
torch.ops.aten.mul(view_111, 0.5)\0A pow_7 = torch.ops.aten.pow(view_111, 3.0)\0A mul_25 = torch.ops.aten.mul(pow_7, 0.044715); pow_7 = None\0A add_26 = torch.ops.aten.add(view_111, mul_25); view_111 = mul_25 = None\0A mul_26 = torch.ops.aten.mul(add_26, 0.7978845608028654); add_26 = None\0A tanh_6 = torch.ops.aten.tanh(mul_26); mul_26 = None\0A detach_13 = torch.ops.aten.detach(tanh_6)\0A add_27 = torch.ops.aten.add(tanh_6, 1.0); tanh_6 = None\0A mul_27 = torch.ops.aten.mul(mul_24, add_27); mul_24 = add_27 = None\0A view_112 = torch.ops.aten.view(mul_27, [-1, 3072]); mul_27 = None\0A _param_constant84 = self._param_constant84\0A _param_constant85 = self._param_constant85\0A addmm_27 = torch.ops.aten.addmm(_param_constant84, view_112, _param_constant85); _param_constant84 = view_112 = _param_constant85 = None\0A view_113 = torch.ops.aten.view(addmm_27, [1, 5, 768]); addmm_27 = None\0A add_28 = torch.ops.aten.add(add_25, view_113); add_25 = view_113 = None\0A _param_constant86 = self._param_constant86\0A _param_constant87 = self._param_constant87\0A native_layer_norm_14 = torch.ops.aten.native_layer_norm(add_28, [768], _param_constant86, _param_constant87, 1e-05); _param_constant86 = _param_constant87 = None\0A getitem_42 = native_layer_norm_14[0]\0A getitem_43 = native_layer_norm_14[1]\0A getitem_44 = native_layer_norm_14[2]; native_layer_norm_14 = None\0A view_114 = torch.ops.aten.view(getitem_42, [-1, 768]); getitem_42 = None\0A _param_constant88 = self._param_constant88\0A _param_constant89 = self._param_constant89\0A addmm_28 = torch.ops.aten.addmm(_param_constant88, view_114, _param_constant89); _param_constant88 = view_114 = _param_constant89 = None\0A view_115 = torch.ops.aten.view(addmm_28, [1, 5, 2304]); addmm_28 = None\0A slice_50 = torch.ops.aten.slice(view_115, 2, 0, 768)\0A slice_51 = torch.ops.aten.slice(view_115, 2, 768, 1536)\0A slice_52 = torch.ops.aten.slice(view_115, 2, 1536, 2304); view_115 = None\0A view_116 = torch.ops.aten.view(slice_50, [1, 
5, 12, 64]); slice_50 = None\0A permute_28 = torch.ops.aten.permute(view_116, [0, 2, 1, 3]); view_116 = None\0A view_117 = torch.ops.aten.view(slice_51, [1, 5, 12, 64]); slice_51 = None\0A permute_29 = torch.ops.aten.permute(view_117, [0, 2, 1, 3]); view_117 = None\0A view_118 = torch.ops.aten.view(slice_52, [1, 5, 12, 64]); slice_52 = None\0A permute_30 = torch.ops.aten.permute(view_118, [0, 2, 1, 3]); view_118 = None\0A transpose_7 = torch.ops.aten.transpose(permute_29, -1, -2); permute_29 = None\0A expand_28 = torch.ops.aten.expand(permute_28, [1, 12, 5, 64]); permute_28 = None\0A view_119 = torch.ops.aten.view(expand_28, [12, 5, 64]); expand_28 = None\0A expand_29 = torch.ops.aten.expand(transpose_7, [1, 12, 64, 5]); transpose_7 = None\0A view_120 = torch.ops.aten.view(expand_29, [12, 64, 5]); expand_29 = None\0A bmm_14 = torch.ops.aten.bmm(view_119, view_120); view_119 = view_120 = None\0A _unsafe_view_14 = torch.ops.aten._unsafe_view(bmm_14, [1, 12, 5, 5]); bmm_14 = None\0A _tensor_constant21 = self._tensor_constant21\0A lift_fresh_copy_14 = torch.ops.aten.lift_fresh_copy(_tensor_constant21); _tensor_constant21 = None\0A div_7 = torch.ops.aten.div(_unsafe_view_14, lift_fresh_copy_14); _unsafe_view_14 = lift_fresh_copy_14 = None\0A _tensor_constant22 = self._tensor_constant22\0A slice_53 = torch.ops.aten.slice(_tensor_constant22, 0, 0, 9223372036854775807); _tensor_constant22 = None\0A slice_54 = torch.ops.aten.slice(slice_53, 1, 0, 9223372036854775807); slice_53 = None\0A slice_55 = torch.ops.aten.slice(slice_54, 2, 0, 5); slice_54 = None\0A slice_56 = torch.ops.aten.slice(slice_55, 3, 0, 5); slice_55 = None\0A _to_copy_7 = torch.ops.aten._to_copy(slice_56, dtype = torch.bool); slice_56 = None\0A _tensor_constant23 = self._tensor_constant23\0A lift_fresh_copy_15 = torch.ops.aten.lift_fresh_copy(_tensor_constant23); _tensor_constant23 = None\0A where_7 = torch.ops.aten.where(_to_copy_7, div_7, lift_fresh_copy_15); _to_copy_7 = div_7 = lift_fresh_copy_15 = 
None\0A _softmax_7 = torch.ops.aten._softmax(where_7, -1, False); where_7 = None\0A detach_14 = torch.ops.aten.detach(_softmax_7)\0A expand_30 = torch.ops.aten.expand(_softmax_7, [1, 12, 5, 5]); _softmax_7 = None\0A view_121 = torch.ops.aten.view(expand_30, [12, 5, 5]); expand_30 = None\0A expand_31 = torch.ops.aten.expand(permute_30, [1, 12, 5, 64]); permute_30 = None\0A view_122 = torch.ops.aten.view(expand_31, [12, 5, 64]); expand_31 = None\0A bmm_15 = torch.ops.aten.bmm(view_121, view_122); view_121 = view_122 = None\0A _unsafe_view_15 = torch.ops.aten._unsafe_view(bmm_15, [1, 12, 5, 64]); bmm_15 = None\0A permute_31 = torch.ops.aten.permute(_unsafe_view_15, [0, 2, 1, 3]); _unsafe_view_15 = None\0A clone_7 = torch.ops.aten.clone(permute_31, memory_format = torch.contiguous_format); permute_31 = None\0A view_123 = torch.ops.aten.view(clone_7, [1, 5, 768]); clone_7 = None\0A view_124 = torch.ops.aten.view(view_123, [-1, 768]); view_123 = None\0A _param_constant90 = self._param_constant90\0A _param_constant91 = self._param_constant91\0A addmm_29 = torch.ops.aten.addmm(_param_constant90, view_124, _param_constant91); _param_constant90 = view_124 = _param_constant91 = None\0A view_125 = torch.ops.aten.view(addmm_29, [1, 5, 768]); addmm_29 = None\0A add_29 = torch.ops.aten.add(view_125, add_28); view_125 = add_28 = None\0A _param_constant92 = self._param_constant92\0A _param_constant93 = self._param_constant93\0A native_layer_norm_15 = torch.ops.aten.native_layer_norm(add_29, [768], _param_constant92, _param_constant93, 1e-05); _param_constant92 = _param_constant93 = None\0A getitem_45 = native_layer_norm_15[0]\0A getitem_46 = native_layer_norm_15[1]\0A getitem_47 = native_layer_norm_15[2]; native_layer_norm_15 = None\0A view_126 = torch.ops.aten.view(getitem_45, [-1, 768]); getitem_45 = None\0A _param_constant94 = self._param_constant94\0A _param_constant95 = self._param_constant95\0A addmm_30 = torch.ops.aten.addmm(_param_constant94, view_126, _param_constant95); 
_param_constant94 = view_126 = _param_constant95 = None\0A view_127 = torch.ops.aten.view(addmm_30, [1, 5, 3072]); addmm_30 = None\0A mul_28 = torch.ops.aten.mul(view_127, 0.5)\0A pow_8 = torch.ops.aten.pow(view_127, 3.0)\0A mul_29 = torch.ops.aten.mul(pow_8, 0.044715); pow_8 = None\0A add_30 = torch.ops.aten.add(view_127, mul_29); view_127 = mul_29 = None\0A mul_30 = torch.ops.aten.mul(add_30, 0.7978845608028654); add_30 = None\0A tanh_7 = torch.ops.aten.tanh(mul_30); mul_30 = None\0A detach_15 = torch.ops.aten.detach(tanh_7)\0A add_31 = torch.ops.aten.add(tanh_7, 1.0); tanh_7 = None\0A mul_31 = torch.ops.aten.mul(mul_28, add_31); mul_28 = add_31 = None\0A view_128 = torch.ops.aten.view(mul_31, [-1, 3072]); mul_31 = None\0A _param_constant96 = self._param_constant96\0A _param_constant97 = self._param_constant97\0A addmm_31 = torch.ops.aten.addmm(_param_constant96, view_128, _param_constant97); _param_constant96 = view_128 = _param_constant97 = None\0A view_129 = torch.ops.aten.view(addmm_31, [1, 5, 768]); addmm_31 = None\0A add_32 = torch.ops.aten.add(add_29, view_129); add_29 = view_129 = None\0A _param_constant98 = self._param_constant98\0A _param_constant99 = self._param_constant99\0A native_layer_norm_16 = torch.ops.aten.native_layer_norm(add_32, [768], _param_constant98, _param_constant99, 1e-05); _param_constant98 = _param_constant99 = None\0A getitem_48 = native_layer_norm_16[0]\0A getitem_49 = native_layer_norm_16[1]\0A getitem_50 = native_layer_norm_16[2]; native_layer_norm_16 = None\0A view_130 = torch.ops.aten.view(getitem_48, [-1, 768]); getitem_48 = None\0A _param_constant100 = self._param_constant100\0A _param_constant101 = self._param_constant101\0A addmm_32 = torch.ops.aten.addmm(_param_constant100, view_130, _param_constant101); _param_constant100 = view_130 = _param_constant101 = None\0A view_131 = torch.ops.aten.view(addmm_32, [1, 5, 2304]); addmm_32 = None\0A slice_57 = torch.ops.aten.slice(view_131, 2, 0, 768)\0A slice_58 = 
torch.ops.aten.slice(view_131, 2, 768, 1536)\0A slice_59 = torch.ops.aten.slice(view_131, 2, 1536, 2304); view_131 = None\0A view_132 = torch.ops.aten.view(slice_57, [1, 5, 12, 64]); slice_57 = None\0A permute_32 = torch.ops.aten.permute(view_132, [0, 2, 1, 3]); view_132 = None\0A view_133 = torch.ops.aten.view(slice_58, [1, 5, 12, 64]); slice_58 = None\0A permute_33 = torch.ops.aten.permute(view_133, [0, 2, 1, 3]); view_133 = None\0A view_134 = torch.ops.aten.view(slice_59, [1, 5, 12, 64]); slice_59 = None\0A permute_34 = torch.ops.aten.permute(view_134, [0, 2, 1, 3]); view_134 = None\0A transpose_8 = torch.ops.aten.transpose(permute_33, -1, -2); permute_33 = None\0A expand_32 = torch.ops.aten.expand(permute_32, [1, 12, 5, 64]); permute_32 = None\0A view_135 = torch.ops.aten.view(expand_32, [12, 5, 64]); expand_32 = None\0A expand_33 = torch.ops.aten.expand(transpose_8, [1, 12, 64, 5]); transpose_8 = None\0A view_136 = torch.ops.aten.view(expand_33, [12, 64, 5]); expand_33 = None\0A bmm_16 = torch.ops.aten.bmm(view_135, view_136); view_135 = view_136 = None\0A _unsafe_view_16 = torch.ops.aten._unsafe_view(bmm_16, [1, 12, 5, 5]); bmm_16 = None\0A _tensor_constant24 = self._tensor_constant24\0A lift_fresh_copy_16 = torch.ops.aten.lift_fresh_copy(_tensor_constant24); _tensor_constant24 = None\0A div_8 = torch.ops.aten.div(_unsafe_view_16, lift_fresh_copy_16); _unsafe_view_16 = lift_fresh_copy_16 = None\0A _tensor_constant25 = self._tensor_constant25\0A slice_60 = torch.ops.aten.slice(_tensor_constant25, 0, 0, 9223372036854775807); _tensor_constant25 = None\0A slice_61 = torch.ops.aten.slice(slice_60, 1, 0, 9223372036854775807); slice_60 = None\0A slice_62 = torch.ops.aten.slice(slice_61, 2, 0, 5); slice_61 = None\0A slice_63 = torch.ops.aten.slice(slice_62, 3, 0, 5); slice_62 = None\0A _to_copy_8 = torch.ops.aten._to_copy(slice_63, dtype = torch.bool); slice_63 = None\0A _tensor_constant26 = self._tensor_constant26\0A lift_fresh_copy_17 = 
torch.ops.aten.lift_fresh_copy(_tensor_constant26); _tensor_constant26 = None\0A where_8 = torch.ops.aten.where(_to_copy_8, div_8, lift_fresh_copy_17); _to_copy_8 = div_8 = lift_fresh_copy_17 = None\0A _softmax_8 = torch.ops.aten._softmax(where_8, -1, False); where_8 = None\0A detach_16 = torch.ops.aten.detach(_softmax_8)\0A expand_34 = torch.ops.aten.expand(_softmax_8, [1, 12, 5, 5]); _softmax_8 = None\0A view_137 = torch.ops.aten.view(expand_34, [12, 5, 5]); expand_34 = None\0A expand_35 = torch.ops.aten.expand(permute_34, [1, 12, 5, 64]); permute_34 = None\0A view_138 = torch.ops.aten.view(expand_35, [12, 5, 64]); expand_35 = None\0A bmm_17 = torch.ops.aten.bmm(view_137, view_138); view_137 = view_138 = None\0A _unsafe_view_17 = torch.ops.aten._unsafe_view(bmm_17, [1, 12, 5, 64]); bmm_17 = None\0A permute_35 = torch.ops.aten.permute(_unsafe_view_17, [0, 2, 1, 3]); _unsafe_view_17 = None\0A clone_8 = torch.ops.aten.clone(permute_35, memory_format = torch.contiguous_format); permute_35 = None\0A view_139 = torch.ops.aten.view(clone_8, [1, 5, 768]); clone_8 = None\0A view_140 = torch.ops.aten.view(view_139, [-1, 768]); view_139 = None\0A _param_constant102 = self._param_constant102\0A _param_constant103 = self._param_constant103\0A addmm_33 = torch.ops.aten.addmm(_param_constant102, view_140, _param_constant103); _param_constant102 = view_140 = _param_constant103 = None\0A view_141 = torch.ops.aten.view(addmm_33, [1, 5, 768]); addmm_33 = None\0A add_33 = torch.ops.aten.add(view_141, add_32); view_141 = add_32 = None\0A _param_constant104 = self._param_constant104\0A _param_constant105 = self._param_constant105\0A native_layer_norm_17 = torch.ops.aten.native_layer_norm(add_33, [768], _param_constant104, _param_constant105, 1e-05); _param_constant104 = _param_constant105 = None\0A getitem_51 = native_layer_norm_17[0]\0A getitem_52 = native_layer_norm_17[1]\0A getitem_53 = native_layer_norm_17[2]; native_layer_norm_17 = None\0A view_142 = 
torch.ops.aten.view(getitem_51, [-1, 768]); getitem_51 = None\0A _param_constant106 = self._param_constant106\0A _param_constant107 = self._param_constant107\0A addmm_34 = torch.ops.aten.addmm(_param_constant106, view_142, _param_constant107); _param_constant106 = view_142 = _param_constant107 = None\0A view_143 = torch.ops.aten.view(addmm_34, [1, 5, 3072]); addmm_34 = None\0A mul_32 = torch.ops.aten.mul(view_143, 0.5)\0A pow_9 = torch.ops.aten.pow(view_143, 3.0)\0A mul_33 = torch.ops.aten.mul(pow_9, 0.044715); pow_9 = None\0A add_34 = torch.ops.aten.add(view_143, mul_33); view_143 = mul_33 = None\0A mul_34 = torch.ops.aten.mul(add_34, 0.7978845608028654); add_34 = None\0A tanh_8 = torch.ops.aten.tanh(mul_34); mul_34 = None\0A detach_17 = torch.ops.aten.detach(tanh_8)\0A add_35 = torch.ops.aten.add(tanh_8, 1.0); tanh_8 = None\0A mul_35 = torch.ops.aten.mul(mul_32, add_35); mul_32 = add_35 = None\0A view_144 = torch.ops.aten.view(mul_35, [-1, 3072]); mul_35 = None\0A _param_constant108 = self._param_constant108\0A _param_constant109 = self._param_constant109\0A addmm_35 = torch.ops.aten.addmm(_param_constant108, view_144, _param_constant109); _param_constant108 = view_144 = _param_constant109 = None\0A view_145 = torch.ops.aten.view(addmm_35, [1, 5, 768]); addmm_35 = None\0A add_36 = torch.ops.aten.add(add_33, view_145); add_33 = view_145 = None\0A _param_constant110 = self._param_constant110\0A _param_constant111 = self._param_constant111\0A native_layer_norm_18 = torch.ops.aten.native_layer_norm(add_36, [768], _param_constant110, _param_constant111, 1e-05); _param_constant110 = _param_constant111 = None\0A getitem_54 = native_layer_norm_18[0]\0A getitem_55 = native_layer_norm_18[1]\0A getitem_56 = native_layer_norm_18[2]; native_layer_norm_18 = None\0A view_146 = torch.ops.aten.view(getitem_54, [-1, 768]); getitem_54 = None\0A _param_constant112 = self._param_constant112\0A _param_constant113 = self._param_constant113\0A addmm_36 = 
torch.ops.aten.addmm(_param_constant112, view_146, _param_constant113); _param_constant112 = view_146 = _param_constant113 = None\0A view_147 = torch.ops.aten.view(addmm_36, [1, 5, 2304]); addmm_36 = None\0A slice_64 = torch.ops.aten.slice(view_147, 2, 0, 768)\0A slice_65 = torch.ops.aten.slice(view_147, 2, 768, 1536)\0A slice_66 = torch.ops.aten.slice(view_147, 2, 1536, 2304); view_147 = None\0A view_148 = torch.ops.aten.view(slice_64, [1, 5, 12, 64]); slice_64 = None\0A permute_36 = torch.ops.aten.permute(view_148, [0, 2, 1, 3]); view_148 = None\0A view_149 = torch.ops.aten.view(slice_65, [1, 5, 12, 64]); slice_65 = None\0A permute_37 = torch.ops.aten.permute(view_149, [0, 2, 1, 3]); view_149 = None\0A view_150 = torch.ops.aten.view(slice_66, [1, 5, 12, 64]); slice_66 = None\0A permute_38 = torch.ops.aten.permute(view_150, [0, 2, 1, 3]); view_150 = None\0A transpose_9 = torch.ops.aten.transpose(permute_37, -1, -2); permute_37 = None\0A expand_36 = torch.ops.aten.expand(permute_36, [1, 12, 5, 64]); permute_36 = None\0A view_151 = torch.ops.aten.view(expand_36, [12, 5, 64]); expand_36 = None\0A expand_37 = torch.ops.aten.expand(transpose_9, [1, 12, 64, 5]); transpose_9 = None\0A view_152 = torch.ops.aten.view(expand_37, [12, 64, 5]); expand_37 = None\0A bmm_18 = torch.ops.aten.bmm(view_151, view_152); view_151 = view_152 = None\0A _unsafe_view_18 = torch.ops.aten._unsafe_view(bmm_18, [1, 12, 5, 5]); bmm_18 = None\0A _tensor_constant27 = self._tensor_constant27\0A lift_fresh_copy_18 = torch.ops.aten.lift_fresh_copy(_tensor_constant27); _tensor_constant27 = None\0A div_9 = torch.ops.aten.div(_unsafe_view_18, lift_fresh_copy_18); _unsafe_view_18 = lift_fresh_copy_18 = None\0A _tensor_constant28 = self._tensor_constant28\0A slice_67 = torch.ops.aten.slice(_tensor_constant28, 0, 0, 9223372036854775807); _tensor_constant28 = None\0A slice_68 = torch.ops.aten.slice(slice_67, 1, 0, 9223372036854775807); slice_67 = None\0A slice_69 = torch.ops.aten.slice(slice_68, 2, 0, 5); 
slice_68 = None\0A slice_70 = torch.ops.aten.slice(slice_69, 3, 0, 5); slice_69 = None\0A _to_copy_9 = torch.ops.aten._to_copy(slice_70, dtype = torch.bool); slice_70 = None\0A _tensor_constant29 = self._tensor_constant29\0A lift_fresh_copy_19 = torch.ops.aten.lift_fresh_copy(_tensor_constant29); _tensor_constant29 = None\0A where_9 = torch.ops.aten.where(_to_copy_9, div_9, lift_fresh_copy_19); _to_copy_9 = div_9 = lift_fresh_copy_19 = None\0A _softmax_9 = torch.ops.aten._softmax(where_9, -1, False); where_9 = None\0A detach_18 = torch.ops.aten.detach(_softmax_9)\0A expand_38 = torch.ops.aten.expand(_softmax_9, [1, 12, 5, 5]); _softmax_9 = None\0A view_153 = torch.ops.aten.view(expand_38, [12, 5, 5]); expand_38 = None\0A expand_39 = torch.ops.aten.expand(permute_38, [1, 12, 5, 64]); permute_38 = None\0A view_154 = torch.ops.aten.view(expand_39, [12, 5, 64]); expand_39 = None\0A bmm_19 = torch.ops.aten.bmm(view_153, view_154); view_153 = view_154 = None\0A _unsafe_view_19 = torch.ops.aten._unsafe_view(bmm_19, [1, 12, 5, 64]); bmm_19 = None\0A permute_39 = torch.ops.aten.permute(_unsafe_view_19, [0, 2, 1, 3]); _unsafe_view_19 = None\0A clone_9 = torch.ops.aten.clone(permute_39, memory_format = torch.contiguous_format); permute_39 = None\0A view_155 = torch.ops.aten.view(clone_9, [1, 5, 768]); clone_9 = None\0A view_156 = torch.ops.aten.view(view_155, [-1, 768]); view_155 = None\0A _param_constant114 = self._param_constant114\0A _param_constant115 = self._param_constant115\0A addmm_37 = torch.ops.aten.addmm(_param_constant114, view_156, _param_constant115); _param_constant114 = view_156 = _param_constant115 = None\0A view_157 = torch.ops.aten.view(addmm_37, [1, 5, 768]); addmm_37 = None\0A add_37 = torch.ops.aten.add(view_157, add_36); view_157 = add_36 = None\0A _param_constant116 = self._param_constant116\0A _param_constant117 = self._param_constant117\0A native_layer_norm_19 = torch.ops.aten.native_layer_norm(add_37, [768], _param_constant116, _param_constant117, 
1e-05); _param_constant116 = _param_constant117 = None\0A getitem_57 = native_layer_norm_19[0]\0A getitem_58 = native_layer_norm_19[1]\0A getitem_59 = native_layer_norm_19[2]; native_layer_norm_19 = None\0A view_158 = torch.ops.aten.view(getitem_57, [-1, 768]); getitem_57 = None\0A _param_constant118 = self._param_constant118\0A _param_constant119 = self._param_constant119\0A addmm_38 = torch.ops.aten.addmm(_param_constant118, view_158, _param_constant119); _param_constant118 = view_158 = _param_constant119 = None\0A view_159 = torch.ops.aten.view(addmm_38, [1, 5, 3072]); addmm_38 = None\0A mul_36 = torch.ops.aten.mul(view_159, 0.5)\0A pow_10 = torch.ops.aten.pow(view_159, 3.0)\0A mul_37 = torch.ops.aten.mul(pow_10, 0.044715); pow_10 = None\0A add_38 = torch.ops.aten.add(view_159, mul_37); view_159 = mul_37 = None\0A mul_38 = torch.ops.aten.mul(add_38, 0.7978845608028654); add_38 = None\0A tanh_9 = torch.ops.aten.tanh(mul_38); mul_38 = None\0A detach_19 = torch.ops.aten.detach(tanh_9)\0A add_39 = torch.ops.aten.add(tanh_9, 1.0); tanh_9 = None\0A mul_39 = torch.ops.aten.mul(mul_36, add_39); mul_36 = add_39 = None\0A view_160 = torch.ops.aten.view(mul_39, [-1, 3072]); mul_39 = None\0A _param_constant120 = self._param_constant120\0A _param_constant121 = self._param_constant121\0A addmm_39 = torch.ops.aten.addmm(_param_constant120, view_160, _param_constant121); _param_constant120 = view_160 = _param_constant121 = None\0A view_161 = torch.ops.aten.view(addmm_39, [1, 5, 768]); addmm_39 = None\0A add_40 = torch.ops.aten.add(add_37, view_161); add_37 = view_161 = None\0A _param_constant122 = self._param_constant122\0A _param_constant123 = self._param_constant123\0A native_layer_norm_20 = torch.ops.aten.native_layer_norm(add_40, [768], _param_constant122, _param_constant123, 1e-05); _param_constant122 = _param_constant123 = None\0A getitem_60 = native_layer_norm_20[0]\0A getitem_61 = native_layer_norm_20[1]\0A getitem_62 = native_layer_norm_20[2]; native_layer_norm_20 = 
None\0A view_162 = torch.ops.aten.view(getitem_60, [-1, 768]); getitem_60 = None\0A _param_constant124 = self._param_constant124\0A _param_constant125 = self._param_constant125\0A addmm_40 = torch.ops.aten.addmm(_param_constant124, view_162, _param_constant125); _param_constant124 = view_162 = _param_constant125 = None\0A view_163 = torch.ops.aten.view(addmm_40, [1, 5, 2304]); addmm_40 = None\0A slice_71 = torch.ops.aten.slice(view_163, 2, 0, 768)\0A slice_72 = torch.ops.aten.slice(view_163, 2, 768, 1536)\0A slice_73 = torch.ops.aten.slice(view_163, 2, 1536, 2304); view_163 = None\0A view_164 = torch.ops.aten.view(slice_71, [1, 5, 12, 64]); slice_71 = None\0A permute_40 = torch.ops.aten.permute(view_164, [0, 2, 1, 3]); view_164 = None\0A view_165 = torch.ops.aten.view(slice_72, [1, 5, 12, 64]); slice_72 = None\0A permute_41 = torch.ops.aten.permute(view_165, [0, 2, 1, 3]); view_165 = None\0A view_166 = torch.ops.aten.view(slice_73, [1, 5, 12, 64]); slice_73 = None\0A permute_42 = torch.ops.aten.permute(view_166, [0, 2, 1, 3]); view_166 = None\0A transpose_10 = torch.ops.aten.transpose(permute_41, -1, -2); permute_41 = None\0A expand_40 = torch.ops.aten.expand(permute_40, [1, 12, 5, 64]); permute_40 = None\0A view_167 = torch.ops.aten.view(expand_40, [12, 5, 64]); expand_40 = None\0A expand_41 = torch.ops.aten.expand(transpose_10, [1, 12, 64, 5]); transpose_10 = None\0A view_168 = torch.ops.aten.view(expand_41, [12, 64, 5]); expand_41 = None\0A bmm_20 = torch.ops.aten.bmm(view_167, view_168); view_167 = view_168 = None\0A _unsafe_view_20 = torch.ops.aten._unsafe_view(bmm_20, [1, 12, 5, 5]); bmm_20 = None\0A _tensor_constant30 = self._tensor_constant30\0A lift_fresh_copy_20 = torch.ops.aten.lift_fresh_copy(_tensor_constant30); _tensor_constant30 = None\0A div_10 = torch.ops.aten.div(_unsafe_view_20, lift_fresh_copy_20); _unsafe_view_20 = lift_fresh_copy_20 = None\0A _tensor_constant31 = self._tensor_constant31\0A slice_74 = torch.ops.aten.slice(_tensor_constant31, 0, 
0, 9223372036854775807); _tensor_constant31 = None\0A slice_75 = torch.ops.aten.slice(slice_74, 1, 0, 9223372036854775807); slice_74 = None\0A slice_76 = torch.ops.aten.slice(slice_75, 2, 0, 5); slice_75 = None\0A slice_77 = torch.ops.aten.slice(slice_76, 3, 0, 5); slice_76 = None\0A _to_copy_10 = torch.ops.aten._to_copy(slice_77, dtype = torch.bool); slice_77 = None\0A _tensor_constant32 = self._tensor_constant32\0A lift_fresh_copy_21 = torch.ops.aten.lift_fresh_copy(_tensor_constant32); _tensor_constant32 = None\0A where_10 = torch.ops.aten.where(_to_copy_10, div_10, lift_fresh_copy_21); _to_copy_10 = div_10 = lift_fresh_copy_21 = None\0A _softmax_10 = torch.ops.aten._softmax(where_10, -1, False); where_10 = None\0A detach_20 = torch.ops.aten.detach(_softmax_10)\0A expand_42 = torch.ops.aten.expand(_softmax_10, [1, 12, 5, 5]); _softmax_10 = None\0A view_169 = torch.ops.aten.view(expand_42, [12, 5, 5]); expand_42 = None\0A expand_43 = torch.ops.aten.expand(permute_42, [1, 12, 5, 64]); permute_42 = None\0A view_170 = torch.ops.aten.view(expand_43, [12, 5, 64]); expand_43 = None\0A bmm_21 = torch.ops.aten.bmm(view_169, view_170); view_169 = view_170 = None\0A _unsafe_view_21 = torch.ops.aten._unsafe_view(bmm_21, [1, 12, 5, 64]); bmm_21 = None\0A permute_43 = torch.ops.aten.permute(_unsafe_view_21, [0, 2, 1, 3]); _unsafe_view_21 = None\0A clone_10 = torch.ops.aten.clone(permute_43, memory_format = torch.contiguous_format); permute_43 = None\0A view_171 = torch.ops.aten.view(clone_10, [1, 5, 768]); clone_10 = None\0A view_172 = torch.ops.aten.view(view_171, [-1, 768]); view_171 = None\0A _param_constant126 = self._param_constant126\0A _param_constant127 = self._param_constant127\0A addmm_41 = torch.ops.aten.addmm(_param_constant126, view_172, _param_constant127); _param_constant126 = view_172 = _param_constant127 = None\0A view_173 = torch.ops.aten.view(addmm_41, [1, 5, 768]); addmm_41 = None\0A add_41 = torch.ops.aten.add(view_173, add_40); view_173 = add_40 = 
None\0A _param_constant128 = self._param_constant128\0A _param_constant129 = self._param_constant129\0A native_layer_norm_21 = torch.ops.aten.native_layer_norm(add_41, [768], _param_constant128, _param_constant129, 1e-05); _param_constant128 = _param_constant129 = None\0A getitem_63 = native_layer_norm_21[0]\0A getitem_64 = native_layer_norm_21[1]\0A getitem_65 = native_layer_norm_21[2]; native_layer_norm_21 = None\0A view_174 = torch.ops.aten.view(getitem_63, [-1, 768]); getitem_63 = None\0A _param_constant130 = self._param_constant130\0A _param_constant131 = self._param_constant131\0A addmm_42 = torch.ops.aten.addmm(_param_constant130, view_174, _param_constant131); _param_constant130 = view_174 = _param_constant131 = None\0A view_175 = torch.ops.aten.view(addmm_42, [1, 5, 3072]); addmm_42 = None\0A mul_40 = torch.ops.aten.mul(view_175, 0.5)\0A pow_11 = torch.ops.aten.pow(view_175, 3.0)\0A mul_41 = torch.ops.aten.mul(pow_11, 0.044715); pow_11 = None\0A add_42 = torch.ops.aten.add(view_175, mul_41); view_175 = mul_41 = None\0A mul_42 = torch.ops.aten.mul(add_42, 0.7978845608028654); add_42 = None\0A tanh_10 = torch.ops.aten.tanh(mul_42); mul_42 = None\0A detach_21 = torch.ops.aten.detach(tanh_10)\0A add_43 = torch.ops.aten.add(tanh_10, 1.0); tanh_10 = None\0A mul_43 = torch.ops.aten.mul(mul_40, add_43); mul_40 = add_43 = None\0A view_176 = torch.ops.aten.view(mul_43, [-1, 3072]); mul_43 = None\0A _param_constant132 = self._param_constant132\0A _param_constant133 = self._param_constant133\0A addmm_43 = torch.ops.aten.addmm(_param_constant132, view_176, _param_constant133); _param_constant132 = view_176 = _param_constant133 = None\0A view_177 = torch.ops.aten.view(addmm_43, [1, 5, 768]); addmm_43 = None\0A add_44 = torch.ops.aten.add(add_41, view_177); add_41 = view_177 = None\0A _param_constant134 = self._param_constant134\0A _param_constant135 = self._param_constant135\0A native_layer_norm_22 = torch.ops.aten.native_layer_norm(add_44, [768], _param_constant134, 
_param_constant135, 1e-05); _param_constant134 = _param_constant135 = None\0A getitem_66 = native_layer_norm_22[0]\0A getitem_67 = native_layer_norm_22[1]\0A getitem_68 = native_layer_norm_22[2]; native_layer_norm_22 = None\0A view_178 = torch.ops.aten.view(getitem_66, [-1, 768]); getitem_66 = None\0A _param_constant136 = self._param_constant136\0A _param_constant137 = self._param_constant137\0A addmm_44 = torch.ops.aten.addmm(_param_constant136, view_178, _param_constant137); _param_constant136 = view_178 = _param_constant137 = None\0A view_179 = torch.ops.aten.view(addmm_44, [1, 5, 2304]); addmm_44 = None\0A slice_78 = torch.ops.aten.slice(view_179, 2, 0, 768)\0A slice_79 = torch.ops.aten.slice(view_179, 2, 768, 1536)\0A slice_80 = torch.ops.aten.slice(view_179, 2, 1536, 2304); view_179 = None\0A view_180 = torch.ops.aten.view(slice_78, [1, 5, 12, 64]); slice_78 = None\0A permute_44 = torch.ops.aten.permute(view_180, [0, 2, 1, 3]); view_180 = None\0A view_181 = torch.ops.aten.view(slice_79, [1, 5, 12, 64]); slice_79 = None\0A permute_45 = torch.ops.aten.permute(view_181, [0, 2, 1, 3]); view_181 = None\0A view_182 = torch.ops.aten.view(slice_80, [1, 5, 12, 64]); slice_80 = None\0A permute_46 = torch.ops.aten.permute(view_182, [0, 2, 1, 3]); view_182 = None\0A transpose_11 = torch.ops.aten.transpose(permute_45, -1, -2); permute_45 = None\0A expand_44 = torch.ops.aten.expand(permute_44, [1, 12, 5, 64]); permute_44 = None\0A view_183 = torch.ops.aten.view(expand_44, [12, 5, 64]); expand_44 = None\0A expand_45 = torch.ops.aten.expand(transpose_11, [1, 12, 64, 5]); transpose_11 = None\0A view_184 = torch.ops.aten.view(expand_45, [12, 64, 5]); expand_45 = None\0A bmm_22 = torch.ops.aten.bmm(view_183, view_184); view_183 = view_184 = None\0A _unsafe_view_22 = torch.ops.aten._unsafe_view(bmm_22, [1, 12, 5, 5]); bmm_22 = None\0A _tensor_constant33 = self._tensor_constant33\0A lift_fresh_copy_22 = torch.ops.aten.lift_fresh_copy(_tensor_constant33); _tensor_constant33 = 
None\0A div_11 = torch.ops.aten.div(_unsafe_view_22, lift_fresh_copy_22); _unsafe_view_22 = lift_fresh_copy_22 = None\0A _tensor_constant34 = self._tensor_constant34\0A slice_81 = torch.ops.aten.slice(_tensor_constant34, 0, 0, 9223372036854775807); _tensor_constant34 = None\0A slice_82 = torch.ops.aten.slice(slice_81, 1, 0, 9223372036854775807); slice_81 = None\0A slice_83 = torch.ops.aten.slice(slice_82, 2, 0, 5); slice_82 = None\0A slice_84 = torch.ops.aten.slice(slice_83, 3, 0, 5); slice_83 = None\0A _to_copy_11 = torch.ops.aten._to_copy(slice_84, dtype = torch.bool); slice_84 = None\0A _tensor_constant35 = self._tensor_constant35\0A lift_fresh_copy_23 = torch.ops.aten.lift_fresh_copy(_tensor_constant35); _tensor_constant35 = None\0A where_11 = torch.ops.aten.where(_to_copy_11, div_11, lift_fresh_copy_23); _to_copy_11 = div_11 = lift_fresh_copy_23 = None\0A _softmax_11 = torch.ops.aten._softmax(where_11, -1, False); where_11 = None\0A detach_22 = torch.ops.aten.detach(_softmax_11)\0A expand_46 = torch.ops.aten.expand(_softmax_11, [1, 12, 5, 5]); _softmax_11 = None\0A view_185 = torch.ops.aten.view(expand_46, [12, 5, 5]); expand_46 = None\0A expand_47 = torch.ops.aten.expand(permute_46, [1, 12, 5, 64]); permute_46 = None\0A view_186 = torch.ops.aten.view(expand_47, [12, 5, 64]); expand_47 = None\0A bmm_23 = torch.ops.aten.bmm(view_185, view_186); view_185 = view_186 = None\0A _unsafe_view_23 = torch.ops.aten._unsafe_view(bmm_23, [1, 12, 5, 64]); bmm_23 = None\0A permute_47 = torch.ops.aten.permute(_unsafe_view_23, [0, 2, 1, 3]); _unsafe_view_23 = None\0A clone_11 = torch.ops.aten.clone(permute_47, memory_format = torch.contiguous_format); permute_47 = None\0A view_187 = torch.ops.aten.view(clone_11, [1, 5, 768]); clone_11 = None\0A view_188 = torch.ops.aten.view(view_187, [-1, 768]); view_187 = None\0A _param_constant138 = self._param_constant138\0A _param_constant139 = self._param_constant139\0A addmm_45 = torch.ops.aten.addmm(_param_constant138, view_188, 
_param_constant139); _param_constant138 = view_188 = _param_constant139 = None\0A view_189 = torch.ops.aten.view(addmm_45, [1, 5, 768]); addmm_45 = None\0A add_45 = torch.ops.aten.add(view_189, add_44); view_189 = add_44 = None\0A _param_constant140 = self._param_constant140\0A _param_constant141 = self._param_constant141\0A native_layer_norm_23 = torch.ops.aten.native_layer_norm(add_45, [768], _param_constant140, _param_constant141, 1e-05); _param_constant140 = _param_constant141 = None\0A getitem_69 = native_layer_norm_23[0]\0A getitem_70 = native_layer_norm_23[1]\0A getitem_71 = native_layer_norm_23[2]; native_layer_norm_23 = None\0A view_190 = torch.ops.aten.view(getitem_69, [-1, 768]); getitem_69 = None\0A _param_constant142 = self._param_constant142\0A _param_constant143 = self._param_constant143\0A addmm_46 = torch.ops.aten.addmm(_param_constant142, view_190, _param_constant143); _param_constant142 = view_190 = _param_constant143 = None\0A view_191 = torch.ops.aten.view(addmm_46, [1, 5, 3072]); addmm_46 = None\0A mul_44 = torch.ops.aten.mul(view_191, 0.5)\0A pow_12 = torch.ops.aten.pow(view_191, 3.0)\0A mul_45 = torch.ops.aten.mul(pow_12, 0.044715); pow_12 = None\0A add_46 = torch.ops.aten.add(view_191, mul_45); view_191 = mul_45 = None\0A mul_46 = torch.ops.aten.mul(add_46, 0.7978845608028654); add_46 = None\0A tanh_11 = torch.ops.aten.tanh(mul_46); mul_46 = None\0A detach_23 = torch.ops.aten.detach(tanh_11)\0A add_47 = torch.ops.aten.add(tanh_11, 1.0); tanh_11 = None\0A mul_47 = torch.ops.aten.mul(mul_44, add_47); mul_44 = add_47 = None\0A view_192 = torch.ops.aten.view(mul_47, [-1, 3072]); mul_47 = None\0A _param_constant144 = self._param_constant144\0A _param_constant145 = self._param_constant145\0A addmm_47 = torch.ops.aten.addmm(_param_constant144, view_192, _param_constant145); _param_constant144 = view_192 = _param_constant145 = None\0A view_193 = torch.ops.aten.view(addmm_47, [1, 5, 768]); addmm_47 = None\0A add_48 = torch.ops.aten.add(add_45, 
view_193); add_45 = view_193 = None\0A _param_constant146 = self._param_constant146\0A _param_constant147 = self._param_constant147\0A native_layer_norm_24 = torch.ops.aten.native_layer_norm(add_48, [768], _param_constant146, _param_constant147, 1e-05); add_48 = _param_constant146 = _param_constant147 = None\0A getitem_72 = native_layer_norm_24[0]\0A getitem_73 = native_layer_norm_24[1]\0A getitem_74 = native_layer_norm_24[2]; native_layer_norm_24 = None\0A view_194 = torch.ops.aten.view(getitem_72, [1, 5, 768]); getitem_72 = None\0A _param_constant148 = self._param_constant148\0A t = torch.ops.aten.t(_param_constant148); _param_constant148 = None\0A view_195 = torch.ops.aten.view(view_194, [5, 768]); view_194 = None\0A mm = torch.ops.aten.mm(view_195, t); view_195 = t = None\0A _unsafe_view_24 = torch.ops.aten._unsafe_view(mm, [1, 5, 50257]); mm = None\0A return _unsafe_view_24\0A "
%185 = torch.nn_module {
torch.slot "_param_constant0", %0 : !torch.tensor<[50257,768],f32>
torch.slot "_param_constant1", %1 : !torch.tensor<[1024,768],f32>
torch.slot "_param_constant2", %2 : !torch.tensor<[768],f32>
torch.slot "_param_constant3", %3 : !torch.tensor<[768],f32>
torch.slot "_param_constant4", %4 : !torch.tensor<[2304],f32>
torch.slot "_param_constant5", %5 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant6", %6 : !torch.tensor<[768],f32>
torch.slot "_param_constant7", %7 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant8", %8 : !torch.tensor<[768],f32>
torch.slot "_param_constant9", %9 : !torch.tensor<[768],f32>
torch.slot "_param_constant10", %10 : !torch.tensor<[3072],f32>
torch.slot "_param_constant11", %11 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant12", %12 : !torch.tensor<[768],f32>
torch.slot "_param_constant13", %13 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant14", %14 : !torch.tensor<[768],f32>
torch.slot "_param_constant15", %15 : !torch.tensor<[768],f32>
torch.slot "_param_constant16", %16 : !torch.tensor<[2304],f32>
torch.slot "_param_constant17", %17 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant18", %18 : !torch.tensor<[768],f32>
torch.slot "_param_constant19", %19 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant20", %20 : !torch.tensor<[768],f32>
torch.slot "_param_constant21", %21 : !torch.tensor<[768],f32>
torch.slot "_param_constant22", %22 : !torch.tensor<[3072],f32>
torch.slot "_param_constant23", %23 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant24", %24 : !torch.tensor<[768],f32>
torch.slot "_param_constant25", %25 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant26", %26 : !torch.tensor<[768],f32>
torch.slot "_param_constant27", %27 : !torch.tensor<[768],f32>
torch.slot "_param_constant28", %28 : !torch.tensor<[2304],f32>
torch.slot "_param_constant29", %29 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant30", %30 : !torch.tensor<[768],f32>
torch.slot "_param_constant31", %31 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant32", %32 : !torch.tensor<[768],f32>
torch.slot "_param_constant33", %33 : !torch.tensor<[768],f32>
torch.slot "_param_constant34", %34 : !torch.tensor<[3072],f32>
torch.slot "_param_constant35", %35 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant36", %36 : !torch.tensor<[768],f32>
torch.slot "_param_constant37", %37 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant38", %38 : !torch.tensor<[768],f32>
torch.slot "_param_constant39", %39 : !torch.tensor<[768],f32>
torch.slot "_param_constant40", %40 : !torch.tensor<[2304],f32>
torch.slot "_param_constant41", %41 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant42", %42 : !torch.tensor<[768],f32>
torch.slot "_param_constant43", %43 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant44", %44 : !torch.tensor<[768],f32>
torch.slot "_param_constant45", %45 : !torch.tensor<[768],f32>
torch.slot "_param_constant46", %46 : !torch.tensor<[3072],f32>
torch.slot "_param_constant47", %47 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant48", %48 : !torch.tensor<[768],f32>
torch.slot "_param_constant49", %49 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant50", %50 : !torch.tensor<[768],f32>
torch.slot "_param_constant51", %51 : !torch.tensor<[768],f32>
torch.slot "_param_constant52", %52 : !torch.tensor<[2304],f32>
torch.slot "_param_constant53", %53 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant54", %54 : !torch.tensor<[768],f32>
torch.slot "_param_constant55", %55 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant56", %56 : !torch.tensor<[768],f32>
torch.slot "_param_constant57", %57 : !torch.tensor<[768],f32>
torch.slot "_param_constant58", %58 : !torch.tensor<[3072],f32>
torch.slot "_param_constant59", %59 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant60", %60 : !torch.tensor<[768],f32>
torch.slot "_param_constant61", %61 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant62", %62 : !torch.tensor<[768],f32>
torch.slot "_param_constant63", %63 : !torch.tensor<[768],f32>
torch.slot "_param_constant64", %64 : !torch.tensor<[2304],f32>
torch.slot "_param_constant65", %65 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant66", %66 : !torch.tensor<[768],f32>
torch.slot "_param_constant67", %67 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant68", %68 : !torch.tensor<[768],f32>
torch.slot "_param_constant69", %69 : !torch.tensor<[768],f32>
torch.slot "_param_constant70", %70 : !torch.tensor<[3072],f32>
torch.slot "_param_constant71", %71 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant72", %72 : !torch.tensor<[768],f32>
torch.slot "_param_constant73", %73 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant74", %74 : !torch.tensor<[768],f32>
torch.slot "_param_constant75", %75 : !torch.tensor<[768],f32>
torch.slot "_param_constant76", %76 : !torch.tensor<[2304],f32>
torch.slot "_param_constant77", %77 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant78", %78 : !torch.tensor<[768],f32>
torch.slot "_param_constant79", %79 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant80", %80 : !torch.tensor<[768],f32>
torch.slot "_param_constant81", %81 : !torch.tensor<[768],f32>
torch.slot "_param_constant82", %82 : !torch.tensor<[3072],f32>
torch.slot "_param_constant83", %83 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant84", %84 : !torch.tensor<[768],f32>
torch.slot "_param_constant85", %85 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant86", %86 : !torch.tensor<[768],f32>
torch.slot "_param_constant87", %87 : !torch.tensor<[768],f32>
torch.slot "_param_constant88", %88 : !torch.tensor<[2304],f32>
torch.slot "_param_constant89", %89 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant90", %90 : !torch.tensor<[768],f32>
torch.slot "_param_constant91", %91 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant92", %92 : !torch.tensor<[768],f32>
torch.slot "_param_constant93", %93 : !torch.tensor<[768],f32>
torch.slot "_param_constant94", %94 : !torch.tensor<[3072],f32>
torch.slot "_param_constant95", %95 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant96", %96 : !torch.tensor<[768],f32>
torch.slot "_param_constant97", %97 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant98", %98 : !torch.tensor<[768],f32>
torch.slot "_param_constant99", %99 : !torch.tensor<[768],f32>
torch.slot "_param_constant100", %100 : !torch.tensor<[2304],f32>
torch.slot "_param_constant101", %101 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant102", %102 : !torch.tensor<[768],f32>
torch.slot "_param_constant103", %103 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant104", %104 : !torch.tensor<[768],f32>
torch.slot "_param_constant105", %105 : !torch.tensor<[768],f32>
torch.slot "_param_constant106", %106 : !torch.tensor<[3072],f32>
torch.slot "_param_constant107", %107 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant108", %108 : !torch.tensor<[768],f32>
torch.slot "_param_constant109", %109 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant110", %110 : !torch.tensor<[768],f32>
torch.slot "_param_constant111", %111 : !torch.tensor<[768],f32>
torch.slot "_param_constant112", %112 : !torch.tensor<[2304],f32>
torch.slot "_param_constant113", %113 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant114", %114 : !torch.tensor<[768],f32>
torch.slot "_param_constant115", %115 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant116", %116 : !torch.tensor<[768],f32>
torch.slot "_param_constant117", %117 : !torch.tensor<[768],f32>
torch.slot "_param_constant118", %118 : !torch.tensor<[3072],f32>
torch.slot "_param_constant119", %119 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant120", %120 : !torch.tensor<[768],f32>
torch.slot "_param_constant121", %121 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant122", %122 : !torch.tensor<[768],f32>
torch.slot "_param_constant123", %123 : !torch.tensor<[768],f32>
torch.slot "_param_constant124", %124 : !torch.tensor<[2304],f32>
torch.slot "_param_constant125", %125 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant126", %126 : !torch.tensor<[768],f32>
torch.slot "_param_constant127", %127 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant128", %128 : !torch.tensor<[768],f32>
torch.slot "_param_constant129", %129 : !torch.tensor<[768],f32>
torch.slot "_param_constant130", %130 : !torch.tensor<[3072],f32>
torch.slot "_param_constant131", %131 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant132", %132 : !torch.tensor<[768],f32>
torch.slot "_param_constant133", %133 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant134", %134 : !torch.tensor<[768],f32>
torch.slot "_param_constant135", %135 : !torch.tensor<[768],f32>
torch.slot "_param_constant136", %136 : !torch.tensor<[2304],f32>
torch.slot "_param_constant137", %137 : !torch.tensor<[768,2304],f32>
torch.slot "_param_constant138", %138 : !torch.tensor<[768],f32>
torch.slot "_param_constant139", %139 : !torch.tensor<[768,768],f32>
torch.slot "_param_constant140", %140 : !torch.tensor<[768],f32>
torch.slot "_param_constant141", %141 : !torch.tensor<[768],f32>
torch.slot "_param_constant142", %142 : !torch.tensor<[3072],f32>
torch.slot "_param_constant143", %143 : !torch.tensor<[768,3072],f32>
torch.slot "_param_constant144", %144 : !torch.tensor<[768],f32>
torch.slot "_param_constant145", %145 : !torch.tensor<[3072,768],f32>
torch.slot "_param_constant146", %146 : !torch.tensor<[768],f32>
torch.slot "_param_constant147", %147 : !torch.tensor<[768],f32>
torch.slot "_param_constant148", %148 : !torch.tensor<[50257,768],f32>
torch.slot "_tensor_constant0", %149 : !torch.tensor<[],f32>
torch.slot "_tensor_constant1", %150 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant2", %151 : !torch.tensor<[],f32>
torch.slot "_tensor_constant3", %152 : !torch.tensor<[],f32>
torch.slot "_tensor_constant4", %153 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant5", %154 : !torch.tensor<[],f32>
torch.slot "_tensor_constant6", %155 : !torch.tensor<[],f32>
torch.slot "_tensor_constant7", %156 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant8", %157 : !torch.tensor<[],f32>
torch.slot "_tensor_constant9", %158 : !torch.tensor<[],f32>
torch.slot "_tensor_constant10", %159 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant11", %160 : !torch.tensor<[],f32>
torch.slot "_tensor_constant12", %161 : !torch.tensor<[],f32>
torch.slot "_tensor_constant13", %162 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant14", %163 : !torch.tensor<[],f32>
torch.slot "_tensor_constant15", %164 : !torch.tensor<[],f32>
torch.slot "_tensor_constant16", %165 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant17", %166 : !torch.tensor<[],f32>
torch.slot "_tensor_constant18", %167 : !torch.tensor<[],f32>
torch.slot "_tensor_constant19", %168 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant20", %169 : !torch.tensor<[],f32>
torch.slot "_tensor_constant21", %170 : !torch.tensor<[],f32>
torch.slot "_tensor_constant22", %171 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant23", %172 : !torch.tensor<[],f32>
torch.slot "_tensor_constant24", %173 : !torch.tensor<[],f32>
torch.slot "_tensor_constant25", %174 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant26", %175 : !torch.tensor<[],f32>
torch.slot "_tensor_constant27", %176 : !torch.tensor<[],f32>
torch.slot "_tensor_constant28", %177 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant29", %178 : !torch.tensor<[],f32>
torch.slot "_tensor_constant30", %179 : !torch.tensor<[],f32>
torch.slot "_tensor_constant31", %180 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant32", %181 : !torch.tensor<[],f32>
torch.slot "_tensor_constant33", %182 : !torch.tensor<[],f32>
torch.slot "_tensor_constant34", %183 : !torch.tensor<[1,1,1024,1024],ui8>
torch.slot "_tensor_constant35", %184 : !torch.tensor<[],f32>
torch.slot "training", %true : !torch.bool
torch.slot "_is_full_backward_hook", %none : !torch.none
torch.slot "_code", %str : !torch.str
} : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda">
}
@AmosLewis
Copy link
Author

torch-mlir-opt -pass-pipeline='builtin.module(torchscript-module-to-torch-backend-pipeline{backend-legal-ops=torch.aten.flatten.using_ints})' /tmp/gpt2_torch_raw_elide.mlir --mlir-print-ir-after-all > gpt2_tosa_ramiro.mlir

@AmosLewis
Copy link
Author

%884 = torch.aten.tanh %883 : !torch.vtensor<[1,5,3072],f32> -> !torch.vtensor<[1,5,3072],unk>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment