Skip to content

Instantly share code, notes, and snippets.

@AmosLewis
Created March 7, 2023 00:25
Show Gist options
  • Save AmosLewis/16c45d383c7b5db2ce94f8d0124e44d1 to your computer and use it in GitHub Desktop.
module attributes {torch.debug_module_name = "_lambda"} {
// Accessor for the FX GraphModule's "_code" attribute: given the module
// object, reads the attribute and returns it as a !torch.str.
// NOTE(review): by torch.fx convention "_code" presumably holds the
// generated Python source of the traced graph — confirm against the tracer.
func.func private @__torch__.torch.fx.graph_module._lambda.__code_getter(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda">) -> !torch.str {
// Read the "_code" attribute off the module object; result is a string value.
%133 = torch.prim.GetAttr %arg0["_code"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.str
return %133 : !torch.str
}
func.func private @__torch__.torch.fx.graph_module._lambda.forward(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda">, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[1,15],si64>}, %arg2: !torch.tensor {torch.type_bound = !torch.vtensor<[1,4],si64>}) -> !torch.tensor {
%int6 = torch.constant.int 6
%true_0 = torch.constant.bool true
%float-3.402820e38 = torch.constant.float -3.4028234663852886E+38
%int-100 = torch.constant.int -100
%none_1 = torch.constant.none
%int-1 = torch.constant.int -1
%false = torch.constant.bool false
%cpu = torch.constant.device "cpu"
%int1 = torch.constant.int 1
%int4 = torch.constant.int 4
%int0 = torch.constant.int 0
%int9223372036854775807 = torch.constant.int 9223372036854775807
%int15 = torch.constant.int 15
%int2 = torch.constant.int 2
%int3 = torch.constant.int 3
%float1.000000e00 = torch.constant.float 1.000000e+00
%float9.999990e-07 = torch.constant.float 9.9999999999999995E-7
%int512 = torch.constant.int 512
%int8 = torch.constant.int 8
%int64 = torch.constant.int 64
%int16 = torch.constant.int 16
%float2.772590e00 = torch.constant.float 2.7725887222397811
%int2048 = torch.constant.int 2048
%float2.079440e00 = torch.constant.float 2.0794415416798357
%int31 = torch.constant.int 31
%float4.419420e-02 = torch.constant.float 0.044194173824159223
%int32128 = torch.constant.int 32128
%133 = torch.prim.ListConstruct %int1, %int4 : (!torch.int, !torch.int) -> !torch.list<int>
%134 = torch.aten.new_zeros %arg2, %133, %int4, %int0, %cpu, %false : !torch.tensor, !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool -> !torch.tensor
%135 = torch.aten.slice.Tensor %arg2, %int1, %int0, %int-1, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%136 = torch.aten.clone %135, %none_1 : !torch.tensor, !torch.none -> !torch.tensor
%137 = torch.aten.slice.Tensor %134, %int1, %int1, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%138 = torch.aten.copy_ %137, %136, %false : !torch.tensor, !torch.tensor, !torch.bool -> !torch.tensor
%139 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%140 = torch.aten.lift_fresh_copy %139 : !torch.tensor -> !torch.tensor
%141 = torch.aten.select.int %134, %int1, %int0 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%142 = torch.aten.fill_.Tensor %141, %140 : !torch.tensor, !torch.tensor -> !torch.tensor
%143 = torch.aten.eq.Scalar %134, %int-100 : !torch.tensor, !torch.int -> !torch.tensor
%144 = torch.aten.masked_fill_.Scalar %134, %143, %int0 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%145 = torch.prim.ListConstruct %int-1, %int15 : (!torch.int, !torch.int) -> !torch.list<int>
%146 = torch.aten.view %arg1, %145 : !torch.tensor, !torch.list<int> -> !torch.tensor
%147 = torch.prim.GetAttr %arg0["_param_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%148 = torch.aten.embedding %147, %146, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%149 = torch.prim.ListConstruct %int1, %int15 : (!torch.int, !torch.int) -> !torch.list<int>
%150 = torch.aten.ones %149, %none_1, %none_1, %cpu, %false : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%151 = torch.aten.slice.Tensor %150, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%152 = torch.aten.unsqueeze %151, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%153 = torch.aten.unsqueeze %152, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%154 = torch.aten.slice.Tensor %153, %int3, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%155 = torch.aten.rsub.Scalar %154, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%156 = torch.aten.mul.Scalar %155, %float-3.402820e38 : !torch.tensor, !torch.float -> !torch.tensor
%157 = torch.aten.pow.Tensor_Scalar %148, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%158 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%159 = torch.aten.mean.dim %157, %158, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%160 = torch.aten.add.Scalar %159, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%161 = torch.aten.rsqrt %160 : !torch.tensor -> !torch.tensor
%162 = torch.aten.mul.Tensor %148, %161 : !torch.tensor, !torch.tensor -> !torch.tensor
%163 = torch.prim.GetAttr %arg0["_param_constant1"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%164 = torch.aten.mul.Tensor %163, %162 : !torch.tensor, !torch.tensor -> !torch.tensor
%165 = torch.prim.GetAttr %arg0["_param_constant2"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%166 = torch.aten.t %165 : !torch.tensor -> !torch.tensor
%167 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%168 = torch.aten.view %164, %167 : !torch.tensor, !torch.list<int> -> !torch.tensor
%169 = torch.aten.mm %168, %166 : !torch.tensor, !torch.tensor -> !torch.tensor
%170 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%171 = torch.aten._unsafe_view %169, %170 : !torch.tensor, !torch.list<int> -> !torch.tensor
%172 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%173 = torch.aten.view %171, %172 : !torch.tensor, !torch.list<int> -> !torch.tensor
%174 = torch.aten.transpose.int %173, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%175 = torch.prim.GetAttr %arg0["_param_constant3"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%176 = torch.aten.t %175 : !torch.tensor -> !torch.tensor
%177 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%178 = torch.aten.view %164, %177 : !torch.tensor, !torch.list<int> -> !torch.tensor
%179 = torch.aten.mm %178, %176 : !torch.tensor, !torch.tensor -> !torch.tensor
%180 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%181 = torch.aten._unsafe_view %179, %180 : !torch.tensor, !torch.list<int> -> !torch.tensor
%182 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%183 = torch.aten.view %181, %182 : !torch.tensor, !torch.list<int> -> !torch.tensor
%184 = torch.aten.transpose.int %183, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%185 = torch.prim.GetAttr %arg0["_param_constant4"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%186 = torch.aten.t %185 : !torch.tensor -> !torch.tensor
%187 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%188 = torch.aten.view %164, %187 : !torch.tensor, !torch.list<int> -> !torch.tensor
%189 = torch.aten.mm %188, %186 : !torch.tensor, !torch.tensor -> !torch.tensor
%190 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%191 = torch.aten._unsafe_view %189, %190 : !torch.tensor, !torch.list<int> -> !torch.tensor
%192 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%193 = torch.aten.view %191, %192 : !torch.tensor, !torch.list<int> -> !torch.tensor
%194 = torch.aten.transpose.int %193, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%195 = torch.aten.transpose.int %184, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%196 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%197 = torch.aten.expand %174, %196, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%198 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%199 = torch.aten.view %197, %198 : !torch.tensor, !torch.list<int> -> !torch.tensor
%200 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%201 = torch.aten.expand %195, %200, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%202 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%203 = torch.aten.view %201, %202 : !torch.tensor, !torch.list<int> -> !torch.tensor
%204 = torch.aten.bmm %199, %203 : !torch.tensor, !torch.tensor -> !torch.tensor
%205 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%206 = torch.aten._unsafe_view %204, %205 : !torch.tensor, !torch.list<int> -> !torch.tensor
%207 = torch.aten.arange %int15, %int4, %none_1, %cpu, %false : !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%208 = torch.aten.slice.Tensor %207, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%209 = torch.aten.unsqueeze %208, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%210 = torch.aten.arange %int15, %int4, %none_1, %cpu, %false : !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%211 = torch.aten.unsqueeze %210, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%212 = torch.aten.slice.Tensor %211, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%213 = torch.aten.sub.Tensor %212, %209, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%214 = torch.aten.gt.Scalar %213, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%215 = torch.prims.convert_element_type %214, %int4 : !torch.tensor, !torch.int -> !torch.tensor
%216 = torch.aten.mul.Scalar %215, %int16 : !torch.tensor, !torch.int -> !torch.tensor
%217 = torch.aten.add.Scalar %216, %int0, %int1 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%218 = torch.aten.abs %213 : !torch.tensor -> !torch.tensor
%219 = torch.aten.lt.Scalar %218, %int8 : !torch.tensor, !torch.int -> !torch.tensor
%220 = torch.prims.convert_element_type %218, %int6 : !torch.tensor, !torch.int -> !torch.tensor
%221 = torch.aten.div.Scalar %220, %int8 : !torch.tensor, !torch.int -> !torch.tensor
%222 = torch.aten.log %221 : !torch.tensor -> !torch.tensor
%223 = torch.aten.div.Scalar %222, %float2.772590e00 : !torch.tensor, !torch.float -> !torch.tensor
%224 = torch.aten.mul.Scalar %223, %int8 : !torch.tensor, !torch.int -> !torch.tensor
%225 = torch.prims.convert_element_type %224, %int4 : !torch.tensor, !torch.int -> !torch.tensor
%226 = torch.aten.add.Scalar %225, %int8, %int1 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%227 = torch.aten.full_like %226, %int15, %int4, %int0, %cpu, %false, %none_1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.tensor
%228 = torch.aten.minimum %226, %227 : !torch.tensor, !torch.tensor -> !torch.tensor
%229 = torch.aten.where.self %219, %218, %228 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%230 = torch.aten.add_.Tensor %217, %229, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%231 = torch.prim.GetAttr %arg0["_param_constant5"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%232 = torch.aten.embedding %231, %230, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%233 = torch.prim.ListConstruct %int2, %int0, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%234 = torch.aten.permute %232, %233 : !torch.tensor, !torch.list<int> -> !torch.tensor
%235 = torch.aten.unsqueeze %234, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%236 = torch.aten.add.Tensor %235, %156, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%237 = torch.aten.add_.Tensor %206, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%238 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%239 = torch.aten.amax %237, %238, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%240 = torch.aten.sub.Tensor %237, %239, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%241 = torch.aten.exp %240 : !torch.tensor -> !torch.tensor
%242 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%243 = torch.aten.sum.dim_IntList %241, %242, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%244 = torch.aten.div.Tensor %241, %243 : !torch.tensor, !torch.tensor -> !torch.tensor
%245 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%246 = torch.aten.expand %244, %245, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%247 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%248 = torch.aten.view %246, %247 : !torch.tensor, !torch.list<int> -> !torch.tensor
%249 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%250 = torch.aten.expand %194, %249, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%251 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%252 = torch.aten.view %250, %251 : !torch.tensor, !torch.list<int> -> !torch.tensor
%253 = torch.aten.bmm %248, %252 : !torch.tensor, !torch.tensor -> !torch.tensor
%254 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%255 = torch.aten._unsafe_view %253, %254 : !torch.tensor, !torch.list<int> -> !torch.tensor
%256 = torch.aten.transpose.int %255, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%257 = torch.aten.clone %256, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%258 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%259 = torch.aten.view %257, %258 : !torch.tensor, !torch.list<int> -> !torch.tensor
%260 = torch.prim.GetAttr %arg0["_param_constant6"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%261 = torch.aten.t %260 : !torch.tensor -> !torch.tensor
%262 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%263 = torch.aten.view %259, %262 : !torch.tensor, !torch.list<int> -> !torch.tensor
%264 = torch.aten.mm %263, %261 : !torch.tensor, !torch.tensor -> !torch.tensor
%265 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%266 = torch.aten._unsafe_view %264, %265 : !torch.tensor, !torch.list<int> -> !torch.tensor
%267 = torch.aten.add.Tensor %148, %266, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%268 = torch.aten.pow.Tensor_Scalar %267, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%269 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%270 = torch.aten.mean.dim %268, %269, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%271 = torch.aten.add.Scalar %270, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%272 = torch.aten.rsqrt %271 : !torch.tensor -> !torch.tensor
%273 = torch.aten.mul.Tensor %267, %272 : !torch.tensor, !torch.tensor -> !torch.tensor
%274 = torch.prim.GetAttr %arg0["_param_constant7"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%275 = torch.aten.mul.Tensor %274, %273 : !torch.tensor, !torch.tensor -> !torch.tensor
%276 = torch.prim.GetAttr %arg0["_param_constant8"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%277 = torch.aten.t %276 : !torch.tensor -> !torch.tensor
%278 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%279 = torch.aten.view %275, %278 : !torch.tensor, !torch.list<int> -> !torch.tensor
%280 = torch.aten.mm %279, %277 : !torch.tensor, !torch.tensor -> !torch.tensor
%281 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%282 = torch.aten._unsafe_view %280, %281 : !torch.tensor, !torch.list<int> -> !torch.tensor
%283 = torch.aten.relu %282 : !torch.tensor -> !torch.tensor
%284 = torch.prim.GetAttr %arg0["_param_constant9"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%285 = torch.aten.t %284 : !torch.tensor -> !torch.tensor
%286 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%287 = torch.aten.view %283, %286 : !torch.tensor, !torch.list<int> -> !torch.tensor
%288 = torch.aten.mm %287, %285 : !torch.tensor, !torch.tensor -> !torch.tensor
%289 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%290 = torch.aten._unsafe_view %288, %289 : !torch.tensor, !torch.list<int> -> !torch.tensor
%291 = torch.aten.add.Tensor %267, %290, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%292 = torch.aten.pow.Tensor_Scalar %291, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%293 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%294 = torch.aten.mean.dim %292, %293, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%295 = torch.aten.add.Scalar %294, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%296 = torch.aten.rsqrt %295 : !torch.tensor -> !torch.tensor
%297 = torch.aten.mul.Tensor %291, %296 : !torch.tensor, !torch.tensor -> !torch.tensor
%298 = torch.prim.GetAttr %arg0["_param_constant10"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%299 = torch.aten.mul.Tensor %298, %297 : !torch.tensor, !torch.tensor -> !torch.tensor
%300 = torch.prim.GetAttr %arg0["_param_constant11"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%301 = torch.aten.t %300 : !torch.tensor -> !torch.tensor
%302 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%303 = torch.aten.view %299, %302 : !torch.tensor, !torch.list<int> -> !torch.tensor
%304 = torch.aten.mm %303, %301 : !torch.tensor, !torch.tensor -> !torch.tensor
%305 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%306 = torch.aten._unsafe_view %304, %305 : !torch.tensor, !torch.list<int> -> !torch.tensor
%307 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%308 = torch.aten.view %306, %307 : !torch.tensor, !torch.list<int> -> !torch.tensor
%309 = torch.aten.transpose.int %308, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%310 = torch.prim.GetAttr %arg0["_param_constant12"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%311 = torch.aten.t %310 : !torch.tensor -> !torch.tensor
%312 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%313 = torch.aten.view %299, %312 : !torch.tensor, !torch.list<int> -> !torch.tensor
%314 = torch.aten.mm %313, %311 : !torch.tensor, !torch.tensor -> !torch.tensor
%315 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%316 = torch.aten._unsafe_view %314, %315 : !torch.tensor, !torch.list<int> -> !torch.tensor
%317 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%318 = torch.aten.view %316, %317 : !torch.tensor, !torch.list<int> -> !torch.tensor
%319 = torch.aten.transpose.int %318, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%320 = torch.prim.GetAttr %arg0["_param_constant13"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%321 = torch.aten.t %320 : !torch.tensor -> !torch.tensor
%322 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%323 = torch.aten.view %299, %322 : !torch.tensor, !torch.list<int> -> !torch.tensor
%324 = torch.aten.mm %323, %321 : !torch.tensor, !torch.tensor -> !torch.tensor
%325 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%326 = torch.aten._unsafe_view %324, %325 : !torch.tensor, !torch.list<int> -> !torch.tensor
%327 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%328 = torch.aten.view %326, %327 : !torch.tensor, !torch.list<int> -> !torch.tensor
%329 = torch.aten.transpose.int %328, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%330 = torch.aten.transpose.int %319, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%331 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%332 = torch.aten.expand %309, %331, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%333 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%334 = torch.aten.view %332, %333 : !torch.tensor, !torch.list<int> -> !torch.tensor
%335 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%336 = torch.aten.expand %330, %335, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%337 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%338 = torch.aten.view %336, %337 : !torch.tensor, !torch.list<int> -> !torch.tensor
%339 = torch.aten.bmm %334, %338 : !torch.tensor, !torch.tensor -> !torch.tensor
%340 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%341 = torch.aten._unsafe_view %339, %340 : !torch.tensor, !torch.list<int> -> !torch.tensor
%342 = torch.aten.add_.Tensor %341, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%343 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%344 = torch.aten.amax %342, %343, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%345 = torch.aten.sub.Tensor %342, %344, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%346 = torch.aten.exp %345 : !torch.tensor -> !torch.tensor
%347 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%348 = torch.aten.sum.dim_IntList %346, %347, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%349 = torch.aten.div.Tensor %346, %348 : !torch.tensor, !torch.tensor -> !torch.tensor
%350 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%351 = torch.aten.expand %349, %350, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%352 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%353 = torch.aten.view %351, %352 : !torch.tensor, !torch.list<int> -> !torch.tensor
%354 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%355 = torch.aten.expand %329, %354, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%356 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%357 = torch.aten.view %355, %356 : !torch.tensor, !torch.list<int> -> !torch.tensor
%358 = torch.aten.bmm %353, %357 : !torch.tensor, !torch.tensor -> !torch.tensor
%359 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%360 = torch.aten._unsafe_view %358, %359 : !torch.tensor, !torch.list<int> -> !torch.tensor
%361 = torch.aten.transpose.int %360, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%362 = torch.aten.clone %361, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%363 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%364 = torch.aten.view %362, %363 : !torch.tensor, !torch.list<int> -> !torch.tensor
%365 = torch.prim.GetAttr %arg0["_param_constant14"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%366 = torch.aten.t %365 : !torch.tensor -> !torch.tensor
%367 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%368 = torch.aten.view %364, %367 : !torch.tensor, !torch.list<int> -> !torch.tensor
%369 = torch.aten.mm %368, %366 : !torch.tensor, !torch.tensor -> !torch.tensor
%370 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%371 = torch.aten._unsafe_view %369, %370 : !torch.tensor, !torch.list<int> -> !torch.tensor
%372 = torch.aten.add.Tensor %291, %371, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%373 = torch.aten.pow.Tensor_Scalar %372, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%374 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%375 = torch.aten.mean.dim %373, %374, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%376 = torch.aten.add.Scalar %375, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%377 = torch.aten.rsqrt %376 : !torch.tensor -> !torch.tensor
%378 = torch.aten.mul.Tensor %372, %377 : !torch.tensor, !torch.tensor -> !torch.tensor
%379 = torch.prim.GetAttr %arg0["_param_constant15"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%380 = torch.aten.mul.Tensor %379, %378 : !torch.tensor, !torch.tensor -> !torch.tensor
%381 = torch.prim.GetAttr %arg0["_param_constant16"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%382 = torch.aten.t %381 : !torch.tensor -> !torch.tensor
%383 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%384 = torch.aten.view %380, %383 : !torch.tensor, !torch.list<int> -> !torch.tensor
%385 = torch.aten.mm %384, %382 : !torch.tensor, !torch.tensor -> !torch.tensor
%386 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%387 = torch.aten._unsafe_view %385, %386 : !torch.tensor, !torch.list<int> -> !torch.tensor
%388 = torch.aten.relu %387 : !torch.tensor -> !torch.tensor
%389 = torch.prim.GetAttr %arg0["_param_constant17"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%390 = torch.aten.t %389 : !torch.tensor -> !torch.tensor
%391 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%392 = torch.aten.view %388, %391 : !torch.tensor, !torch.list<int> -> !torch.tensor
%393 = torch.aten.mm %392, %390 : !torch.tensor, !torch.tensor -> !torch.tensor
%394 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%395 = torch.aten._unsafe_view %393, %394 : !torch.tensor, !torch.list<int> -> !torch.tensor
%396 = torch.aten.add.Tensor %372, %395, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%397 = torch.aten.pow.Tensor_Scalar %396, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%398 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%399 = torch.aten.mean.dim %397, %398, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%400 = torch.aten.add.Scalar %399, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%401 = torch.aten.rsqrt %400 : !torch.tensor -> !torch.tensor
%402 = torch.aten.mul.Tensor %396, %401 : !torch.tensor, !torch.tensor -> !torch.tensor
%403 = torch.prim.GetAttr %arg0["_param_constant18"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%404 = torch.aten.mul.Tensor %403, %402 : !torch.tensor, !torch.tensor -> !torch.tensor
%405 = torch.prim.GetAttr %arg0["_param_constant19"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%406 = torch.aten.t %405 : !torch.tensor -> !torch.tensor
%407 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%408 = torch.aten.view %404, %407 : !torch.tensor, !torch.list<int> -> !torch.tensor
%409 = torch.aten.mm %408, %406 : !torch.tensor, !torch.tensor -> !torch.tensor
%410 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%411 = torch.aten._unsafe_view %409, %410 : !torch.tensor, !torch.list<int> -> !torch.tensor
%412 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%413 = torch.aten.view %411, %412 : !torch.tensor, !torch.list<int> -> !torch.tensor
%414 = torch.aten.transpose.int %413, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%415 = torch.prim.GetAttr %arg0["_param_constant20"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%416 = torch.aten.t %415 : !torch.tensor -> !torch.tensor
%417 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%418 = torch.aten.view %404, %417 : !torch.tensor, !torch.list<int> -> !torch.tensor
%419 = torch.aten.mm %418, %416 : !torch.tensor, !torch.tensor -> !torch.tensor
%420 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%421 = torch.aten._unsafe_view %419, %420 : !torch.tensor, !torch.list<int> -> !torch.tensor
%422 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%423 = torch.aten.view %421, %422 : !torch.tensor, !torch.list<int> -> !torch.tensor
%424 = torch.aten.transpose.int %423, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%425 = torch.prim.GetAttr %arg0["_param_constant21"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%426 = torch.aten.t %425 : !torch.tensor -> !torch.tensor
%427 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%428 = torch.aten.view %404, %427 : !torch.tensor, !torch.list<int> -> !torch.tensor
%429 = torch.aten.mm %428, %426 : !torch.tensor, !torch.tensor -> !torch.tensor
%430 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%431 = torch.aten._unsafe_view %429, %430 : !torch.tensor, !torch.list<int> -> !torch.tensor
%432 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%433 = torch.aten.view %431, %432 : !torch.tensor, !torch.list<int> -> !torch.tensor
%434 = torch.aten.transpose.int %433, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%435 = torch.aten.transpose.int %424, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%436 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%437 = torch.aten.expand %414, %436, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%438 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%439 = torch.aten.view %437, %438 : !torch.tensor, !torch.list<int> -> !torch.tensor
%440 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%441 = torch.aten.expand %435, %440, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%442 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%443 = torch.aten.view %441, %442 : !torch.tensor, !torch.list<int> -> !torch.tensor
%444 = torch.aten.bmm %439, %443 : !torch.tensor, !torch.tensor -> !torch.tensor
%445 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%446 = torch.aten._unsafe_view %444, %445 : !torch.tensor, !torch.list<int> -> !torch.tensor
%447 = torch.aten.add_.Tensor %446, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%448 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%449 = torch.aten.amax %447, %448, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%450 = torch.aten.sub.Tensor %447, %449, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%451 = torch.aten.exp %450 : !torch.tensor -> !torch.tensor
%452 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%453 = torch.aten.sum.dim_IntList %451, %452, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%454 = torch.aten.div.Tensor %451, %453 : !torch.tensor, !torch.tensor -> !torch.tensor
%455 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%456 = torch.aten.expand %454, %455, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%457 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%458 = torch.aten.view %456, %457 : !torch.tensor, !torch.list<int> -> !torch.tensor
%459 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%460 = torch.aten.expand %434, %459, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%461 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%462 = torch.aten.view %460, %461 : !torch.tensor, !torch.list<int> -> !torch.tensor
%463 = torch.aten.bmm %458, %462 : !torch.tensor, !torch.tensor -> !torch.tensor
%464 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%465 = torch.aten._unsafe_view %463, %464 : !torch.tensor, !torch.list<int> -> !torch.tensor
%466 = torch.aten.transpose.int %465, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%467 = torch.aten.clone %466, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%468 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%469 = torch.aten.view %467, %468 : !torch.tensor, !torch.list<int> -> !torch.tensor
%470 = torch.prim.GetAttr %arg0["_param_constant22"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%471 = torch.aten.t %470 : !torch.tensor -> !torch.tensor
%472 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%473 = torch.aten.view %469, %472 : !torch.tensor, !torch.list<int> -> !torch.tensor
%474 = torch.aten.mm %473, %471 : !torch.tensor, !torch.tensor -> !torch.tensor
%475 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%476 = torch.aten._unsafe_view %474, %475 : !torch.tensor, !torch.list<int> -> !torch.tensor
%477 = torch.aten.add.Tensor %396, %476, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%478 = torch.aten.pow.Tensor_Scalar %477, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%479 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%480 = torch.aten.mean.dim %478, %479, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%481 = torch.aten.add.Scalar %480, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%482 = torch.aten.rsqrt %481 : !torch.tensor -> !torch.tensor
%483 = torch.aten.mul.Tensor %477, %482 : !torch.tensor, !torch.tensor -> !torch.tensor
%484 = torch.prim.GetAttr %arg0["_param_constant23"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%485 = torch.aten.mul.Tensor %484, %483 : !torch.tensor, !torch.tensor -> !torch.tensor
%486 = torch.prim.GetAttr %arg0["_param_constant24"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%487 = torch.aten.t %486 : !torch.tensor -> !torch.tensor
%488 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%489 = torch.aten.view %485, %488 : !torch.tensor, !torch.list<int> -> !torch.tensor
%490 = torch.aten.mm %489, %487 : !torch.tensor, !torch.tensor -> !torch.tensor
%491 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%492 = torch.aten._unsafe_view %490, %491 : !torch.tensor, !torch.list<int> -> !torch.tensor
%493 = torch.aten.relu %492 : !torch.tensor -> !torch.tensor
%494 = torch.prim.GetAttr %arg0["_param_constant25"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%495 = torch.aten.t %494 : !torch.tensor -> !torch.tensor
%496 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%497 = torch.aten.view %493, %496 : !torch.tensor, !torch.list<int> -> !torch.tensor
%498 = torch.aten.mm %497, %495 : !torch.tensor, !torch.tensor -> !torch.tensor
%499 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%500 = torch.aten._unsafe_view %498, %499 : !torch.tensor, !torch.list<int> -> !torch.tensor
%501 = torch.aten.add.Tensor %477, %500, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%502 = torch.aten.pow.Tensor_Scalar %501, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%503 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%504 = torch.aten.mean.dim %502, %503, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%505 = torch.aten.add.Scalar %504, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%506 = torch.aten.rsqrt %505 : !torch.tensor -> !torch.tensor
%507 = torch.aten.mul.Tensor %501, %506 : !torch.tensor, !torch.tensor -> !torch.tensor
%508 = torch.prim.GetAttr %arg0["_param_constant26"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%509 = torch.aten.mul.Tensor %508, %507 : !torch.tensor, !torch.tensor -> !torch.tensor
%510 = torch.prim.GetAttr %arg0["_param_constant27"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%511 = torch.aten.t %510 : !torch.tensor -> !torch.tensor
%512 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%513 = torch.aten.view %509, %512 : !torch.tensor, !torch.list<int> -> !torch.tensor
%514 = torch.aten.mm %513, %511 : !torch.tensor, !torch.tensor -> !torch.tensor
%515 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%516 = torch.aten._unsafe_view %514, %515 : !torch.tensor, !torch.list<int> -> !torch.tensor
%517 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%518 = torch.aten.view %516, %517 : !torch.tensor, !torch.list<int> -> !torch.tensor
%519 = torch.aten.transpose.int %518, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%520 = torch.prim.GetAttr %arg0["_param_constant28"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%521 = torch.aten.t %520 : !torch.tensor -> !torch.tensor
%522 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%523 = torch.aten.view %509, %522 : !torch.tensor, !torch.list<int> -> !torch.tensor
%524 = torch.aten.mm %523, %521 : !torch.tensor, !torch.tensor -> !torch.tensor
%525 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%526 = torch.aten._unsafe_view %524, %525 : !torch.tensor, !torch.list<int> -> !torch.tensor
%527 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%528 = torch.aten.view %526, %527 : !torch.tensor, !torch.list<int> -> !torch.tensor
%529 = torch.aten.transpose.int %528, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%530 = torch.prim.GetAttr %arg0["_param_constant29"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%531 = torch.aten.t %530 : !torch.tensor -> !torch.tensor
%532 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%533 = torch.aten.view %509, %532 : !torch.tensor, !torch.list<int> -> !torch.tensor
%534 = torch.aten.mm %533, %531 : !torch.tensor, !torch.tensor -> !torch.tensor
%535 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%536 = torch.aten._unsafe_view %534, %535 : !torch.tensor, !torch.list<int> -> !torch.tensor
%537 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%538 = torch.aten.view %536, %537 : !torch.tensor, !torch.list<int> -> !torch.tensor
%539 = torch.aten.transpose.int %538, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%540 = torch.aten.transpose.int %529, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%541 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%542 = torch.aten.expand %519, %541, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%543 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%544 = torch.aten.view %542, %543 : !torch.tensor, !torch.list<int> -> !torch.tensor
%545 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%546 = torch.aten.expand %540, %545, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%547 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%548 = torch.aten.view %546, %547 : !torch.tensor, !torch.list<int> -> !torch.tensor
%549 = torch.aten.bmm %544, %548 : !torch.tensor, !torch.tensor -> !torch.tensor
%550 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%551 = torch.aten._unsafe_view %549, %550 : !torch.tensor, !torch.list<int> -> !torch.tensor
%552 = torch.aten.add_.Tensor %551, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%553 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%554 = torch.aten.amax %552, %553, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%555 = torch.aten.sub.Tensor %552, %554, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%556 = torch.aten.exp %555 : !torch.tensor -> !torch.tensor
%557 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%558 = torch.aten.sum.dim_IntList %556, %557, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%559 = torch.aten.div.Tensor %556, %558 : !torch.tensor, !torch.tensor -> !torch.tensor
%560 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%561 = torch.aten.expand %559, %560, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%562 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%563 = torch.aten.view %561, %562 : !torch.tensor, !torch.list<int> -> !torch.tensor
%564 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%565 = torch.aten.expand %539, %564, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%566 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%567 = torch.aten.view %565, %566 : !torch.tensor, !torch.list<int> -> !torch.tensor
%568 = torch.aten.bmm %563, %567 : !torch.tensor, !torch.tensor -> !torch.tensor
%569 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%570 = torch.aten._unsafe_view %568, %569 : !torch.tensor, !torch.list<int> -> !torch.tensor
%571 = torch.aten.transpose.int %570, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%572 = torch.aten.clone %571, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%573 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%574 = torch.aten.view %572, %573 : !torch.tensor, !torch.list<int> -> !torch.tensor
%575 = torch.prim.GetAttr %arg0["_param_constant30"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%576 = torch.aten.t %575 : !torch.tensor -> !torch.tensor
%577 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%578 = torch.aten.view %574, %577 : !torch.tensor, !torch.list<int> -> !torch.tensor
%579 = torch.aten.mm %578, %576 : !torch.tensor, !torch.tensor -> !torch.tensor
%580 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%581 = torch.aten._unsafe_view %579, %580 : !torch.tensor, !torch.list<int> -> !torch.tensor
%582 = torch.aten.add.Tensor %501, %581, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%583 = torch.aten.pow.Tensor_Scalar %582, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%584 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%585 = torch.aten.mean.dim %583, %584, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%586 = torch.aten.add.Scalar %585, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%587 = torch.aten.rsqrt %586 : !torch.tensor -> !torch.tensor
%588 = torch.aten.mul.Tensor %582, %587 : !torch.tensor, !torch.tensor -> !torch.tensor
%589 = torch.prim.GetAttr %arg0["_param_constant31"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%590 = torch.aten.mul.Tensor %589, %588 : !torch.tensor, !torch.tensor -> !torch.tensor
%591 = torch.prim.GetAttr %arg0["_param_constant32"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%592 = torch.aten.t %591 : !torch.tensor -> !torch.tensor
%593 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%594 = torch.aten.view %590, %593 : !torch.tensor, !torch.list<int> -> !torch.tensor
%595 = torch.aten.mm %594, %592 : !torch.tensor, !torch.tensor -> !torch.tensor
%596 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%597 = torch.aten._unsafe_view %595, %596 : !torch.tensor, !torch.list<int> -> !torch.tensor
%598 = torch.aten.relu %597 : !torch.tensor -> !torch.tensor
%599 = torch.prim.GetAttr %arg0["_param_constant33"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%600 = torch.aten.t %599 : !torch.tensor -> !torch.tensor
%601 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%602 = torch.aten.view %598, %601 : !torch.tensor, !torch.list<int> -> !torch.tensor
%603 = torch.aten.mm %602, %600 : !torch.tensor, !torch.tensor -> !torch.tensor
%604 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%605 = torch.aten._unsafe_view %603, %604 : !torch.tensor, !torch.list<int> -> !torch.tensor
%606 = torch.aten.add.Tensor %582, %605, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%607 = torch.aten.pow.Tensor_Scalar %606, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%608 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%609 = torch.aten.mean.dim %607, %608, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%610 = torch.aten.add.Scalar %609, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%611 = torch.aten.rsqrt %610 : !torch.tensor -> !torch.tensor
%612 = torch.aten.mul.Tensor %606, %611 : !torch.tensor, !torch.tensor -> !torch.tensor
%613 = torch.prim.GetAttr %arg0["_param_constant34"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%614 = torch.aten.mul.Tensor %613, %612 : !torch.tensor, !torch.tensor -> !torch.tensor
%615 = torch.prim.GetAttr %arg0["_param_constant35"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%616 = torch.aten.t %615 : !torch.tensor -> !torch.tensor
%617 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%618 = torch.aten.view %614, %617 : !torch.tensor, !torch.list<int> -> !torch.tensor
%619 = torch.aten.mm %618, %616 : !torch.tensor, !torch.tensor -> !torch.tensor
%620 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%621 = torch.aten._unsafe_view %619, %620 : !torch.tensor, !torch.list<int> -> !torch.tensor
%622 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%623 = torch.aten.view %621, %622 : !torch.tensor, !torch.list<int> -> !torch.tensor
%624 = torch.aten.transpose.int %623, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%625 = torch.prim.GetAttr %arg0["_param_constant36"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%626 = torch.aten.t %625 : !torch.tensor -> !torch.tensor
%627 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%628 = torch.aten.view %614, %627 : !torch.tensor, !torch.list<int> -> !torch.tensor
%629 = torch.aten.mm %628, %626 : !torch.tensor, !torch.tensor -> !torch.tensor
%630 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%631 = torch.aten._unsafe_view %629, %630 : !torch.tensor, !torch.list<int> -> !torch.tensor
%632 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%633 = torch.aten.view %631, %632 : !torch.tensor, !torch.list<int> -> !torch.tensor
%634 = torch.aten.transpose.int %633, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%635 = torch.prim.GetAttr %arg0["_param_constant37"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%636 = torch.aten.t %635 : !torch.tensor -> !torch.tensor
%637 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%638 = torch.aten.view %614, %637 : !torch.tensor, !torch.list<int> -> !torch.tensor
%639 = torch.aten.mm %638, %636 : !torch.tensor, !torch.tensor -> !torch.tensor
%640 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%641 = torch.aten._unsafe_view %639, %640 : !torch.tensor, !torch.list<int> -> !torch.tensor
%642 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%643 = torch.aten.view %641, %642 : !torch.tensor, !torch.list<int> -> !torch.tensor
%644 = torch.aten.transpose.int %643, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%645 = torch.aten.transpose.int %634, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%646 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%647 = torch.aten.expand %624, %646, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%648 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%649 = torch.aten.view %647, %648 : !torch.tensor, !torch.list<int> -> !torch.tensor
%650 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%651 = torch.aten.expand %645, %650, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%652 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%653 = torch.aten.view %651, %652 : !torch.tensor, !torch.list<int> -> !torch.tensor
%654 = torch.aten.bmm %649, %653 : !torch.tensor, !torch.tensor -> !torch.tensor
%655 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%656 = torch.aten._unsafe_view %654, %655 : !torch.tensor, !torch.list<int> -> !torch.tensor
%657 = torch.aten.add_.Tensor %656, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%658 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%659 = torch.aten.amax %657, %658, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%660 = torch.aten.sub.Tensor %657, %659, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%661 = torch.aten.exp %660 : !torch.tensor -> !torch.tensor
%662 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%663 = torch.aten.sum.dim_IntList %661, %662, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%664 = torch.aten.div.Tensor %661, %663 : !torch.tensor, !torch.tensor -> !torch.tensor
%665 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%666 = torch.aten.expand %664, %665, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%667 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%668 = torch.aten.view %666, %667 : !torch.tensor, !torch.list<int> -> !torch.tensor
%669 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%670 = torch.aten.expand %644, %669, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%671 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%672 = torch.aten.view %670, %671 : !torch.tensor, !torch.list<int> -> !torch.tensor
%673 = torch.aten.bmm %668, %672 : !torch.tensor, !torch.tensor -> !torch.tensor
%674 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%675 = torch.aten._unsafe_view %673, %674 : !torch.tensor, !torch.list<int> -> !torch.tensor
%676 = torch.aten.transpose.int %675, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%677 = torch.aten.clone %676, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%678 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%679 = torch.aten.view %677, %678 : !torch.tensor, !torch.list<int> -> !torch.tensor
%680 = torch.prim.GetAttr %arg0["_param_constant38"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%681 = torch.aten.t %680 : !torch.tensor -> !torch.tensor
%682 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%683 = torch.aten.view %679, %682 : !torch.tensor, !torch.list<int> -> !torch.tensor
%684 = torch.aten.mm %683, %681 : !torch.tensor, !torch.tensor -> !torch.tensor
%685 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%686 = torch.aten._unsafe_view %684, %685 : !torch.tensor, !torch.list<int> -> !torch.tensor
%687 = torch.aten.add.Tensor %606, %686, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%688 = torch.aten.pow.Tensor_Scalar %687, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%689 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%690 = torch.aten.mean.dim %688, %689, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%691 = torch.aten.add.Scalar %690, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%692 = torch.aten.rsqrt %691 : !torch.tensor -> !torch.tensor
%693 = torch.aten.mul.Tensor %687, %692 : !torch.tensor, !torch.tensor -> !torch.tensor
%694 = torch.prim.GetAttr %arg0["_param_constant39"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%695 = torch.aten.mul.Tensor %694, %693 : !torch.tensor, !torch.tensor -> !torch.tensor
%696 = torch.prim.GetAttr %arg0["_param_constant40"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%697 = torch.aten.t %696 : !torch.tensor -> !torch.tensor
%698 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%699 = torch.aten.view %695, %698 : !torch.tensor, !torch.list<int> -> !torch.tensor
%700 = torch.aten.mm %699, %697 : !torch.tensor, !torch.tensor -> !torch.tensor
%701 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%702 = torch.aten._unsafe_view %700, %701 : !torch.tensor, !torch.list<int> -> !torch.tensor
%703 = torch.aten.relu %702 : !torch.tensor -> !torch.tensor
%704 = torch.prim.GetAttr %arg0["_param_constant41"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%705 = torch.aten.t %704 : !torch.tensor -> !torch.tensor
%706 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%707 = torch.aten.view %703, %706 : !torch.tensor, !torch.list<int> -> !torch.tensor
%708 = torch.aten.mm %707, %705 : !torch.tensor, !torch.tensor -> !torch.tensor
%709 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%710 = torch.aten._unsafe_view %708, %709 : !torch.tensor, !torch.list<int> -> !torch.tensor
%711 = torch.aten.add.Tensor %687, %710, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%712 = torch.aten.pow.Tensor_Scalar %711, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%713 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%714 = torch.aten.mean.dim %712, %713, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%715 = torch.aten.add.Scalar %714, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%716 = torch.aten.rsqrt %715 : !torch.tensor -> !torch.tensor
%717 = torch.aten.mul.Tensor %711, %716 : !torch.tensor, !torch.tensor -> !torch.tensor
%718 = torch.prim.GetAttr %arg0["_param_constant42"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%719 = torch.aten.mul.Tensor %718, %717 : !torch.tensor, !torch.tensor -> !torch.tensor
%720 = torch.prim.GetAttr %arg0["_param_constant43"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%721 = torch.aten.t %720 : !torch.tensor -> !torch.tensor
%722 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%723 = torch.aten.view %719, %722 : !torch.tensor, !torch.list<int> -> !torch.tensor
%724 = torch.aten.mm %723, %721 : !torch.tensor, !torch.tensor -> !torch.tensor
%725 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%726 = torch.aten._unsafe_view %724, %725 : !torch.tensor, !torch.list<int> -> !torch.tensor
%727 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%728 = torch.aten.view %726, %727 : !torch.tensor, !torch.list<int> -> !torch.tensor
%729 = torch.aten.transpose.int %728, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%730 = torch.prim.GetAttr %arg0["_param_constant44"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%731 = torch.aten.t %730 : !torch.tensor -> !torch.tensor
%732 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%733 = torch.aten.view %719, %732 : !torch.tensor, !torch.list<int> -> !torch.tensor
%734 = torch.aten.mm %733, %731 : !torch.tensor, !torch.tensor -> !torch.tensor
%735 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%736 = torch.aten._unsafe_view %734, %735 : !torch.tensor, !torch.list<int> -> !torch.tensor
%737 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%738 = torch.aten.view %736, %737 : !torch.tensor, !torch.list<int> -> !torch.tensor
%739 = torch.aten.transpose.int %738, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%740 = torch.prim.GetAttr %arg0["_param_constant45"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%741 = torch.aten.t %740 : !torch.tensor -> !torch.tensor
%742 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%743 = torch.aten.view %719, %742 : !torch.tensor, !torch.list<int> -> !torch.tensor
%744 = torch.aten.mm %743, %741 : !torch.tensor, !torch.tensor -> !torch.tensor
%745 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%746 = torch.aten._unsafe_view %744, %745 : !torch.tensor, !torch.list<int> -> !torch.tensor
%747 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%748 = torch.aten.view %746, %747 : !torch.tensor, !torch.list<int> -> !torch.tensor
%749 = torch.aten.transpose.int %748, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%750 = torch.aten.transpose.int %739, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%751 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%752 = torch.aten.expand %729, %751, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%753 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%754 = torch.aten.view %752, %753 : !torch.tensor, !torch.list<int> -> !torch.tensor
%755 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%756 = torch.aten.expand %750, %755, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%757 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%758 = torch.aten.view %756, %757 : !torch.tensor, !torch.list<int> -> !torch.tensor
%759 = torch.aten.bmm %754, %758 : !torch.tensor, !torch.tensor -> !torch.tensor
%760 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%761 = torch.aten._unsafe_view %759, %760 : !torch.tensor, !torch.list<int> -> !torch.tensor
%762 = torch.aten.add_.Tensor %761, %236, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%763 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%764 = torch.aten.amax %762, %763, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%765 = torch.aten.sub.Tensor %762, %764, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%766 = torch.aten.exp %765 : !torch.tensor -> !torch.tensor
%767 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%768 = torch.aten.sum.dim_IntList %766, %767, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%769 = torch.aten.div.Tensor %766, %768 : !torch.tensor, !torch.tensor -> !torch.tensor
%770 = torch.prim.ListConstruct %int1, %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%771 = torch.aten.expand %769, %770, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%772 = torch.prim.ListConstruct %int8, %int15, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%773 = torch.aten.view %771, %772 : !torch.tensor, !torch.list<int> -> !torch.tensor
%774 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%775 = torch.aten.expand %749, %774, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%776 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%777 = torch.aten.view %775, %776 : !torch.tensor, !torch.list<int> -> !torch.tensor
%778 = torch.aten.bmm %773, %777 : !torch.tensor, !torch.tensor -> !torch.tensor
%779 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%780 = torch.aten._unsafe_view %778, %779 : !torch.tensor, !torch.list<int> -> !torch.tensor
%781 = torch.aten.transpose.int %780, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%782 = torch.aten.clone %781, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%783 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%784 = torch.aten.view %782, %783 : !torch.tensor, !torch.list<int> -> !torch.tensor
%785 = torch.prim.GetAttr %arg0["_param_constant46"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%786 = torch.aten.t %785 : !torch.tensor -> !torch.tensor
%787 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%788 = torch.aten.view %784, %787 : !torch.tensor, !torch.list<int> -> !torch.tensor
%789 = torch.aten.mm %788, %786 : !torch.tensor, !torch.tensor -> !torch.tensor
%790 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%791 = torch.aten._unsafe_view %789, %790 : !torch.tensor, !torch.list<int> -> !torch.tensor
%792 = torch.aten.add.Tensor %711, %791, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%793 = torch.aten.pow.Tensor_Scalar %792, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%794 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%795 = torch.aten.mean.dim %793, %794, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%796 = torch.aten.add.Scalar %795, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%797 = torch.aten.rsqrt %796 : !torch.tensor -> !torch.tensor
%798 = torch.aten.mul.Tensor %792, %797 : !torch.tensor, !torch.tensor -> !torch.tensor
%799 = torch.prim.GetAttr %arg0["_param_constant47"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%800 = torch.aten.mul.Tensor %799, %798 : !torch.tensor, !torch.tensor -> !torch.tensor
%801 = torch.prim.GetAttr %arg0["_param_constant48"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%802 = torch.aten.t %801 : !torch.tensor -> !torch.tensor
%803 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%804 = torch.aten.view %800, %803 : !torch.tensor, !torch.list<int> -> !torch.tensor
%805 = torch.aten.mm %804, %802 : !torch.tensor, !torch.tensor -> !torch.tensor
%806 = torch.prim.ListConstruct %int1, %int15, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%807 = torch.aten._unsafe_view %805, %806 : !torch.tensor, !torch.list<int> -> !torch.tensor
%808 = torch.aten.relu %807 : !torch.tensor -> !torch.tensor
%809 = torch.prim.GetAttr %arg0["_param_constant49"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%810 = torch.aten.t %809 : !torch.tensor -> !torch.tensor
%811 = torch.prim.ListConstruct %int15, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%812 = torch.aten.view %808, %811 : !torch.tensor, !torch.list<int> -> !torch.tensor
%813 = torch.aten.mm %812, %810 : !torch.tensor, !torch.tensor -> !torch.tensor
%814 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%815 = torch.aten._unsafe_view %813, %814 : !torch.tensor, !torch.list<int> -> !torch.tensor
%816 = torch.aten.add.Tensor %792, %815, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%817 = torch.aten.pow.Tensor_Scalar %816, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%818 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%819 = torch.aten.mean.dim %817, %818, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%820 = torch.aten.add.Scalar %819, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%821 = torch.aten.rsqrt %820 : !torch.tensor -> !torch.tensor
%822 = torch.aten.mul.Tensor %816, %821 : !torch.tensor, !torch.tensor -> !torch.tensor
%823 = torch.prim.GetAttr %arg0["_param_constant50"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%824 = torch.aten.mul.Tensor %823, %822 : !torch.tensor, !torch.tensor -> !torch.tensor
%825 = torch.prim.ListConstruct %int-1, %int4 : (!torch.int, !torch.int) -> !torch.list<int>
%826 = torch.aten.view %144, %825 : !torch.tensor, !torch.list<int> -> !torch.tensor
%827 = torch.prim.GetAttr %arg0["_param_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%828 = torch.aten.embedding %827, %826, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%829 = torch.prim.ListConstruct %int1, %int4 : (!torch.int, !torch.int) -> !torch.list<int>
%830 = torch.aten.ones %829, %none_1, %none_1, %cpu, %false : !torch.list<int>, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%831 = torch.prim.ListConstruct %int1, %int15 : (!torch.int, !torch.int) -> !torch.list<int>
%832 = torch.aten.ones %831, %int4, %none_1, %cpu, %false : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%833 = torch.aten.arange %int4, %none_1, %none_1, %cpu, %false : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%834 = torch.aten.unsqueeze %833, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%835 = torch.aten.unsqueeze %834, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%836 = torch.aten.slice.Tensor %835, %int2, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%837 = torch.prim.ListConstruct %int1, %int4, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%838 = torch.aten.repeat %836, %837 : !torch.tensor, !torch.list<int> -> !torch.tensor
%839 = torch.aten.unsqueeze %833, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%840 = torch.aten.slice.Tensor %839, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%841 = torch.aten.unsqueeze %840, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%842 = torch.aten.le.Tensor %838, %841 : !torch.tensor, !torch.tensor -> !torch.tensor
%843 = torch.prims.convert_element_type %842, %int6 : !torch.tensor, !torch.int -> !torch.tensor
%844 = torch.aten.slice.Tensor %843, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%845 = torch.aten.unsqueeze %844, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%846 = torch.aten.slice.Tensor %845, %int2, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%847 = torch.aten.slice.Tensor %846, %int3, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%848 = torch.aten.slice.Tensor %830, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%849 = torch.aten.unsqueeze %848, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%850 = torch.aten.unsqueeze %849, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%851 = torch.aten.slice.Tensor %850, %int3, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%852 = torch.aten.mul.Tensor %847, %851 : !torch.tensor, !torch.tensor -> !torch.tensor
%853 = torch.aten.rsub.Scalar %852, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%854 = torch.aten.mul.Scalar %853, %float-3.402820e38 : !torch.tensor, !torch.float -> !torch.tensor
%855 = torch.aten.slice.Tensor %832, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%856 = torch.aten.unsqueeze %855, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%857 = torch.aten.unsqueeze %856, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%858 = torch.aten.slice.Tensor %857, %int3, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%859 = torch.prims.convert_element_type %858, %int6 : !torch.tensor, !torch.int -> !torch.tensor
%860 = torch.aten.rsub.Scalar %859, %float1.000000e00, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%861 = torch.aten.mul.Scalar %860, %float-3.402820e38 : !torch.tensor, !torch.float -> !torch.tensor
%862 = torch.aten.pow.Tensor_Scalar %828, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%863 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%864 = torch.aten.mean.dim %862, %863, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%865 = torch.aten.add.Scalar %864, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%866 = torch.aten.rsqrt %865 : !torch.tensor -> !torch.tensor
%867 = torch.aten.mul.Tensor %828, %866 : !torch.tensor, !torch.tensor -> !torch.tensor
%868 = torch.prim.GetAttr %arg0["_param_constant51"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%869 = torch.aten.mul.Tensor %868, %867 : !torch.tensor, !torch.tensor -> !torch.tensor
%870 = torch.prim.GetAttr %arg0["_param_constant52"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%871 = torch.aten.t %870 : !torch.tensor -> !torch.tensor
%872 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%873 = torch.aten.view %869, %872 : !torch.tensor, !torch.list<int> -> !torch.tensor
%874 = torch.aten.mm %873, %871 : !torch.tensor, !torch.tensor -> !torch.tensor
%875 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%876 = torch.aten._unsafe_view %874, %875 : !torch.tensor, !torch.list<int> -> !torch.tensor
%877 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%878 = torch.aten.view %876, %877 : !torch.tensor, !torch.list<int> -> !torch.tensor
%879 = torch.aten.transpose.int %878, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%880 = torch.prim.GetAttr %arg0["_param_constant53"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%881 = torch.aten.t %880 : !torch.tensor -> !torch.tensor
%882 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%883 = torch.aten.view %869, %882 : !torch.tensor, !torch.list<int> -> !torch.tensor
%884 = torch.aten.mm %883, %881 : !torch.tensor, !torch.tensor -> !torch.tensor
%885 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%886 = torch.aten._unsafe_view %884, %885 : !torch.tensor, !torch.list<int> -> !torch.tensor
%887 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%888 = torch.aten.view %886, %887 : !torch.tensor, !torch.list<int> -> !torch.tensor
%889 = torch.aten.transpose.int %888, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%890 = torch.prim.GetAttr %arg0["_param_constant54"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%891 = torch.aten.t %890 : !torch.tensor -> !torch.tensor
%892 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%893 = torch.aten.view %869, %892 : !torch.tensor, !torch.list<int> -> !torch.tensor
%894 = torch.aten.mm %893, %891 : !torch.tensor, !torch.tensor -> !torch.tensor
%895 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%896 = torch.aten._unsafe_view %894, %895 : !torch.tensor, !torch.list<int> -> !torch.tensor
%897 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%898 = torch.aten.view %896, %897 : !torch.tensor, !torch.list<int> -> !torch.tensor
%899 = torch.aten.transpose.int %898, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%900 = torch.aten.transpose.int %889, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%901 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%902 = torch.aten.expand %879, %901, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%903 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%904 = torch.aten.view %902, %903 : !torch.tensor, !torch.list<int> -> !torch.tensor
%905 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%906 = torch.aten.expand %900, %905, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%907 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%908 = torch.aten.view %906, %907 : !torch.tensor, !torch.list<int> -> !torch.tensor
%909 = torch.aten.bmm %904, %908 : !torch.tensor, !torch.tensor -> !torch.tensor
%910 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%911 = torch.aten._unsafe_view %909, %910 : !torch.tensor, !torch.list<int> -> !torch.tensor
%912 = torch.aten.arange %int4, %int4, %none_1, %cpu, %false : !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%913 = torch.aten.slice.Tensor %912, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%914 = torch.aten.unsqueeze %913, %int1 : !torch.tensor, !torch.int -> !torch.tensor
%915 = torch.aten.arange %int4, %int4, %none_1, %cpu, %false : !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%916 = torch.aten.unsqueeze %915, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%917 = torch.aten.slice.Tensor %916, %int1, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor
%918 = torch.aten.sub.Tensor %917, %914, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%919 = torch.aten.zeros_like %918, %int4, %int0, %cpu, %false, %none_1 : !torch.tensor, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.tensor
%920 = torch.aten.minimum %918, %919 : !torch.tensor, !torch.tensor -> !torch.tensor
%921 = torch.aten.neg %920 : !torch.tensor -> !torch.tensor
%922 = torch.aten.lt.Scalar %921, %int16 : !torch.tensor, !torch.int -> !torch.tensor
%923 = torch.prims.convert_element_type %921, %int6 : !torch.tensor, !torch.int -> !torch.tensor
%924 = torch.aten.div.Scalar %923, %int16 : !torch.tensor, !torch.int -> !torch.tensor
%925 = torch.aten.log %924 : !torch.tensor -> !torch.tensor
%926 = torch.aten.div.Scalar %925, %float2.079440e00 : !torch.tensor, !torch.float -> !torch.tensor
%927 = torch.aten.mul.Scalar %926, %int16 : !torch.tensor, !torch.int -> !torch.tensor
%928 = torch.prims.convert_element_type %927, %int4 : !torch.tensor, !torch.int -> !torch.tensor
%929 = torch.aten.add.Scalar %928, %int16, %int1 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%930 = torch.aten.full_like %929, %int31, %int4, %int0, %cpu, %false, %none_1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.tensor
%931 = torch.aten.minimum %929, %930 : !torch.tensor, !torch.tensor -> !torch.tensor
%932 = torch.aten.where.self %922, %921, %931 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor
%933 = torch.aten.add.Scalar %932, %int0, %int1 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%934 = torch.prim.GetAttr %arg0["_param_constant55"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%935 = torch.aten.embedding %934, %933, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor
%936 = torch.prim.ListConstruct %int2, %int0, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%937 = torch.aten.permute %935, %936 : !torch.tensor, !torch.list<int> -> !torch.tensor
%938 = torch.aten.unsqueeze %937, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%939 = torch.aten.add.Tensor %938, %854, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%940 = torch.aten.add_.Tensor %911, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%941 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%942 = torch.aten.amax %940, %941, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%943 = torch.aten.sub.Tensor %940, %942, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%944 = torch.aten.exp %943 : !torch.tensor -> !torch.tensor
%945 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%946 = torch.aten.sum.dim_IntList %944, %945, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%947 = torch.aten.div.Tensor %944, %946 : !torch.tensor, !torch.tensor -> !torch.tensor
%948 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%949 = torch.aten.expand %947, %948, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%950 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%951 = torch.aten.view %949, %950 : !torch.tensor, !torch.list<int> -> !torch.tensor
%952 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%953 = torch.aten.expand %899, %952, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%954 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%955 = torch.aten.view %953, %954 : !torch.tensor, !torch.list<int> -> !torch.tensor
%956 = torch.aten.bmm %951, %955 : !torch.tensor, !torch.tensor -> !torch.tensor
%957 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%958 = torch.aten._unsafe_view %956, %957 : !torch.tensor, !torch.list<int> -> !torch.tensor
%959 = torch.aten.transpose.int %958, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%960 = torch.aten.clone %959, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%961 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%962 = torch.aten.view %960, %961 : !torch.tensor, !torch.list<int> -> !torch.tensor
%963 = torch.prim.GetAttr %arg0["_param_constant56"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%964 = torch.aten.t %963 : !torch.tensor -> !torch.tensor
%965 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%966 = torch.aten.view %962, %965 : !torch.tensor, !torch.list<int> -> !torch.tensor
%967 = torch.aten.mm %966, %964 : !torch.tensor, !torch.tensor -> !torch.tensor
%968 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%969 = torch.aten._unsafe_view %967, %968 : !torch.tensor, !torch.list<int> -> !torch.tensor
%970 = torch.aten.add.Tensor %828, %969, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%971 = torch.aten.pow.Tensor_Scalar %970, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%972 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%973 = torch.aten.mean.dim %971, %972, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%974 = torch.aten.add.Scalar %973, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%975 = torch.aten.rsqrt %974 : !torch.tensor -> !torch.tensor
%976 = torch.aten.mul.Tensor %970, %975 : !torch.tensor, !torch.tensor -> !torch.tensor
%977 = torch.prim.GetAttr %arg0["_param_constant57"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%978 = torch.aten.mul.Tensor %977, %976 : !torch.tensor, !torch.tensor -> !torch.tensor
%979 = torch.prim.GetAttr %arg0["_param_constant58"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%980 = torch.aten.t %979 : !torch.tensor -> !torch.tensor
%981 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%982 = torch.aten.view %978, %981 : !torch.tensor, !torch.list<int> -> !torch.tensor
%983 = torch.aten.mm %982, %980 : !torch.tensor, !torch.tensor -> !torch.tensor
%984 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%985 = torch.aten._unsafe_view %983, %984 : !torch.tensor, !torch.list<int> -> !torch.tensor
%986 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%987 = torch.aten.view %985, %986 : !torch.tensor, !torch.list<int> -> !torch.tensor
%988 = torch.aten.transpose.int %987, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%989 = torch.prim.GetAttr %arg0["_param_constant59"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%990 = torch.aten.t %989 : !torch.tensor -> !torch.tensor
%991 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%992 = torch.aten.view %824, %991 : !torch.tensor, !torch.list<int> -> !torch.tensor
%993 = torch.aten.mm %992, %990 : !torch.tensor, !torch.tensor -> !torch.tensor
%994 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%995 = torch.aten._unsafe_view %993, %994 : !torch.tensor, !torch.list<int> -> !torch.tensor
%996 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%997 = torch.aten.view %995, %996 : !torch.tensor, !torch.list<int> -> !torch.tensor
%998 = torch.aten.transpose.int %997, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%999 = torch.prim.GetAttr %arg0["_param_constant60"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1000 = torch.aten.t %999 : !torch.tensor -> !torch.tensor
%1001 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1002 = torch.aten.view %824, %1001 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1003 = torch.aten.mm %1002, %1000 : !torch.tensor, !torch.tensor -> !torch.tensor
%1004 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1005 = torch.aten._unsafe_view %1003, %1004 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1006 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1007 = torch.aten.view %1005, %1006 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1008 = torch.aten.transpose.int %1007, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1009 = torch.aten.transpose.int %998, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1010 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1011 = torch.aten.expand %988, %1010, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1012 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1013 = torch.aten.view %1011, %1012 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1014 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1015 = torch.aten.expand %1009, %1014, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1016 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1017 = torch.aten.view %1015, %1016 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1018 = torch.aten.bmm %1013, %1017 : !torch.tensor, !torch.tensor -> !torch.tensor
%1019 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1020 = torch.aten._unsafe_view %1018, %1019 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1021 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1022 = torch.aten.zeros %1021, %int6, %none_1, %cpu, %false : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.tensor
%1023 = torch.aten.add.Tensor %1022, %861, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1024 = torch.aten.add_.Tensor %1020, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1025 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1026 = torch.aten.amax %1024, %1025, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1027 = torch.aten.sub.Tensor %1024, %1026, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1028 = torch.aten.exp %1027 : !torch.tensor -> !torch.tensor
%1029 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1030 = torch.aten.sum.dim_IntList %1028, %1029, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1031 = torch.aten.div.Tensor %1028, %1030 : !torch.tensor, !torch.tensor -> !torch.tensor
%1032 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1033 = torch.aten.expand %1031, %1032, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1034 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1035 = torch.aten.view %1033, %1034 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1036 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1037 = torch.aten.expand %1008, %1036, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1038 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1039 = torch.aten.view %1037, %1038 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1040 = torch.aten.bmm %1035, %1039 : !torch.tensor, !torch.tensor -> !torch.tensor
%1041 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1042 = torch.aten._unsafe_view %1040, %1041 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1043 = torch.aten.transpose.int %1042, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// NOTE(review): interior of @forward — the function's header and terminator lie outside
// this chunk. The op sequence matches a T5-style decoder stack lowered via torch.fx:
// repeating [RMS norm -> self-attention -> residual], [RMS norm -> cross-attention over
// encoder states -> residual], [RMS norm -> ReLU feed-forward -> residual] blocks
// — presumably T5; TODO confirm against the exporting model.
// Shape facts visible below: d_model = 512, 8 heads x 64 = 512, FFN inner dim = 2048,
// decoder seq len = 4, encoder seq len = 15, batch = 1.
// %824 (defined earlier) is reused as encoder hidden states for every cross-attention
// K/V projection; %939 and %1023 (defined earlier) are additive attention biases/masks
// for self- and cross-attention respectively — assumed precomputed position bias + mask.
//
// --- Cross-attention epilogue: merge heads (transpose+clone made contiguous, then
// view to [1,-1,512]) and apply output projection _param_constant61 ---
%1044 = torch.aten.clone %1043, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1045 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1046 = torch.aten.view %1044, %1045 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1047 = torch.prim.GetAttr %arg0["_param_constant61"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1048 = torch.aten.t %1047 : !torch.tensor -> !torch.tensor
%1049 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1050 = torch.aten.view %1046, %1049 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1051 = torch.aten.mm %1050, %1048 : !torch.tensor, !torch.tensor -> !torch.tensor
%1052 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1053 = torch.aten._unsafe_view %1051, %1052 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection: add attention output back onto the stream (%970, defined earlier).
%1054 = torch.aten.add.Tensor %970, %1053, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// RMS-style layer norm: mean(x^2) over last dim, + eps (~1e-6), rsqrt, then scale by
// learned weight _param_constant62. No mean subtraction and no bias term.
%1055 = torch.aten.pow.Tensor_Scalar %1054, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1056 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1057 = torch.aten.mean.dim %1055, %1056, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1058 = torch.aten.add.Scalar %1057, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1059 = torch.aten.rsqrt %1058 : !torch.tensor -> !torch.tensor
%1060 = torch.aten.mul.Tensor %1054, %1059 : !torch.tensor, !torch.tensor -> !torch.tensor
%1061 = torch.prim.GetAttr %arg0["_param_constant62"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1062 = torch.aten.mul.Tensor %1061, %1060 : !torch.tensor, !torch.tensor -> !torch.tensor
// Feed-forward: wi (_param_constant63) 512 -> 2048, ReLU, wo (_param_constant64) 2048 -> 512.
%1063 = torch.prim.GetAttr %arg0["_param_constant63"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1064 = torch.aten.t %1063 : !torch.tensor -> !torch.tensor
%1065 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1066 = torch.aten.view %1062, %1065 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1067 = torch.aten.mm %1066, %1064 : !torch.tensor, !torch.tensor -> !torch.tensor
%1068 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1069 = torch.aten._unsafe_view %1067, %1068 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1070 = torch.aten.relu %1069 : !torch.tensor -> !torch.tensor
%1071 = torch.prim.GetAttr %arg0["_param_constant64"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1072 = torch.aten.t %1071 : !torch.tensor -> !torch.tensor
%1073 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%1074 = torch.aten.view %1070, %1073 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1075 = torch.aten.mm %1074, %1072 : !torch.tensor, !torch.tensor -> !torch.tensor
%1076 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1077 = torch.aten._unsafe_view %1075, %1076 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection around the feed-forward block.
%1078 = torch.aten.add.Tensor %1054, %1077, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// RMS norm (weight _param_constant65) before the next block's self-attention.
%1079 = torch.aten.pow.Tensor_Scalar %1078, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1080 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1081 = torch.aten.mean.dim %1079, %1080, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1082 = torch.aten.add.Scalar %1081, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1083 = torch.aten.rsqrt %1082 : !torch.tensor -> !torch.tensor
%1084 = torch.aten.mul.Tensor %1078, %1083 : !torch.tensor, !torch.tensor -> !torch.tensor
%1085 = torch.prim.GetAttr %arg0["_param_constant65"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1086 = torch.aten.mul.Tensor %1085, %1084 : !torch.tensor, !torch.tensor -> !torch.tensor
// Self-attention Q projection (_param_constant66): [1,4,512] -> per-head [1,8,4,64].
%1087 = torch.prim.GetAttr %arg0["_param_constant66"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1088 = torch.aten.t %1087 : !torch.tensor -> !torch.tensor
%1089 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1090 = torch.aten.view %1086, %1089 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1091 = torch.aten.mm %1090, %1088 : !torch.tensor, !torch.tensor -> !torch.tensor
%1092 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1093 = torch.aten._unsafe_view %1091, %1092 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1094 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1095 = torch.aten.view %1093, %1094 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1096 = torch.aten.transpose.int %1095, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// K projection (_param_constant67), same head split/transpose.
%1097 = torch.prim.GetAttr %arg0["_param_constant67"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1098 = torch.aten.t %1097 : !torch.tensor -> !torch.tensor
%1099 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1100 = torch.aten.view %1086, %1099 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1101 = torch.aten.mm %1100, %1098 : !torch.tensor, !torch.tensor -> !torch.tensor
%1102 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1103 = torch.aten._unsafe_view %1101, %1102 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1104 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1105 = torch.aten.view %1103, %1104 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1106 = torch.aten.transpose.int %1105, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// V projection (_param_constant68), same head split/transpose.
%1107 = torch.prim.GetAttr %arg0["_param_constant68"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1108 = torch.aten.t %1107 : !torch.tensor -> !torch.tensor
%1109 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1110 = torch.aten.view %1086, %1109 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1111 = torch.aten.mm %1110, %1108 : !torch.tensor, !torch.tensor -> !torch.tensor
%1112 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1113 = torch.aten._unsafe_view %1111, %1112 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1114 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1115 = torch.aten.view %1113, %1114 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1116 = torch.aten.transpose.int %1115, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Attention scores: Q @ K^T via flattened batched matmul -> [1,8,4,4]. There is no
// explicit 1/sqrt(d) scaling in this chunk — NOTE(review): consistent with T5, which
// folds scaling into the weights; confirm against the source model.
%1117 = torch.aten.transpose.int %1106, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1118 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1119 = torch.aten.expand %1096, %1118, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1120 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1121 = torch.aten.view %1119, %1120 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1122 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1123 = torch.aten.expand %1117, %1122, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1124 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1125 = torch.aten.view %1123, %1124 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1126 = torch.aten.bmm %1121, %1125 : !torch.tensor, !torch.tensor -> !torch.tensor
%1127 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1128 = torch.aten._unsafe_view %1126, %1127 : !torch.tensor, !torch.list<int> -> !torch.tensor
// In-place add of the precomputed self-attention bias/mask %939 (defined earlier).
%1129 = torch.aten.add_.Tensor %1128, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Numerically-stable softmax over the last dim: amax, subtract, exp, sum, divide.
%1130 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1131 = torch.aten.amax %1129, %1130, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1132 = torch.aten.sub.Tensor %1129, %1131, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1133 = torch.aten.exp %1132 : !torch.tensor -> !torch.tensor
%1134 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1135 = torch.aten.sum.dim_IntList %1133, %1134, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1136 = torch.aten.div.Tensor %1133, %1135 : !torch.tensor, !torch.tensor -> !torch.tensor
// Attention context: softmax(scores) @ V -> [1,8,4,64].
%1137 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1138 = torch.aten.expand %1136, %1137, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1139 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1140 = torch.aten.view %1138, %1139 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1141 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1142 = torch.aten.expand %1116, %1141, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1143 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1144 = torch.aten.view %1142, %1143 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1145 = torch.aten.bmm %1140, %1144 : !torch.tensor, !torch.tensor -> !torch.tensor
%1146 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1147 = torch.aten._unsafe_view %1145, %1146 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Merge heads back to [1,-1,512] (clone makes the transposed tensor contiguous for view).
%1148 = torch.aten.transpose.int %1147, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1149 = torch.aten.clone %1148, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1150 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1151 = torch.aten.view %1149, %1150 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Self-attention output projection (_param_constant69).
%1152 = torch.prim.GetAttr %arg0["_param_constant69"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1153 = torch.aten.t %1152 : !torch.tensor -> !torch.tensor
%1154 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1155 = torch.aten.view %1151, %1154 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1156 = torch.aten.mm %1155, %1153 : !torch.tensor, !torch.tensor -> !torch.tensor
%1157 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1158 = torch.aten._unsafe_view %1156, %1157 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection around self-attention.
%1159 = torch.aten.add.Tensor %1078, %1158, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// RMS norm (weight _param_constant70) before cross-attention.
%1160 = torch.aten.pow.Tensor_Scalar %1159, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1161 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1162 = torch.aten.mean.dim %1160, %1161, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1163 = torch.aten.add.Scalar %1162, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1164 = torch.aten.rsqrt %1163 : !torch.tensor -> !torch.tensor
%1165 = torch.aten.mul.Tensor %1159, %1164 : !torch.tensor, !torch.tensor -> !torch.tensor
%1166 = torch.prim.GetAttr %arg0["_param_constant70"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1167 = torch.aten.mul.Tensor %1166, %1165 : !torch.tensor, !torch.tensor -> !torch.tensor
// Cross-attention Q from the decoder stream (_param_constant71) -> [1,8,4,64].
%1168 = torch.prim.GetAttr %arg0["_param_constant71"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1169 = torch.aten.t %1168 : !torch.tensor -> !torch.tensor
%1170 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1171 = torch.aten.view %1167, %1170 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1172 = torch.aten.mm %1171, %1169 : !torch.tensor, !torch.tensor -> !torch.tensor
%1173 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1174 = torch.aten._unsafe_view %1172, %1173 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1175 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1176 = torch.aten.view %1174, %1175 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1177 = torch.aten.transpose.int %1176, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention K (_param_constant72) from encoder states %824 (seq len 15) -> [1,8,15,64].
%1178 = torch.prim.GetAttr %arg0["_param_constant72"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1179 = torch.aten.t %1178 : !torch.tensor -> !torch.tensor
%1180 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1181 = torch.aten.view %824, %1180 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1182 = torch.aten.mm %1181, %1179 : !torch.tensor, !torch.tensor -> !torch.tensor
%1183 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1184 = torch.aten._unsafe_view %1182, %1183 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1185 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1186 = torch.aten.view %1184, %1185 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1187 = torch.aten.transpose.int %1186, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention V (_param_constant73) from encoder states %824.
%1188 = torch.prim.GetAttr %arg0["_param_constant73"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1189 = torch.aten.t %1188 : !torch.tensor -> !torch.tensor
%1190 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1191 = torch.aten.view %824, %1190 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1192 = torch.aten.mm %1191, %1189 : !torch.tensor, !torch.tensor -> !torch.tensor
%1193 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1194 = torch.aten._unsafe_view %1192, %1193 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1195 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1196 = torch.aten.view %1194, %1195 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1197 = torch.aten.transpose.int %1196, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention scores: Q @ K^T -> [1,8,4,15].
%1198 = torch.aten.transpose.int %1187, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1199 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1200 = torch.aten.expand %1177, %1199, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1201 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1202 = torch.aten.view %1200, %1201 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1203 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1204 = torch.aten.expand %1198, %1203, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1205 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1206 = torch.aten.view %1204, %1205 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1207 = torch.aten.bmm %1202, %1206 : !torch.tensor, !torch.tensor -> !torch.tensor
%1208 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1209 = torch.aten._unsafe_view %1207, %1208 : !torch.tensor, !torch.list<int> -> !torch.tensor
// In-place add of the encoder-decoder attention bias/mask %1023 (defined earlier).
%1210 = torch.aten.add_.Tensor %1209, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Stable softmax over the encoder dimension.
%1211 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1212 = torch.aten.amax %1210, %1211, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1213 = torch.aten.sub.Tensor %1210, %1212, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1214 = torch.aten.exp %1213 : !torch.tensor -> !torch.tensor
%1215 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1216 = torch.aten.sum.dim_IntList %1214, %1215, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1217 = torch.aten.div.Tensor %1214, %1216 : !torch.tensor, !torch.tensor -> !torch.tensor
// Cross-attention context: weights @ V -> [1,8,4,64], then merge heads to [1,-1,512].
%1218 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1219 = torch.aten.expand %1217, %1218, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1220 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1221 = torch.aten.view %1219, %1220 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1222 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1223 = torch.aten.expand %1197, %1222, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1224 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1225 = torch.aten.view %1223, %1224 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1226 = torch.aten.bmm %1221, %1225 : !torch.tensor, !torch.tensor -> !torch.tensor
%1227 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1228 = torch.aten._unsafe_view %1226, %1227 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1229 = torch.aten.transpose.int %1228, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1230 = torch.aten.clone %1229, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1231 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1232 = torch.aten.view %1230, %1231 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Cross-attention output projection (_param_constant74).
%1233 = torch.prim.GetAttr %arg0["_param_constant74"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1234 = torch.aten.t %1233 : !torch.tensor -> !torch.tensor
%1235 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1236 = torch.aten.view %1232, %1235 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1237 = torch.aten.mm %1236, %1234 : !torch.tensor, !torch.tensor -> !torch.tensor
%1238 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1239 = torch.aten._unsafe_view %1237, %1238 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection around cross-attention.
%1240 = torch.aten.add.Tensor %1159, %1239, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// RMS norm (weight _param_constant75) before the feed-forward sub-layer.
%1241 = torch.aten.pow.Tensor_Scalar %1240, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1242 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1243 = torch.aten.mean.dim %1241, %1242, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1244 = torch.aten.add.Scalar %1243, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1245 = torch.aten.rsqrt %1244 : !torch.tensor -> !torch.tensor
%1246 = torch.aten.mul.Tensor %1240, %1245 : !torch.tensor, !torch.tensor -> !torch.tensor
%1247 = torch.prim.GetAttr %arg0["_param_constant75"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1248 = torch.aten.mul.Tensor %1247, %1246 : !torch.tensor, !torch.tensor -> !torch.tensor
// Feed-forward: wi (_param_constant76) 512 -> 2048, ReLU, wo (_param_constant77) 2048 -> 512.
%1249 = torch.prim.GetAttr %arg0["_param_constant76"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1250 = torch.aten.t %1249 : !torch.tensor -> !torch.tensor
%1251 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1252 = torch.aten.view %1248, %1251 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1253 = torch.aten.mm %1252, %1250 : !torch.tensor, !torch.tensor -> !torch.tensor
%1254 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1255 = torch.aten._unsafe_view %1253, %1254 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1256 = torch.aten.relu %1255 : !torch.tensor -> !torch.tensor
%1257 = torch.prim.GetAttr %arg0["_param_constant77"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1258 = torch.aten.t %1257 : !torch.tensor -> !torch.tensor
%1259 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%1260 = torch.aten.view %1256, %1259 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1261 = torch.aten.mm %1260, %1258 : !torch.tensor, !torch.tensor -> !torch.tensor
%1262 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1263 = torch.aten._unsafe_view %1261, %1262 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection around the feed-forward block.
%1264 = torch.aten.add.Tensor %1240, %1263, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// --- Next decoder block: RMS norm (_param_constant78) before self-attention ---
%1265 = torch.aten.pow.Tensor_Scalar %1264, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1266 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1267 = torch.aten.mean.dim %1265, %1266, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1268 = torch.aten.add.Scalar %1267, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1269 = torch.aten.rsqrt %1268 : !torch.tensor -> !torch.tensor
%1270 = torch.aten.mul.Tensor %1264, %1269 : !torch.tensor, !torch.tensor -> !torch.tensor
%1271 = torch.prim.GetAttr %arg0["_param_constant78"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1272 = torch.aten.mul.Tensor %1271, %1270 : !torch.tensor, !torch.tensor -> !torch.tensor
// Self-attention Q projection (_param_constant79).
%1273 = torch.prim.GetAttr %arg0["_param_constant79"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1274 = torch.aten.t %1273 : !torch.tensor -> !torch.tensor
%1275 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1276 = torch.aten.view %1272, %1275 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1277 = torch.aten.mm %1276, %1274 : !torch.tensor, !torch.tensor -> !torch.tensor
%1278 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1279 = torch.aten._unsafe_view %1277, %1278 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1280 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1281 = torch.aten.view %1279, %1280 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1282 = torch.aten.transpose.int %1281, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// K projection (_param_constant80).
%1283 = torch.prim.GetAttr %arg0["_param_constant80"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1284 = torch.aten.t %1283 : !torch.tensor -> !torch.tensor
%1285 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1286 = torch.aten.view %1272, %1285 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1287 = torch.aten.mm %1286, %1284 : !torch.tensor, !torch.tensor -> !torch.tensor
%1288 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1289 = torch.aten._unsafe_view %1287, %1288 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1290 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1291 = torch.aten.view %1289, %1290 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1292 = torch.aten.transpose.int %1291, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// V projection (_param_constant81).
%1293 = torch.prim.GetAttr %arg0["_param_constant81"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1294 = torch.aten.t %1293 : !torch.tensor -> !torch.tensor
%1295 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1296 = torch.aten.view %1272, %1295 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1297 = torch.aten.mm %1296, %1294 : !torch.tensor, !torch.tensor -> !torch.tensor
%1298 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1299 = torch.aten._unsafe_view %1297, %1298 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1300 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1301 = torch.aten.view %1299, %1300 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1302 = torch.aten.transpose.int %1301, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Self-attention scores: Q @ K^T -> [1,8,4,4].
%1303 = torch.aten.transpose.int %1292, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1304 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1305 = torch.aten.expand %1282, %1304, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1306 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1307 = torch.aten.view %1305, %1306 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1308 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1309 = torch.aten.expand %1303, %1308, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1310 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1311 = torch.aten.view %1309, %1310 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1312 = torch.aten.bmm %1307, %1311 : !torch.tensor, !torch.tensor -> !torch.tensor
%1313 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1314 = torch.aten._unsafe_view %1312, %1313 : !torch.tensor, !torch.list<int> -> !torch.tensor
// In-place add of the same self-attention bias/mask %939 (shared across blocks).
%1315 = torch.aten.add_.Tensor %1314, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Stable softmax.
%1316 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1317 = torch.aten.amax %1315, %1316, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1318 = torch.aten.sub.Tensor %1315, %1317, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1319 = torch.aten.exp %1318 : !torch.tensor -> !torch.tensor
%1320 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1321 = torch.aten.sum.dim_IntList %1319, %1320, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1322 = torch.aten.div.Tensor %1319, %1321 : !torch.tensor, !torch.tensor -> !torch.tensor
// Context: weights @ V, merge heads, output projection (_param_constant82).
%1323 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1324 = torch.aten.expand %1322, %1323, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1325 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1326 = torch.aten.view %1324, %1325 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1327 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1328 = torch.aten.expand %1302, %1327, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1329 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1330 = torch.aten.view %1328, %1329 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1331 = torch.aten.bmm %1326, %1330 : !torch.tensor, !torch.tensor -> !torch.tensor
%1332 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1333 = torch.aten._unsafe_view %1331, %1332 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1334 = torch.aten.transpose.int %1333, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1335 = torch.aten.clone %1334, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1336 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1337 = torch.aten.view %1335, %1336 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1338 = torch.prim.GetAttr %arg0["_param_constant82"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1339 = torch.aten.t %1338 : !torch.tensor -> !torch.tensor
%1340 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1341 = torch.aten.view %1337, %1340 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1342 = torch.aten.mm %1341, %1339 : !torch.tensor, !torch.tensor -> !torch.tensor
%1343 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1344 = torch.aten._unsafe_view %1342, %1343 : !torch.tensor, !torch.list<int> -> !torch.tensor
// Residual connection around self-attention.
%1345 = torch.aten.add.Tensor %1264, %1344, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// RMS norm (weight _param_constant83) before cross-attention.
%1346 = torch.aten.pow.Tensor_Scalar %1345, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1347 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1348 = torch.aten.mean.dim %1346, %1347, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1349 = torch.aten.add.Scalar %1348, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1350 = torch.aten.rsqrt %1349 : !torch.tensor -> !torch.tensor
%1351 = torch.aten.mul.Tensor %1345, %1350 : !torch.tensor, !torch.tensor -> !torch.tensor
%1352 = torch.prim.GetAttr %arg0["_param_constant83"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1353 = torch.aten.mul.Tensor %1352, %1351 : !torch.tensor, !torch.tensor -> !torch.tensor
// Cross-attention Q from the decoder stream (_param_constant84).
%1354 = torch.prim.GetAttr %arg0["_param_constant84"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1355 = torch.aten.t %1354 : !torch.tensor -> !torch.tensor
%1356 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1357 = torch.aten.view %1353, %1356 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1358 = torch.aten.mm %1357, %1355 : !torch.tensor, !torch.tensor -> !torch.tensor
%1359 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1360 = torch.aten._unsafe_view %1358, %1359 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1361 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1362 = torch.aten.view %1360, %1361 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1363 = torch.aten.transpose.int %1362, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention K (_param_constant85) from encoder states %824.
%1364 = torch.prim.GetAttr %arg0["_param_constant85"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1365 = torch.aten.t %1364 : !torch.tensor -> !torch.tensor
%1366 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1367 = torch.aten.view %824, %1366 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1368 = torch.aten.mm %1367, %1365 : !torch.tensor, !torch.tensor -> !torch.tensor
%1369 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1370 = torch.aten._unsafe_view %1368, %1369 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1371 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1372 = torch.aten.view %1370, %1371 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1373 = torch.aten.transpose.int %1372, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention V (_param_constant86) from encoder states %824.
%1374 = torch.prim.GetAttr %arg0["_param_constant86"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1375 = torch.aten.t %1374 : !torch.tensor -> !torch.tensor
%1376 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1377 = torch.aten.view %824, %1376 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1378 = torch.aten.mm %1377, %1375 : !torch.tensor, !torch.tensor -> !torch.tensor
%1379 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1380 = torch.aten._unsafe_view %1378, %1379 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1381 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1382 = torch.aten.view %1380, %1381 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1383 = torch.aten.transpose.int %1382, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
// Cross-attention scores: Q @ K^T -> [1,8,4,15].
%1384 = torch.aten.transpose.int %1373, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1385 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1386 = torch.aten.expand %1363, %1385, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1387 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1388 = torch.aten.view %1386, %1387 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1389 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1390 = torch.aten.expand %1384, %1389, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1391 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1392 = torch.aten.view %1390, %1391 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1393 = torch.aten.bmm %1388, %1392 : !torch.tensor, !torch.tensor -> !torch.tensor
%1394 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1395 = torch.aten._unsafe_view %1393, %1394 : !torch.tensor, !torch.list<int> -> !torch.tensor
// In-place add of the shared encoder-decoder bias/mask %1023.
%1396 = torch.aten.add_.Tensor %1395, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
// Softmax begins here; the remaining ops continue past the end of this view.
%1397 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1398 = torch.aten.amax %1396, %1397, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1399 = torch.aten.sub.Tensor %1396, %1398, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1400 = torch.aten.exp %1399 : !torch.tensor -> !torch.tensor
%1401 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1402 = torch.aten.sum.dim_IntList %1400, %1401, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1403 = torch.aten.div.Tensor %1400, %1402 : !torch.tensor, !torch.tensor -> !torch.tensor
%1404 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1405 = torch.aten.expand %1403, %1404, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1406 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1407 = torch.aten.view %1405, %1406 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1408 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1409 = torch.aten.expand %1383, %1408, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1410 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1411 = torch.aten.view %1409, %1410 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1412 = torch.aten.bmm %1407, %1411 : !torch.tensor, !torch.tensor -> !torch.tensor
%1413 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1414 = torch.aten._unsafe_view %1412, %1413 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1415 = torch.aten.transpose.int %1414, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1416 = torch.aten.clone %1415, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1417 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1418 = torch.aten.view %1416, %1417 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1419 = torch.prim.GetAttr %arg0["_param_constant87"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1420 = torch.aten.t %1419 : !torch.tensor -> !torch.tensor
%1421 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1422 = torch.aten.view %1418, %1421 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1423 = torch.aten.mm %1422, %1420 : !torch.tensor, !torch.tensor -> !torch.tensor
%1424 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1425 = torch.aten._unsafe_view %1423, %1424 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1426 = torch.aten.add.Tensor %1345, %1425, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1427 = torch.aten.pow.Tensor_Scalar %1426, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1428 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1429 = torch.aten.mean.dim %1427, %1428, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1430 = torch.aten.add.Scalar %1429, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1431 = torch.aten.rsqrt %1430 : !torch.tensor -> !torch.tensor
%1432 = torch.aten.mul.Tensor %1426, %1431 : !torch.tensor, !torch.tensor -> !torch.tensor
%1433 = torch.prim.GetAttr %arg0["_param_constant88"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1434 = torch.aten.mul.Tensor %1433, %1432 : !torch.tensor, !torch.tensor -> !torch.tensor
%1435 = torch.prim.GetAttr %arg0["_param_constant89"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1436 = torch.aten.t %1435 : !torch.tensor -> !torch.tensor
%1437 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1438 = torch.aten.view %1434, %1437 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1439 = torch.aten.mm %1438, %1436 : !torch.tensor, !torch.tensor -> !torch.tensor
%1440 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1441 = torch.aten._unsafe_view %1439, %1440 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1442 = torch.aten.relu %1441 : !torch.tensor -> !torch.tensor
%1443 = torch.prim.GetAttr %arg0["_param_constant90"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1444 = torch.aten.t %1443 : !torch.tensor -> !torch.tensor
%1445 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%1446 = torch.aten.view %1442, %1445 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1447 = torch.aten.mm %1446, %1444 : !torch.tensor, !torch.tensor -> !torch.tensor
%1448 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1449 = torch.aten._unsafe_view %1447, %1448 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1450 = torch.aten.add.Tensor %1426, %1449, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1451 = torch.aten.pow.Tensor_Scalar %1450, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1452 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1453 = torch.aten.mean.dim %1451, %1452, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1454 = torch.aten.add.Scalar %1453, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1455 = torch.aten.rsqrt %1454 : !torch.tensor -> !torch.tensor
%1456 = torch.aten.mul.Tensor %1450, %1455 : !torch.tensor, !torch.tensor -> !torch.tensor
%1457 = torch.prim.GetAttr %arg0["_param_constant91"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1458 = torch.aten.mul.Tensor %1457, %1456 : !torch.tensor, !torch.tensor -> !torch.tensor
%1459 = torch.prim.GetAttr %arg0["_param_constant92"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1460 = torch.aten.t %1459 : !torch.tensor -> !torch.tensor
%1461 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1462 = torch.aten.view %1458, %1461 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1463 = torch.aten.mm %1462, %1460 : !torch.tensor, !torch.tensor -> !torch.tensor
%1464 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1465 = torch.aten._unsafe_view %1463, %1464 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1466 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1467 = torch.aten.view %1465, %1466 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1468 = torch.aten.transpose.int %1467, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1469 = torch.prim.GetAttr %arg0["_param_constant93"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1470 = torch.aten.t %1469 : !torch.tensor -> !torch.tensor
%1471 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1472 = torch.aten.view %1458, %1471 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1473 = torch.aten.mm %1472, %1470 : !torch.tensor, !torch.tensor -> !torch.tensor
%1474 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1475 = torch.aten._unsafe_view %1473, %1474 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1476 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1477 = torch.aten.view %1475, %1476 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1478 = torch.aten.transpose.int %1477, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1479 = torch.prim.GetAttr %arg0["_param_constant94"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1480 = torch.aten.t %1479 : !torch.tensor -> !torch.tensor
%1481 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1482 = torch.aten.view %1458, %1481 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1483 = torch.aten.mm %1482, %1480 : !torch.tensor, !torch.tensor -> !torch.tensor
%1484 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1485 = torch.aten._unsafe_view %1483, %1484 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1486 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1487 = torch.aten.view %1485, %1486 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1488 = torch.aten.transpose.int %1487, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1489 = torch.aten.transpose.int %1478, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1490 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1491 = torch.aten.expand %1468, %1490, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1492 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1493 = torch.aten.view %1491, %1492 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1494 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1495 = torch.aten.expand %1489, %1494, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1496 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1497 = torch.aten.view %1495, %1496 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1498 = torch.aten.bmm %1493, %1497 : !torch.tensor, !torch.tensor -> !torch.tensor
%1499 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1500 = torch.aten._unsafe_view %1498, %1499 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1501 = torch.aten.add_.Tensor %1500, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1502 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1503 = torch.aten.amax %1501, %1502, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1504 = torch.aten.sub.Tensor %1501, %1503, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1505 = torch.aten.exp %1504 : !torch.tensor -> !torch.tensor
%1506 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1507 = torch.aten.sum.dim_IntList %1505, %1506, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1508 = torch.aten.div.Tensor %1505, %1507 : !torch.tensor, !torch.tensor -> !torch.tensor
%1509 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1510 = torch.aten.expand %1508, %1509, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1511 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1512 = torch.aten.view %1510, %1511 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1513 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1514 = torch.aten.expand %1488, %1513, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1515 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1516 = torch.aten.view %1514, %1515 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1517 = torch.aten.bmm %1512, %1516 : !torch.tensor, !torch.tensor -> !torch.tensor
%1518 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1519 = torch.aten._unsafe_view %1517, %1518 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1520 = torch.aten.transpose.int %1519, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1521 = torch.aten.clone %1520, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1522 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1523 = torch.aten.view %1521, %1522 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1524 = torch.prim.GetAttr %arg0["_param_constant95"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1525 = torch.aten.t %1524 : !torch.tensor -> !torch.tensor
%1526 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1527 = torch.aten.view %1523, %1526 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1528 = torch.aten.mm %1527, %1525 : !torch.tensor, !torch.tensor -> !torch.tensor
%1529 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1530 = torch.aten._unsafe_view %1528, %1529 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1531 = torch.aten.add.Tensor %1450, %1530, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1532 = torch.aten.pow.Tensor_Scalar %1531, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1533 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1534 = torch.aten.mean.dim %1532, %1533, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1535 = torch.aten.add.Scalar %1534, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1536 = torch.aten.rsqrt %1535 : !torch.tensor -> !torch.tensor
%1537 = torch.aten.mul.Tensor %1531, %1536 : !torch.tensor, !torch.tensor -> !torch.tensor
%1538 = torch.prim.GetAttr %arg0["_param_constant96"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1539 = torch.aten.mul.Tensor %1538, %1537 : !torch.tensor, !torch.tensor -> !torch.tensor
%1540 = torch.prim.GetAttr %arg0["_param_constant97"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1541 = torch.aten.t %1540 : !torch.tensor -> !torch.tensor
%1542 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1543 = torch.aten.view %1539, %1542 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1544 = torch.aten.mm %1543, %1541 : !torch.tensor, !torch.tensor -> !torch.tensor
%1545 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1546 = torch.aten._unsafe_view %1544, %1545 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1547 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1548 = torch.aten.view %1546, %1547 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1549 = torch.aten.transpose.int %1548, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1550 = torch.prim.GetAttr %arg0["_param_constant98"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1551 = torch.aten.t %1550 : !torch.tensor -> !torch.tensor
%1552 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1553 = torch.aten.view %824, %1552 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1554 = torch.aten.mm %1553, %1551 : !torch.tensor, !torch.tensor -> !torch.tensor
%1555 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1556 = torch.aten._unsafe_view %1554, %1555 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1557 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1558 = torch.aten.view %1556, %1557 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1559 = torch.aten.transpose.int %1558, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1560 = torch.prim.GetAttr %arg0["_param_constant99"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1561 = torch.aten.t %1560 : !torch.tensor -> !torch.tensor
%1562 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1563 = torch.aten.view %824, %1562 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1564 = torch.aten.mm %1563, %1561 : !torch.tensor, !torch.tensor -> !torch.tensor
%1565 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1566 = torch.aten._unsafe_view %1564, %1565 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1567 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1568 = torch.aten.view %1566, %1567 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1569 = torch.aten.transpose.int %1568, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1570 = torch.aten.transpose.int %1559, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1571 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1572 = torch.aten.expand %1549, %1571, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1573 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1574 = torch.aten.view %1572, %1573 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1575 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1576 = torch.aten.expand %1570, %1575, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1577 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1578 = torch.aten.view %1576, %1577 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1579 = torch.aten.bmm %1574, %1578 : !torch.tensor, !torch.tensor -> !torch.tensor
%1580 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1581 = torch.aten._unsafe_view %1579, %1580 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1582 = torch.aten.add_.Tensor %1581, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1583 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1584 = torch.aten.amax %1582, %1583, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1585 = torch.aten.sub.Tensor %1582, %1584, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1586 = torch.aten.exp %1585 : !torch.tensor -> !torch.tensor
%1587 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1588 = torch.aten.sum.dim_IntList %1586, %1587, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1589 = torch.aten.div.Tensor %1586, %1588 : !torch.tensor, !torch.tensor -> !torch.tensor
%1590 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1591 = torch.aten.expand %1589, %1590, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1592 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1593 = torch.aten.view %1591, %1592 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1594 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1595 = torch.aten.expand %1569, %1594, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1596 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1597 = torch.aten.view %1595, %1596 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1598 = torch.aten.bmm %1593, %1597 : !torch.tensor, !torch.tensor -> !torch.tensor
%1599 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1600 = torch.aten._unsafe_view %1598, %1599 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1601 = torch.aten.transpose.int %1600, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1602 = torch.aten.clone %1601, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1603 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1604 = torch.aten.view %1602, %1603 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1605 = torch.prim.GetAttr %arg0["_param_constant100"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1606 = torch.aten.t %1605 : !torch.tensor -> !torch.tensor
%1607 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1608 = torch.aten.view %1604, %1607 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1609 = torch.aten.mm %1608, %1606 : !torch.tensor, !torch.tensor -> !torch.tensor
%1610 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1611 = torch.aten._unsafe_view %1609, %1610 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1612 = torch.aten.add.Tensor %1531, %1611, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1613 = torch.aten.pow.Tensor_Scalar %1612, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1614 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1615 = torch.aten.mean.dim %1613, %1614, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1616 = torch.aten.add.Scalar %1615, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1617 = torch.aten.rsqrt %1616 : !torch.tensor -> !torch.tensor
%1618 = torch.aten.mul.Tensor %1612, %1617 : !torch.tensor, !torch.tensor -> !torch.tensor
%1619 = torch.prim.GetAttr %arg0["_param_constant101"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1620 = torch.aten.mul.Tensor %1619, %1618 : !torch.tensor, !torch.tensor -> !torch.tensor
%1621 = torch.prim.GetAttr %arg0["_param_constant102"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1622 = torch.aten.t %1621 : !torch.tensor -> !torch.tensor
%1623 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1624 = torch.aten.view %1620, %1623 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1625 = torch.aten.mm %1624, %1622 : !torch.tensor, !torch.tensor -> !torch.tensor
%1626 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1627 = torch.aten._unsafe_view %1625, %1626 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1628 = torch.aten.relu %1627 : !torch.tensor -> !torch.tensor
%1629 = torch.prim.GetAttr %arg0["_param_constant103"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1630 = torch.aten.t %1629 : !torch.tensor -> !torch.tensor
%1631 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%1632 = torch.aten.view %1628, %1631 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1633 = torch.aten.mm %1632, %1630 : !torch.tensor, !torch.tensor -> !torch.tensor
%1634 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1635 = torch.aten._unsafe_view %1633, %1634 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1636 = torch.aten.add.Tensor %1612, %1635, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1637 = torch.aten.pow.Tensor_Scalar %1636, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1638 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1639 = torch.aten.mean.dim %1637, %1638, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1640 = torch.aten.add.Scalar %1639, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1641 = torch.aten.rsqrt %1640 : !torch.tensor -> !torch.tensor
%1642 = torch.aten.mul.Tensor %1636, %1641 : !torch.tensor, !torch.tensor -> !torch.tensor
%1643 = torch.prim.GetAttr %arg0["_param_constant104"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1644 = torch.aten.mul.Tensor %1643, %1642 : !torch.tensor, !torch.tensor -> !torch.tensor
%1645 = torch.prim.GetAttr %arg0["_param_constant105"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1646 = torch.aten.t %1645 : !torch.tensor -> !torch.tensor
%1647 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1648 = torch.aten.view %1644, %1647 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1649 = torch.aten.mm %1648, %1646 : !torch.tensor, !torch.tensor -> !torch.tensor
%1650 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1651 = torch.aten._unsafe_view %1649, %1650 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1652 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1653 = torch.aten.view %1651, %1652 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1654 = torch.aten.transpose.int %1653, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1655 = torch.prim.GetAttr %arg0["_param_constant106"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1656 = torch.aten.t %1655 : !torch.tensor -> !torch.tensor
%1657 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1658 = torch.aten.view %1644, %1657 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1659 = torch.aten.mm %1658, %1656 : !torch.tensor, !torch.tensor -> !torch.tensor
%1660 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1661 = torch.aten._unsafe_view %1659, %1660 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1662 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1663 = torch.aten.view %1661, %1662 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1664 = torch.aten.transpose.int %1663, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1665 = torch.prim.GetAttr %arg0["_param_constant107"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1666 = torch.aten.t %1665 : !torch.tensor -> !torch.tensor
%1667 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1668 = torch.aten.view %1644, %1667 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1669 = torch.aten.mm %1668, %1666 : !torch.tensor, !torch.tensor -> !torch.tensor
%1670 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1671 = torch.aten._unsafe_view %1669, %1670 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1672 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1673 = torch.aten.view %1671, %1672 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1674 = torch.aten.transpose.int %1673, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1675 = torch.aten.transpose.int %1664, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1676 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1677 = torch.aten.expand %1654, %1676, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1678 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1679 = torch.aten.view %1677, %1678 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1680 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1681 = torch.aten.expand %1675, %1680, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1682 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1683 = torch.aten.view %1681, %1682 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1684 = torch.aten.bmm %1679, %1683 : !torch.tensor, !torch.tensor -> !torch.tensor
%1685 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1686 = torch.aten._unsafe_view %1684, %1685 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1687 = torch.aten.add_.Tensor %1686, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1688 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1689 = torch.aten.amax %1687, %1688, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1690 = torch.aten.sub.Tensor %1687, %1689, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1691 = torch.aten.exp %1690 : !torch.tensor -> !torch.tensor
%1692 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1693 = torch.aten.sum.dim_IntList %1691, %1692, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1694 = torch.aten.div.Tensor %1691, %1693 : !torch.tensor, !torch.tensor -> !torch.tensor
%1695 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1696 = torch.aten.expand %1694, %1695, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1697 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1698 = torch.aten.view %1696, %1697 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1699 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1700 = torch.aten.expand %1674, %1699, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1701 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1702 = torch.aten.view %1700, %1701 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1703 = torch.aten.bmm %1698, %1702 : !torch.tensor, !torch.tensor -> !torch.tensor
%1704 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1705 = torch.aten._unsafe_view %1703, %1704 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1706 = torch.aten.transpose.int %1705, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1707 = torch.aten.clone %1706, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1708 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1709 = torch.aten.view %1707, %1708 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1710 = torch.prim.GetAttr %arg0["_param_constant108"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1711 = torch.aten.t %1710 : !torch.tensor -> !torch.tensor
%1712 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1713 = torch.aten.view %1709, %1712 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1714 = torch.aten.mm %1713, %1711 : !torch.tensor, !torch.tensor -> !torch.tensor
%1715 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1716 = torch.aten._unsafe_view %1714, %1715 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1717 = torch.aten.add.Tensor %1636, %1716, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1718 = torch.aten.pow.Tensor_Scalar %1717, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1719 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1720 = torch.aten.mean.dim %1718, %1719, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1721 = torch.aten.add.Scalar %1720, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1722 = torch.aten.rsqrt %1721 : !torch.tensor -> !torch.tensor
%1723 = torch.aten.mul.Tensor %1717, %1722 : !torch.tensor, !torch.tensor -> !torch.tensor
%1724 = torch.prim.GetAttr %arg0["_param_constant109"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1725 = torch.aten.mul.Tensor %1724, %1723 : !torch.tensor, !torch.tensor -> !torch.tensor
%1726 = torch.prim.GetAttr %arg0["_param_constant110"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1727 = torch.aten.t %1726 : !torch.tensor -> !torch.tensor
%1728 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1729 = torch.aten.view %1725, %1728 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1730 = torch.aten.mm %1729, %1727 : !torch.tensor, !torch.tensor -> !torch.tensor
%1731 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1732 = torch.aten._unsafe_view %1730, %1731 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1733 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1734 = torch.aten.view %1732, %1733 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1735 = torch.aten.transpose.int %1734, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1736 = torch.prim.GetAttr %arg0["_param_constant111"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1737 = torch.aten.t %1736 : !torch.tensor -> !torch.tensor
%1738 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1739 = torch.aten.view %824, %1738 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1740 = torch.aten.mm %1739, %1737 : !torch.tensor, !torch.tensor -> !torch.tensor
%1741 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1742 = torch.aten._unsafe_view %1740, %1741 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1743 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1744 = torch.aten.view %1742, %1743 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1745 = torch.aten.transpose.int %1744, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1746 = torch.prim.GetAttr %arg0["_param_constant112"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1747 = torch.aten.t %1746 : !torch.tensor -> !torch.tensor
%1748 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1749 = torch.aten.view %824, %1748 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1750 = torch.aten.mm %1749, %1747 : !torch.tensor, !torch.tensor -> !torch.tensor
%1751 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1752 = torch.aten._unsafe_view %1750, %1751 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1753 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1754 = torch.aten.view %1752, %1753 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1755 = torch.aten.transpose.int %1754, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1756 = torch.aten.transpose.int %1745, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1757 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1758 = torch.aten.expand %1735, %1757, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1759 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1760 = torch.aten.view %1758, %1759 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1761 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1762 = torch.aten.expand %1756, %1761, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1763 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1764 = torch.aten.view %1762, %1763 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1765 = torch.aten.bmm %1760, %1764 : !torch.tensor, !torch.tensor -> !torch.tensor
%1766 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1767 = torch.aten._unsafe_view %1765, %1766 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1768 = torch.aten.add_.Tensor %1767, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1769 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1770 = torch.aten.amax %1768, %1769, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1771 = torch.aten.sub.Tensor %1768, %1770, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1772 = torch.aten.exp %1771 : !torch.tensor -> !torch.tensor
%1773 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1774 = torch.aten.sum.dim_IntList %1772, %1773, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1775 = torch.aten.div.Tensor %1772, %1774 : !torch.tensor, !torch.tensor -> !torch.tensor
%1776 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1777 = torch.aten.expand %1775, %1776, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1778 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1779 = torch.aten.view %1777, %1778 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1780 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1781 = torch.aten.expand %1755, %1780, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1782 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1783 = torch.aten.view %1781, %1782 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1784 = torch.aten.bmm %1779, %1783 : !torch.tensor, !torch.tensor -> !torch.tensor
%1785 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1786 = torch.aten._unsafe_view %1784, %1785 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1787 = torch.aten.transpose.int %1786, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1788 = torch.aten.clone %1787, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1789 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1790 = torch.aten.view %1788, %1789 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1791 = torch.prim.GetAttr %arg0["_param_constant113"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1792 = torch.aten.t %1791 : !torch.tensor -> !torch.tensor
%1793 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1794 = torch.aten.view %1790, %1793 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1795 = torch.aten.mm %1794, %1792 : !torch.tensor, !torch.tensor -> !torch.tensor
%1796 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1797 = torch.aten._unsafe_view %1795, %1796 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1798 = torch.aten.add.Tensor %1717, %1797, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1799 = torch.aten.pow.Tensor_Scalar %1798, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1800 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1801 = torch.aten.mean.dim %1799, %1800, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1802 = torch.aten.add.Scalar %1801, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1803 = torch.aten.rsqrt %1802 : !torch.tensor -> !torch.tensor
%1804 = torch.aten.mul.Tensor %1798, %1803 : !torch.tensor, !torch.tensor -> !torch.tensor
%1805 = torch.prim.GetAttr %arg0["_param_constant114"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1806 = torch.aten.mul.Tensor %1805, %1804 : !torch.tensor, !torch.tensor -> !torch.tensor
%1807 = torch.prim.GetAttr %arg0["_param_constant115"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1808 = torch.aten.t %1807 : !torch.tensor -> !torch.tensor
%1809 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1810 = torch.aten.view %1806, %1809 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1811 = torch.aten.mm %1810, %1808 : !torch.tensor, !torch.tensor -> !torch.tensor
%1812 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1813 = torch.aten._unsafe_view %1811, %1812 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1814 = torch.aten.relu %1813 : !torch.tensor -> !torch.tensor
%1815 = torch.prim.GetAttr %arg0["_param_constant116"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1816 = torch.aten.t %1815 : !torch.tensor -> !torch.tensor
%1817 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%1818 = torch.aten.view %1814, %1817 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1819 = torch.aten.mm %1818, %1816 : !torch.tensor, !torch.tensor -> !torch.tensor
%1820 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1821 = torch.aten._unsafe_view %1819, %1820 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1822 = torch.aten.add.Tensor %1798, %1821, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1823 = torch.aten.pow.Tensor_Scalar %1822, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1824 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1825 = torch.aten.mean.dim %1823, %1824, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1826 = torch.aten.add.Scalar %1825, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1827 = torch.aten.rsqrt %1826 : !torch.tensor -> !torch.tensor
%1828 = torch.aten.mul.Tensor %1822, %1827 : !torch.tensor, !torch.tensor -> !torch.tensor
%1829 = torch.prim.GetAttr %arg0["_param_constant117"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1830 = torch.aten.mul.Tensor %1829, %1828 : !torch.tensor, !torch.tensor -> !torch.tensor
%1831 = torch.prim.GetAttr %arg0["_param_constant118"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1832 = torch.aten.t %1831 : !torch.tensor -> !torch.tensor
%1833 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1834 = torch.aten.view %1830, %1833 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1835 = torch.aten.mm %1834, %1832 : !torch.tensor, !torch.tensor -> !torch.tensor
%1836 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1837 = torch.aten._unsafe_view %1835, %1836 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1838 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1839 = torch.aten.view %1837, %1838 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1840 = torch.aten.transpose.int %1839, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1841 = torch.prim.GetAttr %arg0["_param_constant119"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1842 = torch.aten.t %1841 : !torch.tensor -> !torch.tensor
%1843 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1844 = torch.aten.view %1830, %1843 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1845 = torch.aten.mm %1844, %1842 : !torch.tensor, !torch.tensor -> !torch.tensor
%1846 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1847 = torch.aten._unsafe_view %1845, %1846 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1848 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1849 = torch.aten.view %1847, %1848 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1850 = torch.aten.transpose.int %1849, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1851 = torch.prim.GetAttr %arg0["_param_constant120"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1852 = torch.aten.t %1851 : !torch.tensor -> !torch.tensor
%1853 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1854 = torch.aten.view %1830, %1853 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1855 = torch.aten.mm %1854, %1852 : !torch.tensor, !torch.tensor -> !torch.tensor
%1856 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1857 = torch.aten._unsafe_view %1855, %1856 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1858 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1859 = torch.aten.view %1857, %1858 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1860 = torch.aten.transpose.int %1859, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1861 = torch.aten.transpose.int %1850, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1862 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1863 = torch.aten.expand %1840, %1862, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1864 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1865 = torch.aten.view %1863, %1864 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1866 = torch.prim.ListConstruct %int1, %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1867 = torch.aten.expand %1861, %1866, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1868 = torch.prim.ListConstruct %int8, %int64, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1869 = torch.aten.view %1867, %1868 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1870 = torch.aten.bmm %1865, %1869 : !torch.tensor, !torch.tensor -> !torch.tensor
%1871 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1872 = torch.aten._unsafe_view %1870, %1871 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1873 = torch.aten.add_.Tensor %1872, %939, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1874 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1875 = torch.aten.amax %1873, %1874, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1876 = torch.aten.sub.Tensor %1873, %1875, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1877 = torch.aten.exp %1876 : !torch.tensor -> !torch.tensor
%1878 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1879 = torch.aten.sum.dim_IntList %1877, %1878, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1880 = torch.aten.div.Tensor %1877, %1879 : !torch.tensor, !torch.tensor -> !torch.tensor
%1881 = torch.prim.ListConstruct %int1, %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1882 = torch.aten.expand %1880, %1881, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1883 = torch.prim.ListConstruct %int8, %int4, %int4 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1884 = torch.aten.view %1882, %1883 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1885 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1886 = torch.aten.expand %1860, %1885, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1887 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1888 = torch.aten.view %1886, %1887 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1889 = torch.aten.bmm %1884, %1888 : !torch.tensor, !torch.tensor -> !torch.tensor
%1890 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1891 = torch.aten._unsafe_view %1889, %1890 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1892 = torch.aten.transpose.int %1891, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1893 = torch.aten.clone %1892, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1894 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1895 = torch.aten.view %1893, %1894 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1896 = torch.prim.GetAttr %arg0["_param_constant121"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1897 = torch.aten.t %1896 : !torch.tensor -> !torch.tensor
%1898 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1899 = torch.aten.view %1895, %1898 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1900 = torch.aten.mm %1899, %1897 : !torch.tensor, !torch.tensor -> !torch.tensor
%1901 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1902 = torch.aten._unsafe_view %1900, %1901 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1903 = torch.aten.add.Tensor %1822, %1902, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1904 = torch.aten.pow.Tensor_Scalar %1903, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1905 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1906 = torch.aten.mean.dim %1904, %1905, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1907 = torch.aten.add.Scalar %1906, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1908 = torch.aten.rsqrt %1907 : !torch.tensor -> !torch.tensor
%1909 = torch.aten.mul.Tensor %1903, %1908 : !torch.tensor, !torch.tensor -> !torch.tensor
%1910 = torch.prim.GetAttr %arg0["_param_constant122"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1911 = torch.aten.mul.Tensor %1910, %1909 : !torch.tensor, !torch.tensor -> !torch.tensor
%1912 = torch.prim.GetAttr %arg0["_param_constant123"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1913 = torch.aten.t %1912 : !torch.tensor -> !torch.tensor
%1914 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1915 = torch.aten.view %1911, %1914 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1916 = torch.aten.mm %1915, %1913 : !torch.tensor, !torch.tensor -> !torch.tensor
%1917 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1918 = torch.aten._unsafe_view %1916, %1917 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1919 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1920 = torch.aten.view %1918, %1919 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1921 = torch.aten.transpose.int %1920, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1922 = torch.prim.GetAttr %arg0["_param_constant124"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1923 = torch.aten.t %1922 : !torch.tensor -> !torch.tensor
%1924 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1925 = torch.aten.view %824, %1924 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1926 = torch.aten.mm %1925, %1923 : !torch.tensor, !torch.tensor -> !torch.tensor
%1927 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1928 = torch.aten._unsafe_view %1926, %1927 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1929 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1930 = torch.aten.view %1928, %1929 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1931 = torch.aten.transpose.int %1930, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1932 = torch.prim.GetAttr %arg0["_param_constant125"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1933 = torch.aten.t %1932 : !torch.tensor -> !torch.tensor
%1934 = torch.prim.ListConstruct %int15, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1935 = torch.aten.view %824, %1934 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1936 = torch.aten.mm %1935, %1933 : !torch.tensor, !torch.tensor -> !torch.tensor
%1937 = torch.prim.ListConstruct %int1, %int15, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1938 = torch.aten._unsafe_view %1936, %1937 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1939 = torch.prim.ListConstruct %int1, %int-1, %int8, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1940 = torch.aten.view %1938, %1939 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1941 = torch.aten.transpose.int %1940, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1942 = torch.aten.transpose.int %1931, %int3, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1943 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1944 = torch.aten.expand %1921, %1943, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1945 = torch.prim.ListConstruct %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1946 = torch.aten.view %1944, %1945 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1947 = torch.prim.ListConstruct %int1, %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1948 = torch.aten.expand %1942, %1947, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1949 = torch.prim.ListConstruct %int8, %int64, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1950 = torch.aten.view %1948, %1949 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1951 = torch.aten.bmm %1946, %1950 : !torch.tensor, !torch.tensor -> !torch.tensor
%1952 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1953 = torch.aten._unsafe_view %1951, %1952 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1954 = torch.aten.add_.Tensor %1953, %1023, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1955 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1956 = torch.aten.amax %1954, %1955, %true_0 : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1957 = torch.aten.sub.Tensor %1954, %1956, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1958 = torch.aten.exp %1957 : !torch.tensor -> !torch.tensor
%1959 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1960 = torch.aten.sum.dim_IntList %1958, %1959, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1961 = torch.aten.div.Tensor %1958, %1960 : !torch.tensor, !torch.tensor -> !torch.tensor
%1962 = torch.prim.ListConstruct %int1, %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1963 = torch.aten.expand %1961, %1962, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1964 = torch.prim.ListConstruct %int8, %int4, %int15 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1965 = torch.aten.view %1963, %1964 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1966 = torch.prim.ListConstruct %int1, %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1967 = torch.aten.expand %1941, %1966, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor
%1968 = torch.prim.ListConstruct %int8, %int15, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1969 = torch.aten.view %1967, %1968 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1970 = torch.aten.bmm %1965, %1969 : !torch.tensor, !torch.tensor -> !torch.tensor
%1971 = torch.prim.ListConstruct %int1, %int8, %int4, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1972 = torch.aten._unsafe_view %1970, %1971 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1973 = torch.aten.transpose.int %1972, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor
%1974 = torch.aten.clone %1973, %int0 : !torch.tensor, !torch.int -> !torch.tensor
%1975 = torch.prim.ListConstruct %int1, %int-1, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1976 = torch.aten.view %1974, %1975 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1977 = torch.prim.GetAttr %arg0["_param_constant126"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1978 = torch.aten.t %1977 : !torch.tensor -> !torch.tensor
%1979 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1980 = torch.aten.view %1976, %1979 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1981 = torch.aten.mm %1980, %1978 : !torch.tensor, !torch.tensor -> !torch.tensor
%1982 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1983 = torch.aten._unsafe_view %1981, %1982 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1984 = torch.aten.add.Tensor %1903, %1983, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%1985 = torch.aten.pow.Tensor_Scalar %1984, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%1986 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%1987 = torch.aten.mean.dim %1985, %1986, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%1988 = torch.aten.add.Scalar %1987, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%1989 = torch.aten.rsqrt %1988 : !torch.tensor -> !torch.tensor
%1990 = torch.aten.mul.Tensor %1984, %1989 : !torch.tensor, !torch.tensor -> !torch.tensor
%1991 = torch.prim.GetAttr %arg0["_param_constant127"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1992 = torch.aten.mul.Tensor %1991, %1990 : !torch.tensor, !torch.tensor -> !torch.tensor
%1993 = torch.prim.GetAttr %arg0["_param_constant128"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%1994 = torch.aten.t %1993 : !torch.tensor -> !torch.tensor
%1995 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%1996 = torch.aten.view %1992, %1995 : !torch.tensor, !torch.list<int> -> !torch.tensor
%1997 = torch.aten.mm %1996, %1994 : !torch.tensor, !torch.tensor -> !torch.tensor
%1998 = torch.prim.ListConstruct %int1, %int4, %int2048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%1999 = torch.aten._unsafe_view %1997, %1998 : !torch.tensor, !torch.list<int> -> !torch.tensor
%2000 = torch.aten.relu %1999 : !torch.tensor -> !torch.tensor
%2001 = torch.prim.GetAttr %arg0["_param_constant129"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%2002 = torch.aten.t %2001 : !torch.tensor -> !torch.tensor
%2003 = torch.prim.ListConstruct %int4, %int2048 : (!torch.int, !torch.int) -> !torch.list<int>
%2004 = torch.aten.view %2000, %2003 : !torch.tensor, !torch.list<int> -> !torch.tensor
%2005 = torch.aten.mm %2004, %2002 : !torch.tensor, !torch.tensor -> !torch.tensor
%2006 = torch.prim.ListConstruct %int1, %int4, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2007 = torch.aten._unsafe_view %2005, %2006 : !torch.tensor, !torch.list<int> -> !torch.tensor
%2008 = torch.aten.add.Tensor %1984, %2007, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor
%2009 = torch.aten.pow.Tensor_Scalar %2008, %int2 : !torch.tensor, !torch.int -> !torch.tensor
%2010 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int>
%2011 = torch.aten.mean.dim %2009, %2010, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor
%2012 = torch.aten.add.Scalar %2011, %float9.999990e-07, %int1 : !torch.tensor, !torch.float, !torch.int -> !torch.tensor
%2013 = torch.aten.rsqrt %2012 : !torch.tensor -> !torch.tensor
%2014 = torch.aten.mul.Tensor %2008, %2013 : !torch.tensor, !torch.tensor -> !torch.tensor
%2015 = torch.prim.GetAttr %arg0["_param_constant130"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%2016 = torch.aten.mul.Tensor %2015, %2014 : !torch.tensor, !torch.tensor -> !torch.tensor
%2017 = torch.aten.mul.Scalar %2016, %float4.419420e-02 : !torch.tensor, !torch.float -> !torch.tensor
%2018 = torch.prim.GetAttr %arg0["_param_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> -> !torch.tensor
%2019 = torch.aten.t %2018 : !torch.tensor -> !torch.tensor
%2020 = torch.prim.ListConstruct %int4, %int512 : (!torch.int, !torch.int) -> !torch.list<int>
%2021 = torch.aten.view %2017, %2020 : !torch.tensor, !torch.list<int> -> !torch.tensor
%2022 = torch.aten.mm %2021, %2019 : !torch.tensor, !torch.tensor -> !torch.tensor
%2023 = torch.prim.ListConstruct %int1, %int4, %int32128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%2024 = torch.aten._unsafe_view %2022, %2023 : !torch.tensor, !torch.list<int> -> !torch.tensor
return %2024 : !torch.tensor
}
  // TorchScript class type for the FX-traced graph module `_lambda`
  // (the enclosing module carries torch.debug_module_name = "_lambda").
  // It declares 131 parameter attributes (_param_constant0 .. _param_constant130),
  // one captured tensor constant, the standard nn.Module bookkeeping fields,
  // and binds the two functions defined above as methods.
  torch.class_type @__torch__.torch.fx.graph_module._lambda {
    // Model weights lifted to module attributes by torch.fx tracing.
    // Shapes are carried by the torch.tensor.literal ops following this block;
    // NOTE(review): the attribute->literal pairing is fixed by the
    // torch.nn_module initializer later in the file — confirm there.
    // _param_constant0 is consumed in `forward` (GetAttr + aten.t + aten.mm)
    // as the final [*, 32128] vocabulary projection.
    torch.attr private "_param_constant0" : !torch.tensor
    torch.attr private "_param_constant1" : !torch.tensor
    torch.attr private "_param_constant2" : !torch.tensor
    torch.attr private "_param_constant3" : !torch.tensor
    torch.attr private "_param_constant4" : !torch.tensor
    torch.attr private "_param_constant5" : !torch.tensor
    torch.attr private "_param_constant6" : !torch.tensor
    torch.attr private "_param_constant7" : !torch.tensor
    torch.attr private "_param_constant8" : !torch.tensor
    torch.attr private "_param_constant9" : !torch.tensor
    torch.attr private "_param_constant10" : !torch.tensor
    torch.attr private "_param_constant11" : !torch.tensor
    torch.attr private "_param_constant12" : !torch.tensor
    torch.attr private "_param_constant13" : !torch.tensor
    torch.attr private "_param_constant14" : !torch.tensor
    torch.attr private "_param_constant15" : !torch.tensor
    torch.attr private "_param_constant16" : !torch.tensor
    torch.attr private "_param_constant17" : !torch.tensor
    torch.attr private "_param_constant18" : !torch.tensor
    torch.attr private "_param_constant19" : !torch.tensor
    torch.attr private "_param_constant20" : !torch.tensor
    torch.attr private "_param_constant21" : !torch.tensor
    torch.attr private "_param_constant22" : !torch.tensor
    torch.attr private "_param_constant23" : !torch.tensor
    torch.attr private "_param_constant24" : !torch.tensor
    torch.attr private "_param_constant25" : !torch.tensor
    torch.attr private "_param_constant26" : !torch.tensor
    torch.attr private "_param_constant27" : !torch.tensor
    torch.attr private "_param_constant28" : !torch.tensor
    torch.attr private "_param_constant29" : !torch.tensor
    torch.attr private "_param_constant30" : !torch.tensor
    torch.attr private "_param_constant31" : !torch.tensor
    torch.attr private "_param_constant32" : !torch.tensor
    torch.attr private "_param_constant33" : !torch.tensor
    torch.attr private "_param_constant34" : !torch.tensor
    torch.attr private "_param_constant35" : !torch.tensor
    torch.attr private "_param_constant36" : !torch.tensor
    torch.attr private "_param_constant37" : !torch.tensor
    torch.attr private "_param_constant38" : !torch.tensor
    torch.attr private "_param_constant39" : !torch.tensor
    torch.attr private "_param_constant40" : !torch.tensor
    torch.attr private "_param_constant41" : !torch.tensor
    torch.attr private "_param_constant42" : !torch.tensor
    torch.attr private "_param_constant43" : !torch.tensor
    torch.attr private "_param_constant44" : !torch.tensor
    torch.attr private "_param_constant45" : !torch.tensor
    torch.attr private "_param_constant46" : !torch.tensor
    torch.attr private "_param_constant47" : !torch.tensor
    torch.attr private "_param_constant48" : !torch.tensor
    torch.attr private "_param_constant49" : !torch.tensor
    torch.attr private "_param_constant50" : !torch.tensor
    torch.attr private "_param_constant51" : !torch.tensor
    torch.attr private "_param_constant52" : !torch.tensor
    torch.attr private "_param_constant53" : !torch.tensor
    torch.attr private "_param_constant54" : !torch.tensor
    torch.attr private "_param_constant55" : !torch.tensor
    torch.attr private "_param_constant56" : !torch.tensor
    torch.attr private "_param_constant57" : !torch.tensor
    torch.attr private "_param_constant58" : !torch.tensor
    torch.attr private "_param_constant59" : !torch.tensor
    torch.attr private "_param_constant60" : !torch.tensor
    torch.attr private "_param_constant61" : !torch.tensor
    torch.attr private "_param_constant62" : !torch.tensor
    torch.attr private "_param_constant63" : !torch.tensor
    torch.attr private "_param_constant64" : !torch.tensor
    torch.attr private "_param_constant65" : !torch.tensor
    torch.attr private "_param_constant66" : !torch.tensor
    torch.attr private "_param_constant67" : !torch.tensor
    torch.attr private "_param_constant68" : !torch.tensor
    torch.attr private "_param_constant69" : !torch.tensor
    torch.attr private "_param_constant70" : !torch.tensor
    torch.attr private "_param_constant71" : !torch.tensor
    torch.attr private "_param_constant72" : !torch.tensor
    torch.attr private "_param_constant73" : !torch.tensor
    torch.attr private "_param_constant74" : !torch.tensor
    torch.attr private "_param_constant75" : !torch.tensor
    torch.attr private "_param_constant76" : !torch.tensor
    torch.attr private "_param_constant77" : !torch.tensor
    torch.attr private "_param_constant78" : !torch.tensor
    torch.attr private "_param_constant79" : !torch.tensor
    torch.attr private "_param_constant80" : !torch.tensor
    torch.attr private "_param_constant81" : !torch.tensor
    torch.attr private "_param_constant82" : !torch.tensor
    torch.attr private "_param_constant83" : !torch.tensor
    torch.attr private "_param_constant84" : !torch.tensor
    torch.attr private "_param_constant85" : !torch.tensor
    torch.attr private "_param_constant86" : !torch.tensor
    torch.attr private "_param_constant87" : !torch.tensor
    torch.attr private "_param_constant88" : !torch.tensor
    torch.attr private "_param_constant89" : !torch.tensor
    torch.attr private "_param_constant90" : !torch.tensor
    torch.attr private "_param_constant91" : !torch.tensor
    torch.attr private "_param_constant92" : !torch.tensor
    torch.attr private "_param_constant93" : !torch.tensor
    torch.attr private "_param_constant94" : !torch.tensor
    torch.attr private "_param_constant95" : !torch.tensor
    torch.attr private "_param_constant96" : !torch.tensor
    torch.attr private "_param_constant97" : !torch.tensor
    torch.attr private "_param_constant98" : !torch.tensor
    torch.attr private "_param_constant99" : !torch.tensor
    torch.attr private "_param_constant100" : !torch.tensor
    torch.attr private "_param_constant101" : !torch.tensor
    torch.attr private "_param_constant102" : !torch.tensor
    torch.attr private "_param_constant103" : !torch.tensor
    torch.attr private "_param_constant104" : !torch.tensor
    torch.attr private "_param_constant105" : !torch.tensor
    torch.attr private "_param_constant106" : !torch.tensor
    torch.attr private "_param_constant107" : !torch.tensor
    torch.attr private "_param_constant108" : !torch.tensor
    torch.attr private "_param_constant109" : !torch.tensor
    torch.attr private "_param_constant110" : !torch.tensor
    torch.attr private "_param_constant111" : !torch.tensor
    torch.attr private "_param_constant112" : !torch.tensor
    torch.attr private "_param_constant113" : !torch.tensor
    torch.attr private "_param_constant114" : !torch.tensor
    torch.attr private "_param_constant115" : !torch.tensor
    torch.attr private "_param_constant116" : !torch.tensor
    torch.attr private "_param_constant117" : !torch.tensor
    torch.attr private "_param_constant118" : !torch.tensor
    torch.attr private "_param_constant119" : !torch.tensor
    torch.attr private "_param_constant120" : !torch.tensor
    torch.attr private "_param_constant121" : !torch.tensor
    torch.attr private "_param_constant122" : !torch.tensor
    torch.attr private "_param_constant123" : !torch.tensor
    torch.attr private "_param_constant124" : !torch.tensor
    torch.attr private "_param_constant125" : !torch.tensor
    torch.attr private "_param_constant126" : !torch.tensor
    torch.attr private "_param_constant127" : !torch.tensor
    torch.attr private "_param_constant128" : !torch.tensor
    torch.attr private "_param_constant129" : !torch.tensor
    torch.attr private "_param_constant130" : !torch.tensor
    // Non-parameter tensor captured by the trace (used via GetAttr in the
    // traced code; see the `_tensor_constant0` reference in the `_code` string).
    torch.attr private "_tensor_constant0" : !torch.tensor
    // Standard nn.Module bookkeeping attributes preserved by torch.fx.
    torch.attr private "training" : !torch.bool
    torch.attr private "_is_full_backward_hook" : !torch.optional<bool>
    // Python source of the traced forward, exposed through `__code_getter`.
    torch.attr private "_code" : !torch.str
    torch.method private "__code_getter", @__torch__.torch.fx.graph_module._lambda.__code_getter
    torch.method "forward", @__torch__.torch.fx.graph_module._lambda.forward
  }
  // Backing values for the class-type attributes declared above. The numeric
  // payloads are elided (dense_resource<__elided__>); only shapes remain.
  // The shapes are consistent with a T5-small-style encoder/decoder:
  // 512 hidden size, 2048 FFN size, [32,8] relative-position-bias tables
  // (32 buckets x 8 heads), and a [32128,512] vocabulary embedding.
  // NOTE(review): which SSA value feeds which "_param_constantN" attribute is
  // determined by the torch.nn_module initializer later in the file (outside
  // this chunk) — verify the pairing there before relying on it.
  %0 = torch.tensor.literal(dense_resource<__elided__> : tensor<32128x512xf32>) : !torch.tensor<[32128,512],f32>
  %1 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %2 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %3 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %4 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  // [32,8] relative-position bias table (encoder self-attention, layer 0).
  %5 = torch.tensor.literal(dense_resource<__elided__> : tensor<32x8xf32>) : !torch.tensor<[32,8],f32>
  %6 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %7 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %8 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %9 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %10 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %11 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %12 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %13 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %14 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %15 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %16 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %17 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %18 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %19 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %20 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %21 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %22 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %23 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %24 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %25 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %26 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %27 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %28 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %29 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %30 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %31 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %32 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %33 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %34 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %35 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %36 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %37 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %38 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %39 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %40 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %41 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %42 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %43 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %44 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %45 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %46 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %47 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %48 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %49 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %50 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %51 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %52 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %53 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %54 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  // Second [32,8] relative-position bias table (presumably decoder
  // self-attention, layer 0 — confirm against the initializer).
  %55 = torch.tensor.literal(dense_resource<__elided__> : tensor<32x8xf32>) : !torch.tensor<[32,8],f32>
  %56 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %57 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %58 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %59 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %60 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %61 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %62 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %63 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %64 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %65 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %66 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %67 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %68 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %69 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %70 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %71 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %72 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %73 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %74 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %75 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %76 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %77 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %78 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %79 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %80 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %81 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %82 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %83 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %84 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %85 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %86 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %87 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %88 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %89 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %90 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %91 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %92 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %93 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %94 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %95 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %96 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %97 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %98 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %99 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %100 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %101 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %102 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %103 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %104 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %105 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %106 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %107 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %108 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %109 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %110 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %111 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %112 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %113 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %114 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %115 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %116 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %117 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %118 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %119 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %120 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %121 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %122 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %123 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %124 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %125 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %126 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x512xf32>) : !torch.tensor<[512,512],f32>
  %127 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  %128 = torch.tensor.literal(dense_resource<__elided__> : tensor<2048x512xf32>) : !torch.tensor<[2048,512],f32>
  %129 = torch.tensor.literal(dense_resource<__elided__> : tensor<512x2048xf32>) : !torch.tensor<[512,2048],f32>
  %130 = torch.tensor.literal(dense_resource<__elided__> : tensor<512xf32>) : !torch.tensor<[512],f32>
  // Scalar si64 zero — matches the fill of decoder-input position 0 via
  // `_tensor_constant0` in the traced `_code` string (aten.fill_ on select 0).
  %131 = torch.tensor.literal(dense<0> : tensor<si64>) : !torch.tensor<[],si64>
  // Constants consumed by the torch.nn_module initializer below
  // (presumably `training` = true and `_is_full_backward_hook` = none).
  %true = torch.constant.bool true
  %none = torch.constant.none
%str = torch.constant.str "\0A\0A\0Adef forward(self, arg0_1, arg1_1):\0A new_zeros = torch.ops.aten.new_zeros(arg1_1, [1, 4], dtype = torch.int64, layout = torch.strided, device = device(type='cpu'), pin_memory = False)\0A slice_1 = torch.ops.aten.slice(arg1_1, 1, 0, -1); arg1_1 = None\0A clone = torch.ops.aten.clone(slice_1); slice_1 = None\0A slice_2 = torch.ops.aten.slice(new_zeros, 1, 1, 9223372036854775807)\0A copy_ = torch.ops.aten.copy_(slice_2, clone); slice_2 = clone = None\0A _tensor_constant0 = self._tensor_constant0\0A lift_fresh_copy = torch.ops.aten.lift_fresh_copy(_tensor_constant0); _tensor_constant0 = None\0A select = torch.ops.aten.select(new_zeros, 1, 0)\0A fill_ = torch.ops.aten.fill_(select, lift_fresh_copy); select = lift_fresh_copy = None\0A eq = torch.ops.aten.eq(new_zeros, -100)\0A masked_fill_ = torch.ops.aten.masked_fill_(new_zeros, eq, 0); new_zeros = eq = None\0A view = torch.ops.aten.view(arg0_1, [-1, 15]); arg0_1 = None\0A _param_constant0 = self._param_constant0\0A embedding = torch.ops.aten.embedding(_param_constant0, view); _param_constant0 = view = None\0A ones = torch.ops.aten.ones([1, 15], device = device(type='cpu'), pin_memory = False)\0A slice_3 = torch.ops.aten.slice(ones, 0, 0, 9223372036854775807); ones = None\0A unsqueeze = torch.ops.aten.unsqueeze(slice_3, 1); slice_3 = None\0A unsqueeze_1 = torch.ops.aten.unsqueeze(unsqueeze, 2); unsqueeze = None\0A slice_4 = torch.ops.aten.slice(unsqueeze_1, 3, 0, 9223372036854775807); unsqueeze_1 = None\0A rsub = torch.ops.aten.rsub(slice_4, 1.0); slice_4 = None\0A mul = torch.ops.aten.mul(rsub, -3.4028234663852886e+38); rsub = None\0A pow_1 = torch.ops.aten.pow(embedding, 2)\0A mean = torch.ops.aten.mean(pow_1, [-1], True); pow_1 = None\0A add = torch.ops.aten.add(mean, 1e-06); mean = None\0A rsqrt = torch.ops.aten.rsqrt(add); add = None\0A detach = torch.ops.aten.detach(rsqrt)\0A mul_1 = torch.ops.aten.mul(embedding, rsqrt); rsqrt = None\0A _param_constant1 = 
self._param_constant1\0A mul_2 = torch.ops.aten.mul(_param_constant1, mul_1); _param_constant1 = mul_1 = None\0A _param_constant2 = self._param_constant2\0A t = torch.ops.aten.t(_param_constant2); _param_constant2 = None\0A view_1 = torch.ops.aten.view(mul_2, [15, 512])\0A mm = torch.ops.aten.mm(view_1, t); view_1 = t = None\0A _unsafe_view = torch.ops.aten._unsafe_view(mm, [1, 15, 512]); mm = None\0A view_2 = torch.ops.aten.view(_unsafe_view, [1, -1, 8, 64]); _unsafe_view = None\0A transpose = torch.ops.aten.transpose(view_2, 1, 2); view_2 = None\0A _param_constant3 = self._param_constant3\0A t_1 = torch.ops.aten.t(_param_constant3); _param_constant3 = None\0A view_3 = torch.ops.aten.view(mul_2, [15, 512])\0A mm_1 = torch.ops.aten.mm(view_3, t_1); view_3 = t_1 = None\0A _unsafe_view_1 = torch.ops.aten._unsafe_view(mm_1, [1, 15, 512]); mm_1 = None\0A view_4 = torch.ops.aten.view(_unsafe_view_1, [1, -1, 8, 64]); _unsafe_view_1 = None\0A transpose_1 = torch.ops.aten.transpose(view_4, 1, 2); view_4 = None\0A _param_constant4 = self._param_constant4\0A t_2 = torch.ops.aten.t(_param_constant4); _param_constant4 = None\0A view_5 = torch.ops.aten.view(mul_2, [15, 512]); mul_2 = None\0A mm_2 = torch.ops.aten.mm(view_5, t_2); view_5 = t_2 = None\0A _unsafe_view_2 = torch.ops.aten._unsafe_view(mm_2, [1, 15, 512]); mm_2 = None\0A view_6 = torch.ops.aten.view(_unsafe_view_2, [1, -1, 8, 64]); _unsafe_view_2 = None\0A transpose_2 = torch.ops.aten.transpose(view_6, 1, 2); view_6 = None\0A transpose_3 = torch.ops.aten.transpose(transpose_1, 3, 2); transpose_1 = None\0A expand = torch.ops.aten.expand(transpose, [1, 8, 15, 64]); transpose = None\0A view_7 = torch.ops.aten.view(expand, [8, 15, 64]); expand = None\0A expand_1 = torch.ops.aten.expand(transpose_3, [1, 8, 64, 15]); transpose_3 = None\0A view_8 = torch.ops.aten.view(expand_1, [8, 64, 15]); expand_1 = None\0A bmm = torch.ops.aten.bmm(view_7, view_8); view_7 = view_8 = None\0A _unsafe_view_3 = 
torch.ops.aten._unsafe_view(bmm, [1, 8, 15, 15]); bmm = None\0A arange = torch.ops.aten.arange(15, dtype = torch.int64, device = device(type='cpu'), pin_memory = False)\0A slice_5 = torch.ops.aten.slice(arange, 0, 0, 9223372036854775807); arange = None\0A unsqueeze_2 = torch.ops.aten.unsqueeze(slice_5, 1); slice_5 = None\0A arange_1 = torch.ops.aten.arange(15, dtype = torch.int64, device = device(type='cpu'), pin_memory = False)\0A unsqueeze_3 = torch.ops.aten.unsqueeze(arange_1, 0); arange_1 = None\0A slice_6 = torch.ops.aten.slice(unsqueeze_3, 1, 0, 9223372036854775807); unsqueeze_3 = None\0A sub = torch.ops.aten.sub(slice_6, unsqueeze_2); slice_6 = unsqueeze_2 = None\0A gt = torch.ops.aten.gt(sub, 0)\0A convert_element_type = torch.ops.prims.convert_element_type(gt, torch.int64); gt = None\0A mul_3 = torch.ops.aten.mul(convert_element_type, 16); convert_element_type = None\0A add_1 = torch.ops.aten.add(mul_3, 0); mul_3 = None\0A abs_1 = torch.ops.aten.abs(sub); sub = None\0A lt = torch.ops.aten.lt(abs_1, 8)\0A convert_element_type_1 = torch.ops.prims.convert_element_type(abs_1, torch.float32)\0A div = torch.ops.aten.div(convert_element_type_1, 8); convert_element_type_1 = None\0A log = torch.ops.aten.log(div); div = None\0A div_1 = torch.ops.aten.div(log, 2.772588722239781); log = None\0A mul_4 = torch.ops.aten.mul(div_1, 8); div_1 = None\0A convert_element_type_2 = torch.ops.prims.convert_element_type(mul_4, torch.int64); mul_4 = None\0A add_2 = torch.ops.aten.add(convert_element_type_2, 8); convert_element_type_2 = None\0A full_like = torch.ops.aten.full_like(add_2, 15, dtype = torch.int64, layout = torch.strided, device = device(type='cpu'), pin_memory = False)\0A minimum = torch.ops.aten.minimum(add_2, full_like); add_2 = full_like = None\0A where = torch.ops.aten.where(lt, abs_1, minimum); lt = abs_1 = minimum = None\0A add_ = torch.ops.aten.add_(add_1, where); add_1 = where = None\0A _param_constant5 = self._param_constant5\0A embedding_1 = 
torch.ops.aten.embedding(_param_constant5, add_); _param_constant5 = add_ = None\0A permute = torch.ops.aten.permute(embedding_1, [2, 0, 1]); embedding_1 = None\0A unsqueeze_4 = torch.ops.aten.unsqueeze(permute, 0); permute = None\0A add_3 = torch.ops.aten.add(unsqueeze_4, mul); unsqueeze_4 = mul = None\0A add__1 = torch.ops.aten.add_(_unsafe_view_3, add_3); _unsafe_view_3 = None\0A amax = torch.ops.aten.amax(add__1, [-1], True)\0A sub_1 = torch.ops.aten.sub(add__1, amax); add__1 = amax = None\0A exp = torch.ops.aten.exp(sub_1); sub_1 = None\0A sum_1 = torch.ops.aten.sum(exp, [-1], True)\0A div_2 = torch.ops.aten.div(exp, sum_1); exp = sum_1 = None\0A detach_1 = torch.ops.aten.detach(div_2)\0A expand_2 = torch.ops.aten.expand(div_2, [1, 8, 15, 15]); div_2 = None\0A view_9 = torch.ops.aten.view(expand_2, [8, 15, 15]); expand_2 = None\0A expand_3 = torch.ops.aten.expand(transpose_2, [1, 8, 15, 64]); transpose_2 = None\0A view_10 = torch.ops.aten.view(expand_3, [8, 15, 64]); expand_3 = None\0A bmm_1 = torch.ops.aten.bmm(view_9, view_10); view_9 = view_10 = None\0A _unsafe_view_4 = torch.ops.aten._unsafe_view(bmm_1, [1, 8, 15, 64]); bmm_1 = None\0A transpose_4 = torch.ops.aten.transpose(_unsafe_view_4, 1, 2); _unsafe_view_4 = None\0A clone_1 = torch.ops.aten.clone(transpose_4, memory_format = torch.contiguous_format); transpose_4 = None\0A view_11 = torch.ops.aten.view(clone_1, [1, -1, 512]); clone_1 = None\0A _param_constant6 = self._param_constant6\0A t_3 = torch.ops.aten.t(_param_constant6); _param_constant6 = None\0A view_12 = torch.ops.aten.view(view_11, [15, 512]); view_11 = None\0A mm_3 = torch.ops.aten.mm(view_12, t_3); view_12 = t_3 = None\0A _unsafe_view_5 = torch.ops.aten._unsafe_view(mm_3, [1, 15, 512]); mm_3 = None\0A add_4 = torch.ops.aten.add(embedding, _unsafe_view_5); embedding = _unsafe_view_5 = None\0A pow_2 = torch.ops.aten.pow(add_4, 2)\0A mean_1 = torch.ops.aten.mean(pow_2, [-1], True); pow_2 = None\0A add_5 = torch.ops.aten.add(mean_1, 1e-06); 
mean_1 = None\0A rsqrt_1 = torch.ops.aten.rsqrt(add_5); add_5 = None\0A detach_2 = torch.ops.aten.detach(rsqrt_1)\0A mul_5 = torch.ops.aten.mul(add_4, rsqrt_1); rsqrt_1 = None\0A _param_constant7 = self._param_constant7\0A mul_6 = torch.ops.aten.mul(_param_constant7, mul_5); _param_constant7 = mul_5 = None\0A _param_constant8 = self._param_constant8\0A t_4 = torch.ops.aten.t(_param_constant8); _param_constant8 = None\0A view_13 = torch.ops.aten.view(mul_6, [15, 512]); mul_6 = None\0A mm_4 = torch.ops.aten.mm(view_13, t_4); view_13 = t_4 = None\0A _unsafe_view_6 = torch.ops.aten._unsafe_view(mm_4, [1, 15, 2048]); mm_4 = None\0A relu = torch.ops.aten.relu(_unsafe_view_6); _unsafe_view_6 = None\0A detach_3 = torch.ops.aten.detach(relu)\0A _param_constant9 = self._param_constant9\0A t_5 = torch.ops.aten.t(_param_constant9); _param_constant9 = None\0A view_14 = torch.ops.aten.view(relu, [15, 2048]); relu = None\0A mm_5 = torch.ops.aten.mm(view_14, t_5); view_14 = t_5 = None\0A _unsafe_view_7 = torch.ops.aten._unsafe_view(mm_5, [1, 15, 512]); mm_5 = None\0A add_6 = torch.ops.aten.add(add_4, _unsafe_view_7); add_4 = _unsafe_view_7 = None\0A pow_3 = torch.ops.aten.pow(add_6, 2)\0A mean_2 = torch.ops.aten.mean(pow_3, [-1], True); pow_3 = None\0A add_7 = torch.ops.aten.add(mean_2, 1e-06); mean_2 = None\0A rsqrt_2 = torch.ops.aten.rsqrt(add_7); add_7 = None\0A detach_4 = torch.ops.aten.detach(rsqrt_2)\0A mul_7 = torch.ops.aten.mul(add_6, rsqrt_2); rsqrt_2 = None\0A _param_constant10 = self._param_constant10\0A mul_8 = torch.ops.aten.mul(_param_constant10, mul_7); _param_constant10 = mul_7 = None\0A _param_constant11 = self._param_constant11\0A t_6 = torch.ops.aten.t(_param_constant11); _param_constant11 = None\0A view_15 = torch.ops.aten.view(mul_8, [15, 512])\0A mm_6 = torch.ops.aten.mm(view_15, t_6); view_15 = t_6 = None\0A _unsafe_view_8 = torch.ops.aten._unsafe_view(mm_6, [1, 15, 512]); mm_6 = None\0A view_16 = torch.ops.aten.view(_unsafe_view_8, [1, -1, 8, 64]); 
_unsafe_view_8 = None\0A transpose_5 = torch.ops.aten.transpose(view_16, 1, 2); view_16 = None\0A _param_constant12 = self._param_constant12\0A t_7 = torch.ops.aten.t(_param_constant12); _param_constant12 = None\0A view_17 = torch.ops.aten.view(mul_8, [15, 512])\0A mm_7 = torch.ops.aten.mm(view_17, t_7); view_17 = t_7 = None\0A _unsafe_view_9 = torch.ops.aten._unsafe_view(mm_7, [1, 15, 512]); mm_7 = None\0A view_18 = torch.ops.aten.view(_unsafe_view_9, [1, -1, 8, 64]); _unsafe_view_9 = None\0A transpose_6 = torch.ops.aten.transpose(view_18, 1, 2); view_18 = None\0A _param_constant13 = self._param_constant13\0A t_8 = torch.ops.aten.t(_param_constant13); _param_constant13 = None\0A view_19 = torch.ops.aten.view(mul_8, [15, 512]); mul_8 = None\0A mm_8 = torch.ops.aten.mm(view_19, t_8); view_19 = t_8 = None\0A _unsafe_view_10 = torch.ops.aten._unsafe_view(mm_8, [1, 15, 512]); mm_8 = None\0A view_20 = torch.ops.aten.view(_unsafe_view_10, [1, -1, 8, 64]); _unsafe_view_10 = None\0A transpose_7 = torch.ops.aten.transpose(view_20, 1, 2); view_20 = None\0A transpose_8 = torch.ops.aten.transpose(transpose_6, 3, 2); transpose_6 = None\0A expand_4 = torch.ops.aten.expand(transpose_5, [1, 8, 15, 64]); transpose_5 = None\0A view_21 = torch.ops.aten.view(expand_4, [8, 15, 64]); expand_4 = None\0A expand_5 = torch.ops.aten.expand(transpose_8, [1, 8, 64, 15]); transpose_8 = None\0A view_22 = torch.ops.aten.view(expand_5, [8, 64, 15]); expand_5 = None\0A bmm_2 = torch.ops.aten.bmm(view_21, view_22); view_21 = view_22 = None\0A _unsafe_view_11 = torch.ops.aten._unsafe_view(bmm_2, [1, 8, 15, 15]); bmm_2 = None\0A add__2 = torch.ops.aten.add_(_unsafe_view_11, add_3); _unsafe_view_11 = None\0A amax_1 = torch.ops.aten.amax(add__2, [-1], True)\0A sub_2 = torch.ops.aten.sub(add__2, amax_1); add__2 = amax_1 = None\0A exp_1 = torch.ops.aten.exp(sub_2); sub_2 = None\0A sum_2 = torch.ops.aten.sum(exp_1, [-1], True)\0A div_3 = torch.ops.aten.div(exp_1, sum_2); exp_1 = sum_2 = None\0A detach_5 = 
torch.ops.aten.detach(div_3)\0A expand_6 = torch.ops.aten.expand(div_3, [1, 8, 15, 15]); div_3 = None\0A view_23 = torch.ops.aten.view(expand_6, [8, 15, 15]); expand_6 = None\0A expand_7 = torch.ops.aten.expand(transpose_7, [1, 8, 15, 64]); transpose_7 = None\0A view_24 = torch.ops.aten.view(expand_7, [8, 15, 64]); expand_7 = None\0A bmm_3 = torch.ops.aten.bmm(view_23, view_24); view_23 = view_24 = None\0A _unsafe_view_12 = torch.ops.aten._unsafe_view(bmm_3, [1, 8, 15, 64]); bmm_3 = None\0A transpose_9 = torch.ops.aten.transpose(_unsafe_view_12, 1, 2); _unsafe_view_12 = None\0A clone_2 = torch.ops.aten.clone(transpose_9, memory_format = torch.contiguous_format); transpose_9 = None\0A view_25 = torch.ops.aten.view(clone_2, [1, -1, 512]); clone_2 = None\0A _param_constant14 = self._param_constant14\0A t_9 = torch.ops.aten.t(_param_constant14); _param_constant14 = None\0A view_26 = torch.ops.aten.view(view_25, [15, 512]); view_25 = None\0A mm_9 = torch.ops.aten.mm(view_26, t_9); view_26 = t_9 = None\0A _unsafe_view_13 = torch.ops.aten._unsafe_view(mm_9, [1, 15, 512]); mm_9 = None\0A add_8 = torch.ops.aten.add(add_6, _unsafe_view_13); add_6 = _unsafe_view_13 = None\0A pow_4 = torch.ops.aten.pow(add_8, 2)\0A mean_3 = torch.ops.aten.mean(pow_4, [-1], True); pow_4 = None\0A add_9 = torch.ops.aten.add(mean_3, 1e-06); mean_3 = None\0A rsqrt_3 = torch.ops.aten.rsqrt(add_9); add_9 = None\0A detach_6 = torch.ops.aten.detach(rsqrt_3)\0A mul_9 = torch.ops.aten.mul(add_8, rsqrt_3); rsqrt_3 = None\0A _param_constant15 = self._param_constant15\0A mul_10 = torch.ops.aten.mul(_param_constant15, mul_9); _param_constant15 = mul_9 = None\0A _param_constant16 = self._param_constant16\0A t_10 = torch.ops.aten.t(_param_constant16); _param_constant16 = None\0A view_27 = torch.ops.aten.view(mul_10, [15, 512]); mul_10 = None\0A mm_10 = torch.ops.aten.mm(view_27, t_10); view_27 = t_10 = None\0A _unsafe_view_14 = torch.ops.aten._unsafe_view(mm_10, [1, 15, 2048]); mm_10 = None\0A relu_1 = 
torch.ops.aten.relu(_unsafe_view_14); _unsafe_view_14 = None\0A detach_7 = torch.ops.aten.detach(relu_1)\0A _param_constant17 = self._param_constant17\0A t_11 = torch.ops.aten.t(_param_constant17); _param_constant17 = None\0A view_28 = torch.ops.aten.view(relu_1, [15, 2048]); relu_1 = None\0A mm_11 = torch.ops.aten.mm(view_28, t_11); view_28 = t_11 = None\0A _unsafe_view_15 = torch.ops.aten._unsafe_view(mm_11, [1, 15, 512]); mm_11 = None\0A add_10 = torch.ops.aten.add(add_8, _unsafe_view_15); add_8 = _unsafe_view_15 = None\0A pow_5 = torch.ops.aten.pow(add_10, 2)\0A mean_4 = torch.ops.aten.mean(pow_5, [-1], True); pow_5 = None\0A add_11 = torch.ops.aten.add(mean_4, 1e-06); mean_4 = None\0A rsqrt_4 = torch.ops.aten.rsqrt(add_11); add_11 = None\0A detach_8 = torch.ops.aten.detach(rsqrt_4)\0A mul_11 = torch.ops.aten.mul(add_10, rsqrt_4); rsqrt_4 = None\0A _param_constant18 = self._param_constant18\0A mul_12 = torch.ops.aten.mul(_param_constant18, mul_11); _param_constant18 = mul_11 = None\0A _param_constant19 = self._param_constant19\0A t_12 = torch.ops.aten.t(_param_constant19); _param_constant19 = None\0A view_29 = torch.ops.aten.view(mul_12, [15, 512])\0A mm_12 = torch.ops.aten.mm(view_29, t_12); view_29 = t_12 = None\0A _unsafe_view_16 = torch.ops.aten._unsafe_view(mm_12, [1, 15, 512]); mm_12 = None\0A view_30 = torch.ops.aten.view(_unsafe_view_16, [1, -1, 8, 64]); _unsafe_view_16 = None\0A transpose_10 = torch.ops.aten.transpose(view_30, 1, 2); view_30 = None\0A _param_constant20 = self._param_constant20\0A t_13 = torch.ops.aten.t(_param_constant20); _param_constant20 = None\0A view_31 = torch.ops.aten.view(mul_12, [15, 512])\0A mm_13 = torch.ops.aten.mm(view_31, t_13); view_31 = t_13 = None\0A _unsafe_view_17 = torch.ops.aten._unsafe_view(mm_13, [1, 15, 512]); mm_13 = None\0A view_32 = torch.ops.aten.view(_unsafe_view_17, [1, -1, 8, 64]); _unsafe_view_17 = None\0A transpose_11 = torch.ops.aten.transpose(view_32, 1, 2); view_32 = None\0A _param_constant21 = 
self._param_constant21\0A t_14 = torch.ops.aten.t(_param_constant21); _param_constant21 = None\0A view_33 = torch.ops.aten.view(mul_12, [15, 512]); mul_12 = None\0A mm_14 = torch.ops.aten.mm(view_33, t_14); view_33 = t_14 = None\0A _unsafe_view_18 = torch.ops.aten._unsafe_view(mm_14, [1, 15, 512]); mm_14 = None\0A view_34 = torch.ops.aten.view(_unsafe_view_18, [1, -1, 8, 64]); _unsafe_view_18 = None\0A transpose_12 = torch.ops.aten.transpose(view_34, 1, 2); view_34 = None\0A transpose_13 = torch.ops.aten.transpose(transpose_11, 3, 2); transpose_11 = None\0A expand_8 = torch.ops.aten.expand(transpose_10, [1, 8, 15, 64]); transpose_10 = None\0A view_35 = torch.ops.aten.view(expand_8, [8, 15, 64]); expand_8 = None\0A expand_9 = torch.ops.aten.expand(transpose_13, [1, 8, 64, 15]); transpose_13 = None\0A view_36 = torch.ops.aten.view(expand_9, [8, 64, 15]); expand_9 = None\0A bmm_4 = torch.ops.aten.bmm(view_35, view_36); view_35 = view_36 = None\0A _unsafe_view_19 = torch.ops.aten._unsafe_view(bmm_4, [1, 8, 15, 15]); bmm_4 = None\0A add__3 = torch.ops.aten.add_(_unsafe_view_19, add_3); _unsafe_view_19 = None\0A amax_2 = torch.ops.aten.amax(add__3, [-1], True)\0A sub_3 = torch.ops.aten.sub(add__3, amax_2); add__3 = amax_2 = None\0A exp_2 = torch.ops.aten.exp(sub_3); sub_3 = None\0A sum_3 = torch.ops.aten.sum(exp_2, [-1], True)\0A div_4 = torch.ops.aten.div(exp_2, sum_3); exp_2 = sum_3 = None\0A detach_9 = torch.ops.aten.detach(div_4)\0A expand_10 = torch.ops.aten.expand(div_4, [1, 8, 15, 15]); div_4 = None\0A view_37 = torch.ops.aten.view(expand_10, [8, 15, 15]); expand_10 = None\0A expand_11 = torch.ops.aten.expand(transpose_12, [1, 8, 15, 64]); transpose_12 = None\0A view_38 = torch.ops.aten.view(expand_11, [8, 15, 64]); expand_11 = None\0A bmm_5 = torch.ops.aten.bmm(view_37, view_38); view_37 = view_38 = None\0A _unsafe_view_20 = torch.ops.aten._unsafe_view(bmm_5, [1, 8, 15, 64]); bmm_5 = None\0A transpose_14 = torch.ops.aten.transpose(_unsafe_view_20, 1, 2); 
_unsafe_view_20 = None\0A clone_3 = torch.ops.aten.clone(transpose_14, memory_format = torch.contiguous_format); transpose_14 = None\0A view_39 = torch.ops.aten.view(clone_3, [1, -1, 512]); clone_3 = None\0A _param_constant22 = self._param_constant22\0A t_15 = torch.ops.aten.t(_param_constant22); _param_constant22 = None\0A view_40 = torch.ops.aten.view(view_39, [15, 512]); view_39 = None\0A mm_15 = torch.ops.aten.mm(view_40, t_15); view_40 = t_15 = None\0A _unsafe_view_21 = torch.ops.aten._unsafe_view(mm_15, [1, 15, 512]); mm_15 = None\0A add_12 = torch.ops.aten.add(add_10, _unsafe_view_21); add_10 = _unsafe_view_21 = None\0A pow_6 = torch.ops.aten.pow(add_12, 2)\0A mean_5 = torch.ops.aten.mean(pow_6, [-1], True); pow_6 = None\0A add_13 = torch.ops.aten.add(mean_5, 1e-06); mean_5 = None\0A rsqrt_5 = torch.ops.aten.rsqrt(add_13); add_13 = None\0A detach_10 = torch.ops.aten.detach(rsqrt_5)\0A mul_13 = torch.ops.aten.mul(add_12, rsqrt_5); rsqrt_5 = None\0A _param_constant23 = self._param_constant23\0A mul_14 = torch.ops.aten.mul(_param_constant23, mul_13); _param_constant23 = mul_13 = None\0A _param_constant24 = self._param_constant24\0A t_16 = torch.ops.aten.t(_param_constant24); _param_constant24 = None\0A view_41 = torch.ops.aten.view(mul_14, [15, 512]); mul_14 = None\0A mm_16 = torch.ops.aten.mm(view_41, t_16); view_41 = t_16 = None\0A _unsafe_view_22 = torch.ops.aten._unsafe_view(mm_16, [1, 15, 2048]); mm_16 = None\0A relu_2 = torch.ops.aten.relu(_unsafe_view_22); _unsafe_view_22 = None\0A detach_11 = torch.ops.aten.detach(relu_2)\0A _param_constant25 = self._param_constant25\0A t_17 = torch.ops.aten.t(_param_constant25); _param_constant25 = None\0A view_42 = torch.ops.aten.view(relu_2, [15, 2048]); relu_2 = None\0A mm_17 = torch.ops.aten.mm(view_42, t_17); view_42 = t_17 = None\0A _unsafe_view_23 = torch.ops.aten._unsafe_view(mm_17, [1, 15, 512]); mm_17 = None\0A add_14 = torch.ops.aten.add(add_12, _unsafe_view_23); add_12 = _unsafe_view_23 = None\0A pow_7 = 
torch.ops.aten.pow(add_14, 2)\0A mean_6 = torch.ops.aten.mean(pow_7, [-1], True); pow_7 = None\0A add_15 = torch.ops.aten.add(mean_6, 1e-06); mean_6 = None\0A rsqrt_6 = torch.ops.aten.rsqrt(add_15); add_15 = None\0A detach_12 = torch.ops.aten.detach(rsqrt_6)\0A mul_15 = torch.ops.aten.mul(add_14, rsqrt_6); rsqrt_6 = None\0A _param_constant26 = self._param_constant26\0A mul_16 = torch.ops.aten.mul(_param_constant26, mul_15); _param_constant26 = mul_15 = None\0A _param_constant27 = self._param_constant27\0A t_18 = torch.ops.aten.t(_param_constant27); _param_constant27 = None\0A view_43 = torch.ops.aten.view(mul_16, [15, 512])\0A mm_18 = torch.ops.aten.mm(view_43, t_18); view_43 = t_18 = None\0A _unsafe_view_24 = torch.ops.aten._unsafe_view(mm_18, [1, 15, 512]); mm_18 = None\0A view_44 = torch.ops.aten.view(_unsafe_view_24, [1, -1, 8, 64]); _unsafe_view_24 = None\0A transpose_15 = torch.ops.aten.transpose(view_44, 1, 2); view_44 = None\0A _param_constant28 = self._param_constant28\0A t_19 = torch.ops.aten.t(_param_constant28); _param_constant28 = None\0A view_45 = torch.ops.aten.view(mul_16, [15, 512])\0A mm_19 = torch.ops.aten.mm(view_45, t_19); view_45 = t_19 = None\0A _unsafe_view_25 = torch.ops.aten._unsafe_view(mm_19, [1, 15, 512]); mm_19 = None\0A view_46 = torch.ops.aten.view(_unsafe_view_25, [1, -1, 8, 64]); _unsafe_view_25 = None\0A transpose_16 = torch.ops.aten.transpose(view_46, 1, 2); view_46 = None\0A _param_constant29 = self._param_constant29\0A t_20 = torch.ops.aten.t(_param_constant29); _param_constant29 = None\0A view_47 = torch.ops.aten.view(mul_16, [15, 512]); mul_16 = None\0A mm_20 = torch.ops.aten.mm(view_47, t_20); view_47 = t_20 = None\0A _unsafe_view_26 = torch.ops.aten._unsafe_view(mm_20, [1, 15, 512]); mm_20 = None\0A view_48 = torch.ops.aten.view(_unsafe_view_26, [1, -1, 8, 64]); _unsafe_view_26 = None\0A transpose_17 = torch.ops.aten.transpose(view_48, 1, 2); view_48 = None\0A transpose_18 = torch.ops.aten.transpose(transpose_16, 3, 2); 
transpose_16 = None\0A expand_12 = torch.ops.aten.expand(transpose_15, [1, 8, 15, 64]); transpose_15 = None\0A view_49 = torch.ops.aten.view(expand_12, [8, 15, 64]); expand_12 = None\0A expand_13 = torch.ops.aten.expand(transpose_18, [1, 8, 64, 15]); transpose_18 = None\0A view_50 = torch.ops.aten.view(expand_13, [8, 64, 15]); expand_13 = None\0A bmm_6 = torch.ops.aten.bmm(view_49, view_50); view_49 = view_50 = None\0A _unsafe_view_27 = torch.ops.aten._unsafe_view(bmm_6, [1, 8, 15, 15]); bmm_6 = None\0A add__4 = torch.ops.aten.add_(_unsafe_view_27, add_3); _unsafe_view_27 = None\0A amax_3 = torch.ops.aten.amax(add__4, [-1], True)\0A sub_4 = torch.ops.aten.sub(add__4, amax_3); add__4 = amax_3 = None\0A exp_3 = torch.ops.aten.exp(sub_4); sub_4 = None\0A sum_4 = torch.ops.aten.sum(exp_3, [-1], True)\0A div_5 = torch.ops.aten.div(exp_3, sum_4); exp_3 = sum_4 = None\0A detach_13 = torch.ops.aten.detach(div_5)\0A expand_14 = torch.ops.aten.expand(div_5, [1, 8, 15, 15]); div_5 = None\0A view_51 = torch.ops.aten.view(expand_14, [8, 15, 15]); expand_14 = None\0A expand_15 = torch.ops.aten.expand(transpose_17, [1, 8, 15, 64]); transpose_17 = None\0A view_52 = torch.ops.aten.view(expand_15, [8, 15, 64]); expand_15 = None\0A bmm_7 = torch.ops.aten.bmm(view_51, view_52); view_51 = view_52 = None\0A _unsafe_view_28 = torch.ops.aten._unsafe_view(bmm_7, [1, 8, 15, 64]); bmm_7 = None\0A transpose_19 = torch.ops.aten.transpose(_unsafe_view_28, 1, 2); _unsafe_view_28 = None\0A clone_4 = torch.ops.aten.clone(transpose_19, memory_format = torch.contiguous_format); transpose_19 = None\0A view_53 = torch.ops.aten.view(clone_4, [1, -1, 512]); clone_4 = None\0A _param_constant30 = self._param_constant30\0A t_21 = torch.ops.aten.t(_param_constant30); _param_constant30 = None\0A view_54 = torch.ops.aten.view(view_53, [15, 512]); view_53 = None\0A mm_21 = torch.ops.aten.mm(view_54, t_21); view_54 = t_21 = None\0A _unsafe_view_29 = torch.ops.aten._unsafe_view(mm_21, [1, 15, 512]); mm_21 = 
None\0A add_16 = torch.ops.aten.add(add_14, _unsafe_view_29); add_14 = _unsafe_view_29 = None\0A pow_8 = torch.ops.aten.pow(add_16, 2)\0A mean_7 = torch.ops.aten.mean(pow_8, [-1], True); pow_8 = None\0A add_17 = torch.ops.aten.add(mean_7, 1e-06); mean_7 = None\0A rsqrt_7 = torch.ops.aten.rsqrt(add_17); add_17 = None\0A detach_14 = torch.ops.aten.detach(rsqrt_7)\0A mul_17 = torch.ops.aten.mul(add_16, rsqrt_7); rsqrt_7 = None\0A _param_constant31 = self._param_constant31\0A mul_18 = torch.ops.aten.mul(_param_constant31, mul_17); _param_constant31 = mul_17 = None\0A _param_constant32 = self._param_constant32\0A t_22 = torch.ops.aten.t(_param_constant32); _param_constant32 = None\0A view_55 = torch.ops.aten.view(mul_18, [15, 512]); mul_18 = None\0A mm_22 = torch.ops.aten.mm(view_55, t_22); view_55 = t_22 = None\0A _unsafe_view_30 = torch.ops.aten._unsafe_view(mm_22, [1, 15, 2048]); mm_22 = None\0A relu_3 = torch.ops.aten.relu(_unsafe_view_30); _unsafe_view_30 = None\0A detach_15 = torch.ops.aten.detach(relu_3)\0A _param_constant33 = self._param_constant33\0A t_23 = torch.ops.aten.t(_param_constant33); _param_constant33 = None\0A view_56 = torch.ops.aten.view(relu_3, [15, 2048]); relu_3 = None\0A mm_23 = torch.ops.aten.mm(view_56, t_23); view_56 = t_23 = None\0A _unsafe_view_31 = torch.ops.aten._unsafe_view(mm_23, [1, 15, 512]); mm_23 = None\0A add_18 = torch.ops.aten.add(add_16, _unsafe_view_31); add_16 = _unsafe_view_31 = None\0A pow_9 = torch.ops.aten.pow(add_18, 2)\0A mean_8 = torch.ops.aten.mean(pow_9, [-1], True); pow_9 = None\0A add_19 = torch.ops.aten.add(mean_8, 1e-06); mean_8 = None\0A rsqrt_8 = torch.ops.aten.rsqrt(add_19); add_19 = None\0A detach_16 = torch.ops.aten.detach(rsqrt_8)\0A mul_19 = torch.ops.aten.mul(add_18, rsqrt_8); rsqrt_8 = None\0A _param_constant34 = self._param_constant34\0A mul_20 = torch.ops.aten.mul(_param_constant34, mul_19); _param_constant34 = mul_19 = None\0A _param_constant35 = self._param_constant35\0A t_24 = 
torch.ops.aten.t(_param_constant35); _param_constant35 = None\0A view_57 = torch.ops.aten.view(mul_20, [15, 512])\0A mm_24 = torch.ops.aten.mm(view_57, t_24); view_57 = t_24 = None\0A _unsafe_view_32 = torch.ops.aten._unsafe_view(mm_24, [1, 15, 512]); mm_24 = None\0A view_58 = torch.ops.aten.view(_unsafe_view_32, [1, -1, 8, 64]); _unsafe_view_32 = None\0A transpose_20 = torch.ops.aten.transpose(view_58, 1, 2); view_58 = None\0A _param_constant36 = self._param_constant36\0A t_25 = torch.ops.aten.t(_param_constant36); _param_constant36 = None\0A view_59 = torch.ops.aten.view(mul_20, [15, 512])\0A mm_25 = torch.ops.aten.mm(view_59, t_25); view_59 = t_25 = None\0A _unsafe_view_33 = torch.ops.aten._unsafe_view(mm_25, [1, 15, 512]); mm_25 = None\0A view_60 = torch.ops.aten.view(_unsafe_view_33, [1, -1, 8, 64]); _unsafe_view_33 = None\0A transpose_21 = torch.ops.aten.transpose(view_60, 1, 2); view_60 = None\0A _param_constant37 = self._param_constant37\0A t_26 = torch.ops.aten.t(_param_constant37); _param_constant37 = None\0A view_61 = torch.ops.aten.view(mul_20, [15, 512]); mul_20 = None\0A mm_26 = torch.ops.aten.mm(view_61, t_26); view_61 = t_26 = None\0A _unsafe_view_34 = torch.ops.aten._unsafe_view(mm_26, [1, 15, 512]); mm_26 = None\0A view_62 = torch.ops.aten.view(_unsafe_view_34, [1, -1, 8, 64]); _unsafe_view_34 = None\0A transpose_22 = torch.ops.aten.transpose(view_62, 1, 2); view_62 = None\0A transpose_23 = torch.ops.aten.transpose(transpose_21, 3, 2); transpose_21 = None\0A expand_16 = torch.ops.aten.expand(transpose_20, [1, 8, 15, 64]); transpose_20 = None\0A view_63 = torch.ops.aten.view(expand_16, [8, 15, 64]); expand_16 = None\0A expand_17 = torch.ops.aten.expand(transpose_23, [1, 8, 64, 15]); transpose_23 = None\0A view_64 = torch.ops.aten.view(expand_17, [8, 64, 15]); expand_17 = None\0A bmm_8 = torch.ops.aten.bmm(view_63, view_64); view_63 = view_64 = None\0A _unsafe_view_35 = torch.ops.aten._unsafe_view(bmm_8, [1, 8, 15, 15]); bmm_8 = None\0A add__5 = 
torch.ops.aten.add_(_unsafe_view_35, add_3); _unsafe_view_35 = None\0A amax_4 = torch.ops.aten.amax(add__5, [-1], True)\0A sub_5 = torch.ops.aten.sub(add__5, amax_4); add__5 = amax_4 = None\0A exp_4 = torch.ops.aten.exp(sub_5); sub_5 = None\0A sum_5 = torch.ops.aten.sum(exp_4, [-1], True)\0A div_6 = torch.ops.aten.div(exp_4, sum_5); exp_4 = sum_5 = None\0A detach_17 = torch.ops.aten.detach(div_6)\0A expand_18 = torch.ops.aten.expand(div_6, [1, 8, 15, 15]); div_6 = None\0A view_65 = torch.ops.aten.view(expand_18, [8, 15, 15]); expand_18 = None\0A expand_19 = torch.ops.aten.expand(transpose_22, [1, 8, 15, 64]); transpose_22 = None\0A view_66 = torch.ops.aten.view(expand_19, [8, 15, 64]); expand_19 = None\0A bmm_9 = torch.ops.aten.bmm(view_65, view_66); view_65 = view_66 = None\0A _unsafe_view_36 = torch.ops.aten._unsafe_view(bmm_9, [1, 8, 15, 64]); bmm_9 = None\0A transpose_24 = torch.ops.aten.transpose(_unsafe_view_36, 1, 2); _unsafe_view_36 = None\0A clone_5 = torch.ops.aten.clone(transpose_24, memory_format = torch.contiguous_format); transpose_24 = None\0A view_67 = torch.ops.aten.view(clone_5, [1, -1, 512]); clone_5 = None\0A _param_constant38 = self._param_constant38\0A t_27 = torch.ops.aten.t(_param_constant38); _param_constant38 = None\0A view_68 = torch.ops.aten.view(view_67, [15, 512]); view_67 = None\0A mm_27 = torch.ops.aten.mm(view_68, t_27); view_68 = t_27 = None\0A _unsafe_view_37 = torch.ops.aten._unsafe_view(mm_27, [1, 15, 512]); mm_27 = None\0A add_20 = torch.ops.aten.add(add_18, _unsafe_view_37); add_18 = _unsafe_view_37 = None\0A pow_10 = torch.ops.aten.pow(add_20, 2)\0A mean_9 = torch.ops.aten.mean(pow_10, [-1], True); pow_10 = None\0A add_21 = torch.ops.aten.add(mean_9, 1e-06); mean_9 = None\0A rsqrt_9 = torch.ops.aten.rsqrt(add_21); add_21 = None\0A detach_18 = torch.ops.aten.detach(rsqrt_9)\0A mul_21 = torch.ops.aten.mul(add_20, rsqrt_9); rsqrt_9 = None\0A _param_constant39 = self._param_constant39\0A mul_22 = 
torch.ops.aten.mul(_param_constant39, mul_21); _param_constant39 = mul_21 = None\0A _param_constant40 = self._param_constant40\0A t_28 = torch.ops.aten.t(_param_constant40); _param_constant40 = None\0A view_69 = torch.ops.aten.view(mul_22, [15, 512]); mul_22 = None\0A mm_28 = torch.ops.aten.mm(view_69, t_28); view_69 = t_28 = None\0A _unsafe_view_38 = torch.ops.aten._unsafe_view(mm_28, [1, 15, 2048]); mm_28 = None\0A relu_4 = torch.ops.aten.relu(_unsafe_view_38); _unsafe_view_38 = None\0A detach_19 = torch.ops.aten.detach(relu_4)\0A _param_constant41 = self._param_constant41\0A t_29 = torch.ops.aten.t(_param_constant41); _param_constant41 = None\0A view_70 = torch.ops.aten.view(relu_4, [15, 2048]); relu_4 = None\0A mm_29 = torch.ops.aten.mm(view_70, t_29); view_70 = t_29 = None\0A _unsafe_view_39 = torch.ops.aten._unsafe_view(mm_29, [1, 15, 512]); mm_29 = None\0A add_22 = torch.ops.aten.add(add_20, _unsafe_view_39); add_20 = _unsafe_view_39 = None\0A pow_11 = torch.ops.aten.pow(add_22, 2)\0A mean_10 = torch.ops.aten.mean(pow_11, [-1], True); pow_11 = None\0A add_23 = torch.ops.aten.add(mean_10, 1e-06); mean_10 = None\0A rsqrt_10 = torch.ops.aten.rsqrt(add_23); add_23 = None\0A detach_20 = torch.ops.aten.detach(rsqrt_10)\0A mul_23 = torch.ops.aten.mul(add_22, rsqrt_10); rsqrt_10 = None\0A _param_constant42 = self._param_constant42\0A mul_24 = torch.ops.aten.mul(_param_constant42, mul_23); _param_constant42 = mul_23 = None\0A _param_constant43 = self._param_constant43\0A t_30 = torch.ops.aten.t(_param_constant43); _param_constant43 = None\0A view_71 = torch.ops.aten.view(mul_24, [15, 512])\0A mm_30 = torch.ops.aten.mm(view_71, t_30); view_71 = t_30 = None\0A _unsafe_view_40 = torch.ops.aten._unsafe_view(mm_30, [1, 15, 512]); mm_30 = None\0A view_72 = torch.ops.aten.view(_unsafe_view_40, [1, -1, 8, 64]); _unsafe_view_40 = None\0A transpose_25 = torch.ops.aten.transpose(view_72, 1, 2); view_72 = None\0A _param_constant44 = self._param_constant44\0A t_31 = 
torch.ops.aten.t(_param_constant44); _param_constant44 = None\0A view_73 = torch.ops.aten.view(mul_24, [15, 512])\0A mm_31 = torch.ops.aten.mm(view_73, t_31); view_73 = t_31 = None\0A _unsafe_view_41 = torch.ops.aten._unsafe_view(mm_31, [1, 15, 512]); mm_31 = None\0A view_74 = torch.ops.aten.view(_unsafe_view_41, [1, -1, 8, 64]); _unsafe_view_41 = None\0A transpose_26 = torch.ops.aten.transpose(view_74, 1, 2); view_74 = None\0A _param_constant45 = self._param_constant45\0A t_32 = torch.ops.aten.t(_param_constant45); _param_constant45 = None\0A view_75 = torch.ops.aten.view(mul_24, [15, 512]); mul_24 = None\0A mm_32 = torch.ops.aten.mm(view_75, t_32); view_75 = t_32 = None\0A _unsafe_view_42 = torch.ops.aten._unsafe_view(mm_32, [1, 15, 512]); mm_32 = None\0A view_76 = torch.ops.aten.view(_unsafe_view_42, [1, -1, 8, 64]); _unsafe_view_42 = None\0A transpose_27 = torch.ops.aten.transpose(view_76, 1, 2); view_76 = None\0A transpose_28 = torch.ops.aten.transpose(transpose_26, 3, 2); transpose_26 = None\0A expand_20 = torch.ops.aten.expand(transpose_25, [1, 8, 15, 64]); transpose_25 = None\0A view_77 = torch.ops.aten.view(expand_20, [8, 15, 64]); expand_20 = None\0A expand_21 = torch.ops.aten.expand(transpose_28, [1, 8, 64, 15]); transpose_28 = None\0A view_78 = torch.ops.aten.view(expand_21, [8, 64, 15]); expand_21 = None\0A bmm_10 = torch.ops.aten.bmm(view_77, view_78); view_77 = view_78 = None\0A _unsafe_view_43 = torch.ops.aten._unsafe_view(bmm_10, [1, 8, 15, 15]); bmm_10 = None\0A add__6 = torch.ops.aten.add_(_unsafe_view_43, add_3); _unsafe_view_43 = add_3 = None\0A amax_5 = torch.ops.aten.amax(add__6, [-1], True)\0A sub_6 = torch.ops.aten.sub(add__6, amax_5); add__6 = amax_5 = None\0A exp_5 = torch.ops.aten.exp(sub_6); sub_6 = None\0A sum_6 = torch.ops.aten.sum(exp_5, [-1], True)\0A div_7 = torch.ops.aten.div(exp_5, sum_6); exp_5 = sum_6 = None\0A detach_21 = torch.ops.aten.detach(div_7)\0A expand_22 = torch.ops.aten.expand(div_7, [1, 8, 15, 15]); div_7 = None\0A 
view_79 = torch.ops.aten.view(expand_22, [8, 15, 15]); expand_22 = None\0A expand_23 = torch.ops.aten.expand(transpose_27, [1, 8, 15, 64]); transpose_27 = None\0A view_80 = torch.ops.aten.view(expand_23, [8, 15, 64]); expand_23 = None\0A bmm_11 = torch.ops.aten.bmm(view_79, view_80); view_79 = view_80 = None\0A _unsafe_view_44 = torch.ops.aten._unsafe_view(bmm_11, [1, 8, 15, 64]); bmm_11 = None\0A transpose_29 = torch.ops.aten.transpose(_unsafe_view_44, 1, 2); _unsafe_view_44 = None\0A clone_6 = torch.ops.aten.clone(transpose_29, memory_format = torch.contiguous_format); transpose_29 = None\0A view_81 = torch.ops.aten.view(clone_6, [1, -1, 512]); clone_6 = None\0A _param_constant46 = self._param_constant46\0A t_33 = torch.ops.aten.t(_param_constant46); _param_constant46 = None\0A view_82 = torch.ops.aten.view(view_81, [15, 512]); view_81 = None\0A mm_33 = torch.ops.aten.mm(view_82, t_33); view_82 = t_33 = None\0A _unsafe_view_45 = torch.ops.aten._unsafe_view(mm_33, [1, 15, 512]); mm_33 = None\0A add_24 = torch.ops.aten.add(add_22, _unsafe_view_45); add_22 = _unsafe_view_45 = None\0A pow_12 = torch.ops.aten.pow(add_24, 2)\0A mean_11 = torch.ops.aten.mean(pow_12, [-1], True); pow_12 = None\0A add_25 = torch.ops.aten.add(mean_11, 1e-06); mean_11 = None\0A rsqrt_11 = torch.ops.aten.rsqrt(add_25); add_25 = None\0A detach_22 = torch.ops.aten.detach(rsqrt_11)\0A mul_25 = torch.ops.aten.mul(add_24, rsqrt_11); rsqrt_11 = None\0A _param_constant47 = self._param_constant47\0A mul_26 = torch.ops.aten.mul(_param_constant47, mul_25); _param_constant47 = mul_25 = None\0A _param_constant48 = self._param_constant48\0A t_34 = torch.ops.aten.t(_param_constant48); _param_constant48 = None\0A view_83 = torch.ops.aten.view(mul_26, [15, 512]); mul_26 = None\0A mm_34 = torch.ops.aten.mm(view_83, t_34); view_83 = t_34 = None\0A _unsafe_view_46 = torch.ops.aten._unsafe_view(mm_34, [1, 15, 2048]); mm_34 = None\0A relu_5 = torch.ops.aten.relu(_unsafe_view_46); _unsafe_view_46 = None\0A 
detach_23 = torch.ops.aten.detach(relu_5)\0A _param_constant49 = self._param_constant49\0A t_35 = torch.ops.aten.t(_param_constant49); _param_constant49 = None\0A view_84 = torch.ops.aten.view(relu_5, [15, 2048]); relu_5 = None\0A mm_35 = torch.ops.aten.mm(view_84, t_35); view_84 = t_35 = None\0A _unsafe_view_47 = torch.ops.aten._unsafe_view(mm_35, [1, 15, 512]); mm_35 = None\0A add_26 = torch.ops.aten.add(add_24, _unsafe_view_47); add_24 = _unsafe_view_47 = None\0A pow_13 = torch.ops.aten.pow(add_26, 2)\0A mean_12 = torch.ops.aten.mean(pow_13, [-1], True); pow_13 = None\0A add_27 = torch.ops.aten.add(mean_12, 1e-06); mean_12 = None\0A rsqrt_12 = torch.ops.aten.rsqrt(add_27); add_27 = None\0A detach_24 = torch.ops.aten.detach(rsqrt_12)\0A mul_27 = torch.ops.aten.mul(add_26, rsqrt_12); add_26 = rsqrt_12 = None\0A _param_constant50 = self._param_constant50\0A mul_28 = torch.ops.aten.mul(_param_constant50, mul_27); _param_constant50 = mul_27 = None\0A view_85 = torch.ops.aten.view(masked_fill_, [-1, 4]); masked_fill_ = None\0A _param_constant0_1 = self._param_constant0\0A embedding_2 = torch.ops.aten.embedding(_param_constant0_1, view_85); _param_constant0_1 = view_85 = None\0A ones_1 = torch.ops.aten.ones([1, 4], device = device(type='cpu'), pin_memory = False)\0A ones_2 = torch.ops.aten.ones([1, 15], dtype = torch.int64, device = device(type='cpu'), pin_memory = False)\0A arange_2 = torch.ops.aten.arange(4, device = device(type='cpu'), pin_memory = False)\0A unsqueeze_5 = torch.ops.aten.unsqueeze(arange_2, 0)\0A unsqueeze_6 = torch.ops.aten.unsqueeze(unsqueeze_5, 1); unsqueeze_5 = None\0A slice_7 = torch.ops.aten.slice(unsqueeze_6, 2, 0, 9223372036854775807); unsqueeze_6 = None\0A repeat = torch.ops.aten.repeat(slice_7, [1, 4, 1]); slice_7 = None\0A unsqueeze_7 = torch.ops.aten.unsqueeze(arange_2, 0); arange_2 = None\0A slice_8 = torch.ops.aten.slice(unsqueeze_7, 1, 0, 9223372036854775807); unsqueeze_7 = None\0A unsqueeze_8 = torch.ops.aten.unsqueeze(slice_8, 2); 
slice_8 = None\0A le = torch.ops.aten.le(repeat, unsqueeze_8); repeat = unsqueeze_8 = None\0A convert_element_type_3 = torch.ops.prims.convert_element_type(le, torch.float32); le = None\0A slice_9 = torch.ops.aten.slice(convert_element_type_3, 0, 0, 9223372036854775807); convert_element_type_3 = None\0A unsqueeze_9 = torch.ops.aten.unsqueeze(slice_9, 1); slice_9 = None\0A slice_10 = torch.ops.aten.slice(unsqueeze_9, 2, 0, 9223372036854775807); unsqueeze_9 = None\0A slice_11 = torch.ops.aten.slice(slice_10, 3, 0, 9223372036854775807); slice_10 = None\0A slice_12 = torch.ops.aten.slice(ones_1, 0, 0, 9223372036854775807); ones_1 = None\0A unsqueeze_10 = torch.ops.aten.unsqueeze(slice_12, 1); slice_12 = None\0A unsqueeze_11 = torch.ops.aten.unsqueeze(unsqueeze_10, 2); unsqueeze_10 = None\0A slice_13 = torch.ops.aten.slice(unsqueeze_11, 3, 0, 9223372036854775807); unsqueeze_11 = None\0A mul_29 = torch.ops.aten.mul(slice_11, slice_13); slice_11 = slice_13 = None\0A rsub_1 = torch.ops.aten.rsub(mul_29, 1.0); mul_29 = None\0A mul_30 = torch.ops.aten.mul(rsub_1, -3.4028234663852886e+38); rsub_1 = None\0A slice_14 = torch.ops.aten.slice(ones_2, 0, 0, 9223372036854775807); ones_2 = None\0A unsqueeze_12 = torch.ops.aten.unsqueeze(slice_14, 1); slice_14 = None\0A unsqueeze_13 = torch.ops.aten.unsqueeze(unsqueeze_12, 2); unsqueeze_12 = None\0A slice_15 = torch.ops.aten.slice(unsqueeze_13, 3, 0, 9223372036854775807); unsqueeze_13 = None\0A convert_element_type_4 = torch.ops.prims.convert_element_type(slice_15, torch.float32); slice_15 = None\0A rsub_2 = torch.ops.aten.rsub(convert_element_type_4, 1.0); convert_element_type_4 = None\0A mul_31 = torch.ops.aten.mul(rsub_2, -3.4028234663852886e+38); rsub_2 = None\0A pow_14 = torch.ops.aten.pow(embedding_2, 2)\0A mean_13 = torch.ops.aten.mean(pow_14, [-1], True); pow_14 = None\0A add_28 = torch.ops.aten.add(mean_13, 1e-06); mean_13 = None\0A rsqrt_13 = torch.ops.aten.rsqrt(add_28); add_28 = None\0A detach_25 = 
torch.ops.aten.detach(rsqrt_13)\0A mul_32 = torch.ops.aten.mul(embedding_2, rsqrt_13); rsqrt_13 = None\0A _param_constant51 = self._param_constant51\0A mul_33 = torch.ops.aten.mul(_param_constant51, mul_32); _param_constant51 = mul_32 = None\0A _param_constant52 = self._param_constant52\0A t_36 = torch.ops.aten.t(_param_constant52); _param_constant52 = None\0A view_86 = torch.ops.aten.view(mul_33, [4, 512])\0A mm_36 = torch.ops.aten.mm(view_86, t_36); view_86 = t_36 = None\0A _unsafe_view_48 = torch.ops.aten._unsafe_view(mm_36, [1, 4, 512]); mm_36 = None\0A view_87 = torch.ops.aten.view(_unsafe_view_48, [1, -1, 8, 64]); _unsafe_view_48 = None\0A transpose_30 = torch.ops.aten.transpose(view_87, 1, 2); view_87 = None\0A _param_constant53 = self._param_constant53\0A t_37 = torch.ops.aten.t(_param_constant53); _param_constant53 = None\0A view_88 = torch.ops.aten.view(mul_33, [4, 512])\0A mm_37 = torch.ops.aten.mm(view_88, t_37); view_88 = t_37 = None\0A _unsafe_view_49 = torch.ops.aten._unsafe_view(mm_37, [1, 4, 512]); mm_37 = None\0A view_89 = torch.ops.aten.view(_unsafe_view_49, [1, -1, 8, 64]); _unsafe_view_49 = None\0A transpose_31 = torch.ops.aten.transpose(view_89, 1, 2); view_89 = None\0A _param_constant54 = self._param_constant54\0A t_38 = torch.ops.aten.t(_param_constant54); _param_constant54 = None\0A view_90 = torch.ops.aten.view(mul_33, [4, 512]); mul_33 = None\0A mm_38 = torch.ops.aten.mm(view_90, t_38); view_90 = t_38 = None\0A _unsafe_view_50 = torch.ops.aten._unsafe_view(mm_38, [1, 4, 512]); mm_38 = None\0A view_91 = torch.ops.aten.view(_unsafe_view_50, [1, -1, 8, 64]); _unsafe_view_50 = None\0A transpose_32 = torch.ops.aten.transpose(view_91, 1, 2); view_91 = None\0A transpose_33 = torch.ops.aten.transpose(transpose_31, 3, 2); transpose_31 = None\0A expand_24 = torch.ops.aten.expand(transpose_30, [1, 8, 4, 64]); transpose_30 = None\0A view_92 = torch.ops.aten.view(expand_24, [8, 4, 64]); expand_24 = None\0A expand_25 = 
torch.ops.aten.expand(transpose_33, [1, 8, 64, 4]); transpose_33 = None\0A view_93 = torch.ops.aten.view(expand_25, [8, 64, 4]); expand_25 = None\0A bmm_12 = torch.ops.aten.bmm(view_92, view_93); view_92 = view_93 = None\0A _unsafe_view_51 = torch.ops.aten._unsafe_view(bmm_12, [1, 8, 4, 4]); bmm_12 = None\0A arange_3 = torch.ops.aten.arange(4, dtype = torch.int64, device = device(type='cpu'), pin_memory = False)\0A slice_16 = torch.ops.aten.slice(arange_3, 0, 0, 9223372036854775807); arange_3 = None\0A unsqueeze_14 = torch.ops.aten.unsqueeze(slice_16, 1); slice_16 = None\0A arange_4 = torch.ops.aten.arange(4, dtype = torch.int64, device = device(type='cpu'), pin_memory = False)\0A unsqueeze_15 = torch.ops.aten.unsqueeze(arange_4, 0); arange_4 = None\0A slice_17 = torch.ops.aten.slice(unsqueeze_15, 1, 0, 9223372036854775807); unsqueeze_15 = None\0A sub_7 = torch.ops.aten.sub(slice_17, unsqueeze_14); slice_17 = unsqueeze_14 = None\0A zeros_like = torch.ops.aten.zeros_like(sub_7, dtype = torch.int64, layout = torch.strided, device = device(type='cpu'), pin_memory = False)\0A minimum_1 = torch.ops.aten.minimum(sub_7, zeros_like); sub_7 = zeros_like = None\0A neg = torch.ops.aten.neg(minimum_1); minimum_1 = None\0A lt_1 = torch.ops.aten.lt(neg, 16)\0A convert_element_type_5 = torch.ops.prims.convert_element_type(neg, torch.float32)\0A div_8 = torch.ops.aten.div(convert_element_type_5, 16); convert_element_type_5 = None\0A log_1 = torch.ops.aten.log(div_8); div_8 = None\0A div_9 = torch.ops.aten.div(log_1, 2.0794415416798357); log_1 = None\0A mul_34 = torch.ops.aten.mul(div_9, 16); div_9 = None\0A convert_element_type_6 = torch.ops.prims.convert_element_type(mul_34, torch.int64); mul_34 = None\0A add_29 = torch.ops.aten.add(convert_element_type_6, 16); convert_element_type_6 = None\0A full_like_1 = torch.ops.aten.full_like(add_29, 31, dtype = torch.int64, layout = torch.strided, device = device(type='cpu'), pin_memory = False)\0A minimum_2 = 
torch.ops.aten.minimum(add_29, full_like_1); add_29 = full_like_1 = None\0A where_1 = torch.ops.aten.where(lt_1, neg, minimum_2); lt_1 = neg = minimum_2 = None\0A add_30 = torch.ops.aten.add(where_1, 0); where_1 = None\0A _param_constant55 = self._param_constant55\0A embedding_3 = torch.ops.aten.embedding(_param_constant55, add_30); _param_constant55 = add_30 = None\0A permute_1 = torch.ops.aten.permute(embedding_3, [2, 0, 1]); embedding_3 = None\0A unsqueeze_16 = torch.ops.aten.unsqueeze(permute_1, 0); permute_1 = None\0A add_31 = torch.ops.aten.add(unsqueeze_16, mul_30); unsqueeze_16 = mul_30 = None\0A add__7 = torch.ops.aten.add_(_unsafe_view_51, add_31); _unsafe_view_51 = None\0A amax_6 = torch.ops.aten.amax(add__7, [-1], True)\0A sub_8 = torch.ops.aten.sub(add__7, amax_6); add__7 = amax_6 = None\0A exp_6 = torch.ops.aten.exp(sub_8); sub_8 = None\0A sum_7 = torch.ops.aten.sum(exp_6, [-1], True)\0A div_10 = torch.ops.aten.div(exp_6, sum_7); exp_6 = sum_7 = None\0A detach_26 = torch.ops.aten.detach(div_10)\0A expand_26 = torch.ops.aten.expand(div_10, [1, 8, 4, 4]); div_10 = None\0A view_94 = torch.ops.aten.view(expand_26, [8, 4, 4]); expand_26 = None\0A expand_27 = torch.ops.aten.expand(transpose_32, [1, 8, 4, 64]); transpose_32 = None\0A view_95 = torch.ops.aten.view(expand_27, [8, 4, 64]); expand_27 = None\0A bmm_13 = torch.ops.aten.bmm(view_94, view_95); view_94 = view_95 = None\0A _unsafe_view_52 = torch.ops.aten._unsafe_view(bmm_13, [1, 8, 4, 64]); bmm_13 = None\0A transpose_34 = torch.ops.aten.transpose(_unsafe_view_52, 1, 2); _unsafe_view_52 = None\0A clone_7 = torch.ops.aten.clone(transpose_34, memory_format = torch.contiguous_format); transpose_34 = None\0A view_96 = torch.ops.aten.view(clone_7, [1, -1, 512]); clone_7 = None\0A _param_constant56 = self._param_constant56\0A t_39 = torch.ops.aten.t(_param_constant56); _param_constant56 = None\0A view_97 = torch.ops.aten.view(view_96, [4, 512]); view_96 = None\0A mm_39 = torch.ops.aten.mm(view_97, t_39); 
view_97 = t_39 = None\0A _unsafe_view_53 = torch.ops.aten._unsafe_view(mm_39, [1, 4, 512]); mm_39 = None\0A add_32 = torch.ops.aten.add(embedding_2, _unsafe_view_53); embedding_2 = _unsafe_view_53 = None\0A pow_15 = torch.ops.aten.pow(add_32, 2)\0A mean_14 = torch.ops.aten.mean(pow_15, [-1], True); pow_15 = None\0A add_33 = torch.ops.aten.add(mean_14, 1e-06); mean_14 = None\0A rsqrt_14 = torch.ops.aten.rsqrt(add_33); add_33 = None\0A detach_27 = torch.ops.aten.detach(rsqrt_14)\0A mul_35 = torch.ops.aten.mul(add_32, rsqrt_14); rsqrt_14 = None\0A _param_constant57 = self._param_constant57\0A mul_36 = torch.ops.aten.mul(_param_constant57, mul_35); _param_constant57 = mul_35 = None\0A _param_constant58 = self._param_constant58\0A t_40 = torch.ops.aten.t(_param_constant58); _param_constant58 = None\0A view_98 = torch.ops.aten.view(mul_36, [4, 512]); mul_36 = None\0A mm_40 = torch.ops.aten.mm(view_98, t_40); view_98 = t_40 = None\0A _unsafe_view_54 = torch.ops.aten._unsafe_view(mm_40, [1, 4, 512]); mm_40 = None\0A view_99 = torch.ops.aten.view(_unsafe_view_54, [1, -1, 8, 64]); _unsafe_view_54 = None\0A transpose_35 = torch.ops.aten.transpose(view_99, 1, 2); view_99 = None\0A _param_constant59 = self._param_constant59\0A t_41 = torch.ops.aten.t(_param_constant59); _param_constant59 = None\0A view_100 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_41 = torch.ops.aten.mm(view_100, t_41); view_100 = t_41 = None\0A _unsafe_view_55 = torch.ops.aten._unsafe_view(mm_41, [1, 15, 512]); mm_41 = None\0A view_101 = torch.ops.aten.view(_unsafe_view_55, [1, -1, 8, 64]); _unsafe_view_55 = None\0A transpose_36 = torch.ops.aten.transpose(view_101, 1, 2); view_101 = None\0A _param_constant60 = self._param_constant60\0A t_42 = torch.ops.aten.t(_param_constant60); _param_constant60 = None\0A view_102 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_42 = torch.ops.aten.mm(view_102, t_42); view_102 = t_42 = None\0A _unsafe_view_56 = torch.ops.aten._unsafe_view(mm_42, [1, 15, 512]); mm_42 = 
None\0A view_103 = torch.ops.aten.view(_unsafe_view_56, [1, -1, 8, 64]); _unsafe_view_56 = None\0A transpose_37 = torch.ops.aten.transpose(view_103, 1, 2); view_103 = None\0A transpose_38 = torch.ops.aten.transpose(transpose_36, 3, 2); transpose_36 = None\0A expand_28 = torch.ops.aten.expand(transpose_35, [1, 8, 4, 64]); transpose_35 = None\0A view_104 = torch.ops.aten.view(expand_28, [8, 4, 64]); expand_28 = None\0A expand_29 = torch.ops.aten.expand(transpose_38, [1, 8, 64, 15]); transpose_38 = None\0A view_105 = torch.ops.aten.view(expand_29, [8, 64, 15]); expand_29 = None\0A bmm_14 = torch.ops.aten.bmm(view_104, view_105); view_104 = view_105 = None\0A _unsafe_view_57 = torch.ops.aten._unsafe_view(bmm_14, [1, 8, 4, 15]); bmm_14 = None\0A zeros = torch.ops.aten.zeros([1, 8, 4, 15], dtype = torch.float32, device = device(type='cpu'), pin_memory = False)\0A add_34 = torch.ops.aten.add(zeros, mul_31); zeros = mul_31 = None\0A add__8 = torch.ops.aten.add_(_unsafe_view_57, add_34); _unsafe_view_57 = None\0A amax_7 = torch.ops.aten.amax(add__8, [-1], True)\0A sub_9 = torch.ops.aten.sub(add__8, amax_7); add__8 = amax_7 = None\0A exp_7 = torch.ops.aten.exp(sub_9); sub_9 = None\0A sum_8 = torch.ops.aten.sum(exp_7, [-1], True)\0A div_11 = torch.ops.aten.div(exp_7, sum_8); exp_7 = sum_8 = None\0A detach_28 = torch.ops.aten.detach(div_11)\0A expand_30 = torch.ops.aten.expand(div_11, [1, 8, 4, 15]); div_11 = None\0A view_106 = torch.ops.aten.view(expand_30, [8, 4, 15]); expand_30 = None\0A expand_31 = torch.ops.aten.expand(transpose_37, [1, 8, 15, 64]); transpose_37 = None\0A view_107 = torch.ops.aten.view(expand_31, [8, 15, 64]); expand_31 = None\0A bmm_15 = torch.ops.aten.bmm(view_106, view_107); view_106 = view_107 = None\0A _unsafe_view_58 = torch.ops.aten._unsafe_view(bmm_15, [1, 8, 4, 64]); bmm_15 = None\0A transpose_39 = torch.ops.aten.transpose(_unsafe_view_58, 1, 2); _unsafe_view_58 = None\0A clone_8 = torch.ops.aten.clone(transpose_39, memory_format = 
torch.contiguous_format); transpose_39 = None\0A view_108 = torch.ops.aten.view(clone_8, [1, -1, 512]); clone_8 = None\0A _param_constant61 = self._param_constant61\0A t_43 = torch.ops.aten.t(_param_constant61); _param_constant61 = None\0A view_109 = torch.ops.aten.view(view_108, [4, 512]); view_108 = None\0A mm_43 = torch.ops.aten.mm(view_109, t_43); view_109 = t_43 = None\0A _unsafe_view_59 = torch.ops.aten._unsafe_view(mm_43, [1, 4, 512]); mm_43 = None\0A add_35 = torch.ops.aten.add(add_32, _unsafe_view_59); add_32 = _unsafe_view_59 = None\0A pow_16 = torch.ops.aten.pow(add_35, 2)\0A mean_15 = torch.ops.aten.mean(pow_16, [-1], True); pow_16 = None\0A add_36 = torch.ops.aten.add(mean_15, 1e-06); mean_15 = None\0A rsqrt_15 = torch.ops.aten.rsqrt(add_36); add_36 = None\0A detach_29 = torch.ops.aten.detach(rsqrt_15)\0A mul_37 = torch.ops.aten.mul(add_35, rsqrt_15); rsqrt_15 = None\0A _param_constant62 = self._param_constant62\0A mul_38 = torch.ops.aten.mul(_param_constant62, mul_37); _param_constant62 = mul_37 = None\0A _param_constant63 = self._param_constant63\0A t_44 = torch.ops.aten.t(_param_constant63); _param_constant63 = None\0A view_110 = torch.ops.aten.view(mul_38, [4, 512]); mul_38 = None\0A mm_44 = torch.ops.aten.mm(view_110, t_44); view_110 = t_44 = None\0A _unsafe_view_60 = torch.ops.aten._unsafe_view(mm_44, [1, 4, 2048]); mm_44 = None\0A relu_6 = torch.ops.aten.relu(_unsafe_view_60); _unsafe_view_60 = None\0A detach_30 = torch.ops.aten.detach(relu_6)\0A _param_constant64 = self._param_constant64\0A t_45 = torch.ops.aten.t(_param_constant64); _param_constant64 = None\0A view_111 = torch.ops.aten.view(relu_6, [4, 2048]); relu_6 = None\0A mm_45 = torch.ops.aten.mm(view_111, t_45); view_111 = t_45 = None\0A _unsafe_view_61 = torch.ops.aten._unsafe_view(mm_45, [1, 4, 512]); mm_45 = None\0A add_37 = torch.ops.aten.add(add_35, _unsafe_view_61); add_35 = _unsafe_view_61 = None\0A pow_17 = torch.ops.aten.pow(add_37, 2)\0A mean_16 = torch.ops.aten.mean(pow_17, 
[-1], True); pow_17 = None\0A add_38 = torch.ops.aten.add(mean_16, 1e-06); mean_16 = None\0A rsqrt_16 = torch.ops.aten.rsqrt(add_38); add_38 = None\0A detach_31 = torch.ops.aten.detach(rsqrt_16)\0A mul_39 = torch.ops.aten.mul(add_37, rsqrt_16); rsqrt_16 = None\0A _param_constant65 = self._param_constant65\0A mul_40 = torch.ops.aten.mul(_param_constant65, mul_39); _param_constant65 = mul_39 = None\0A _param_constant66 = self._param_constant66\0A t_46 = torch.ops.aten.t(_param_constant66); _param_constant66 = None\0A view_112 = torch.ops.aten.view(mul_40, [4, 512])\0A mm_46 = torch.ops.aten.mm(view_112, t_46); view_112 = t_46 = None\0A _unsafe_view_62 = torch.ops.aten._unsafe_view(mm_46, [1, 4, 512]); mm_46 = None\0A view_113 = torch.ops.aten.view(_unsafe_view_62, [1, -1, 8, 64]); _unsafe_view_62 = None\0A transpose_40 = torch.ops.aten.transpose(view_113, 1, 2); view_113 = None\0A _param_constant67 = self._param_constant67\0A t_47 = torch.ops.aten.t(_param_constant67); _param_constant67 = None\0A view_114 = torch.ops.aten.view(mul_40, [4, 512])\0A mm_47 = torch.ops.aten.mm(view_114, t_47); view_114 = t_47 = None\0A _unsafe_view_63 = torch.ops.aten._unsafe_view(mm_47, [1, 4, 512]); mm_47 = None\0A view_115 = torch.ops.aten.view(_unsafe_view_63, [1, -1, 8, 64]); _unsafe_view_63 = None\0A transpose_41 = torch.ops.aten.transpose(view_115, 1, 2); view_115 = None\0A _param_constant68 = self._param_constant68\0A t_48 = torch.ops.aten.t(_param_constant68); _param_constant68 = None\0A view_116 = torch.ops.aten.view(mul_40, [4, 512]); mul_40 = None\0A mm_48 = torch.ops.aten.mm(view_116, t_48); view_116 = t_48 = None\0A _unsafe_view_64 = torch.ops.aten._unsafe_view(mm_48, [1, 4, 512]); mm_48 = None\0A view_117 = torch.ops.aten.view(_unsafe_view_64, [1, -1, 8, 64]); _unsafe_view_64 = None\0A transpose_42 = torch.ops.aten.transpose(view_117, 1, 2); view_117 = None\0A transpose_43 = torch.ops.aten.transpose(transpose_41, 3, 2); transpose_41 = None\0A expand_32 = 
torch.ops.aten.expand(transpose_40, [1, 8, 4, 64]); transpose_40 = None\0A view_118 = torch.ops.aten.view(expand_32, [8, 4, 64]); expand_32 = None\0A expand_33 = torch.ops.aten.expand(transpose_43, [1, 8, 64, 4]); transpose_43 = None\0A view_119 = torch.ops.aten.view(expand_33, [8, 64, 4]); expand_33 = None\0A bmm_16 = torch.ops.aten.bmm(view_118, view_119); view_118 = view_119 = None\0A _unsafe_view_65 = torch.ops.aten._unsafe_view(bmm_16, [1, 8, 4, 4]); bmm_16 = None\0A add__9 = torch.ops.aten.add_(_unsafe_view_65, add_31); _unsafe_view_65 = None\0A amax_8 = torch.ops.aten.amax(add__9, [-1], True)\0A sub_10 = torch.ops.aten.sub(add__9, amax_8); add__9 = amax_8 = None\0A exp_8 = torch.ops.aten.exp(sub_10); sub_10 = None\0A sum_9 = torch.ops.aten.sum(exp_8, [-1], True)\0A div_12 = torch.ops.aten.div(exp_8, sum_9); exp_8 = sum_9 = None\0A detach_32 = torch.ops.aten.detach(div_12)\0A expand_34 = torch.ops.aten.expand(div_12, [1, 8, 4, 4]); div_12 = None\0A view_120 = torch.ops.aten.view(expand_34, [8, 4, 4]); expand_34 = None\0A expand_35 = torch.ops.aten.expand(transpose_42, [1, 8, 4, 64]); transpose_42 = None\0A view_121 = torch.ops.aten.view(expand_35, [8, 4, 64]); expand_35 = None\0A bmm_17 = torch.ops.aten.bmm(view_120, view_121); view_120 = view_121 = None\0A _unsafe_view_66 = torch.ops.aten._unsafe_view(bmm_17, [1, 8, 4, 64]); bmm_17 = None\0A transpose_44 = torch.ops.aten.transpose(_unsafe_view_66, 1, 2); _unsafe_view_66 = None\0A clone_9 = torch.ops.aten.clone(transpose_44, memory_format = torch.contiguous_format); transpose_44 = None\0A view_122 = torch.ops.aten.view(clone_9, [1, -1, 512]); clone_9 = None\0A _param_constant69 = self._param_constant69\0A t_49 = torch.ops.aten.t(_param_constant69); _param_constant69 = None\0A view_123 = torch.ops.aten.view(view_122, [4, 512]); view_122 = None\0A mm_49 = torch.ops.aten.mm(view_123, t_49); view_123 = t_49 = None\0A _unsafe_view_67 = torch.ops.aten._unsafe_view(mm_49, [1, 4, 512]); mm_49 = None\0A add_39 = 
torch.ops.aten.add(add_37, _unsafe_view_67); add_37 = _unsafe_view_67 = None\0A pow_18 = torch.ops.aten.pow(add_39, 2)\0A mean_17 = torch.ops.aten.mean(pow_18, [-1], True); pow_18 = None\0A add_40 = torch.ops.aten.add(mean_17, 1e-06); mean_17 = None\0A rsqrt_17 = torch.ops.aten.rsqrt(add_40); add_40 = None\0A detach_33 = torch.ops.aten.detach(rsqrt_17)\0A mul_41 = torch.ops.aten.mul(add_39, rsqrt_17); rsqrt_17 = None\0A _param_constant70 = self._param_constant70\0A mul_42 = torch.ops.aten.mul(_param_constant70, mul_41); _param_constant70 = mul_41 = None\0A _param_constant71 = self._param_constant71\0A t_50 = torch.ops.aten.t(_param_constant71); _param_constant71 = None\0A view_124 = torch.ops.aten.view(mul_42, [4, 512]); mul_42 = None\0A mm_50 = torch.ops.aten.mm(view_124, t_50); view_124 = t_50 = None\0A _unsafe_view_68 = torch.ops.aten._unsafe_view(mm_50, [1, 4, 512]); mm_50 = None\0A view_125 = torch.ops.aten.view(_unsafe_view_68, [1, -1, 8, 64]); _unsafe_view_68 = None\0A transpose_45 = torch.ops.aten.transpose(view_125, 1, 2); view_125 = None\0A _param_constant72 = self._param_constant72\0A t_51 = torch.ops.aten.t(_param_constant72); _param_constant72 = None\0A view_126 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_51 = torch.ops.aten.mm(view_126, t_51); view_126 = t_51 = None\0A _unsafe_view_69 = torch.ops.aten._unsafe_view(mm_51, [1, 15, 512]); mm_51 = None\0A view_127 = torch.ops.aten.view(_unsafe_view_69, [1, -1, 8, 64]); _unsafe_view_69 = None\0A transpose_46 = torch.ops.aten.transpose(view_127, 1, 2); view_127 = None\0A _param_constant73 = self._param_constant73\0A t_52 = torch.ops.aten.t(_param_constant73); _param_constant73 = None\0A view_128 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_52 = torch.ops.aten.mm(view_128, t_52); view_128 = t_52 = None\0A _unsafe_view_70 = torch.ops.aten._unsafe_view(mm_52, [1, 15, 512]); mm_52 = None\0A view_129 = torch.ops.aten.view(_unsafe_view_70, [1, -1, 8, 64]); _unsafe_view_70 = None\0A transpose_47 = 
torch.ops.aten.transpose(view_129, 1, 2); view_129 = None\0A transpose_48 = torch.ops.aten.transpose(transpose_46, 3, 2); transpose_46 = None\0A expand_36 = torch.ops.aten.expand(transpose_45, [1, 8, 4, 64]); transpose_45 = None\0A view_130 = torch.ops.aten.view(expand_36, [8, 4, 64]); expand_36 = None\0A expand_37 = torch.ops.aten.expand(transpose_48, [1, 8, 64, 15]); transpose_48 = None\0A view_131 = torch.ops.aten.view(expand_37, [8, 64, 15]); expand_37 = None\0A bmm_18 = torch.ops.aten.bmm(view_130, view_131); view_130 = view_131 = None\0A _unsafe_view_71 = torch.ops.aten._unsafe_view(bmm_18, [1, 8, 4, 15]); bmm_18 = None\0A add__10 = torch.ops.aten.add_(_unsafe_view_71, add_34); _unsafe_view_71 = None\0A amax_9 = torch.ops.aten.amax(add__10, [-1], True)\0A sub_11 = torch.ops.aten.sub(add__10, amax_9); add__10 = amax_9 = None\0A exp_9 = torch.ops.aten.exp(sub_11); sub_11 = None\0A sum_10 = torch.ops.aten.sum(exp_9, [-1], True)\0A div_13 = torch.ops.aten.div(exp_9, sum_10); exp_9 = sum_10 = None\0A detach_34 = torch.ops.aten.detach(div_13)\0A expand_38 = torch.ops.aten.expand(div_13, [1, 8, 4, 15]); div_13 = None\0A view_132 = torch.ops.aten.view(expand_38, [8, 4, 15]); expand_38 = None\0A expand_39 = torch.ops.aten.expand(transpose_47, [1, 8, 15, 64]); transpose_47 = None\0A view_133 = torch.ops.aten.view(expand_39, [8, 15, 64]); expand_39 = None\0A bmm_19 = torch.ops.aten.bmm(view_132, view_133); view_132 = view_133 = None\0A _unsafe_view_72 = torch.ops.aten._unsafe_view(bmm_19, [1, 8, 4, 64]); bmm_19 = None\0A transpose_49 = torch.ops.aten.transpose(_unsafe_view_72, 1, 2); _unsafe_view_72 = None\0A clone_10 = torch.ops.aten.clone(transpose_49, memory_format = torch.contiguous_format); transpose_49 = None\0A view_134 = torch.ops.aten.view(clone_10, [1, -1, 512]); clone_10 = None\0A _param_constant74 = self._param_constant74\0A t_53 = torch.ops.aten.t(_param_constant74); _param_constant74 = None\0A view_135 = torch.ops.aten.view(view_134, [4, 512]); view_134 = 
None\0A mm_53 = torch.ops.aten.mm(view_135, t_53); view_135 = t_53 = None\0A _unsafe_view_73 = torch.ops.aten._unsafe_view(mm_53, [1, 4, 512]); mm_53 = None\0A add_41 = torch.ops.aten.add(add_39, _unsafe_view_73); add_39 = _unsafe_view_73 = None\0A pow_19 = torch.ops.aten.pow(add_41, 2)\0A mean_18 = torch.ops.aten.mean(pow_19, [-1], True); pow_19 = None\0A add_42 = torch.ops.aten.add(mean_18, 1e-06); mean_18 = None\0A rsqrt_18 = torch.ops.aten.rsqrt(add_42); add_42 = None\0A detach_35 = torch.ops.aten.detach(rsqrt_18)\0A mul_43 = torch.ops.aten.mul(add_41, rsqrt_18); rsqrt_18 = None\0A _param_constant75 = self._param_constant75\0A mul_44 = torch.ops.aten.mul(_param_constant75, mul_43); _param_constant75 = mul_43 = None\0A _param_constant76 = self._param_constant76\0A t_54 = torch.ops.aten.t(_param_constant76); _param_constant76 = None\0A view_136 = torch.ops.aten.view(mul_44, [4, 512]); mul_44 = None\0A mm_54 = torch.ops.aten.mm(view_136, t_54); view_136 = t_54 = None\0A _unsafe_view_74 = torch.ops.aten._unsafe_view(mm_54, [1, 4, 2048]); mm_54 = None\0A relu_7 = torch.ops.aten.relu(_unsafe_view_74); _unsafe_view_74 = None\0A detach_36 = torch.ops.aten.detach(relu_7)\0A _param_constant77 = self._param_constant77\0A t_55 = torch.ops.aten.t(_param_constant77); _param_constant77 = None\0A view_137 = torch.ops.aten.view(relu_7, [4, 2048]); relu_7 = None\0A mm_55 = torch.ops.aten.mm(view_137, t_55); view_137 = t_55 = None\0A _unsafe_view_75 = torch.ops.aten._unsafe_view(mm_55, [1, 4, 512]); mm_55 = None\0A add_43 = torch.ops.aten.add(add_41, _unsafe_view_75); add_41 = _unsafe_view_75 = None\0A pow_20 = torch.ops.aten.pow(add_43, 2)\0A mean_19 = torch.ops.aten.mean(pow_20, [-1], True); pow_20 = None\0A add_44 = torch.ops.aten.add(mean_19, 1e-06); mean_19 = None\0A rsqrt_19 = torch.ops.aten.rsqrt(add_44); add_44 = None\0A detach_37 = torch.ops.aten.detach(rsqrt_19)\0A mul_45 = torch.ops.aten.mul(add_43, rsqrt_19); rsqrt_19 = None\0A _param_constant78 = 
self._param_constant78\0A mul_46 = torch.ops.aten.mul(_param_constant78, mul_45); _param_constant78 = mul_45 = None\0A _param_constant79 = self._param_constant79\0A t_56 = torch.ops.aten.t(_param_constant79); _param_constant79 = None\0A view_138 = torch.ops.aten.view(mul_46, [4, 512])\0A mm_56 = torch.ops.aten.mm(view_138, t_56); view_138 = t_56 = None\0A _unsafe_view_76 = torch.ops.aten._unsafe_view(mm_56, [1, 4, 512]); mm_56 = None\0A view_139 = torch.ops.aten.view(_unsafe_view_76, [1, -1, 8, 64]); _unsafe_view_76 = None\0A transpose_50 = torch.ops.aten.transpose(view_139, 1, 2); view_139 = None\0A _param_constant80 = self._param_constant80\0A t_57 = torch.ops.aten.t(_param_constant80); _param_constant80 = None\0A view_140 = torch.ops.aten.view(mul_46, [4, 512])\0A mm_57 = torch.ops.aten.mm(view_140, t_57); view_140 = t_57 = None\0A _unsafe_view_77 = torch.ops.aten._unsafe_view(mm_57, [1, 4, 512]); mm_57 = None\0A view_141 = torch.ops.aten.view(_unsafe_view_77, [1, -1, 8, 64]); _unsafe_view_77 = None\0A transpose_51 = torch.ops.aten.transpose(view_141, 1, 2); view_141 = None\0A _param_constant81 = self._param_constant81\0A t_58 = torch.ops.aten.t(_param_constant81); _param_constant81 = None\0A view_142 = torch.ops.aten.view(mul_46, [4, 512]); mul_46 = None\0A mm_58 = torch.ops.aten.mm(view_142, t_58); view_142 = t_58 = None\0A _unsafe_view_78 = torch.ops.aten._unsafe_view(mm_58, [1, 4, 512]); mm_58 = None\0A view_143 = torch.ops.aten.view(_unsafe_view_78, [1, -1, 8, 64]); _unsafe_view_78 = None\0A transpose_52 = torch.ops.aten.transpose(view_143, 1, 2); view_143 = None\0A transpose_53 = torch.ops.aten.transpose(transpose_51, 3, 2); transpose_51 = None\0A expand_40 = torch.ops.aten.expand(transpose_50, [1, 8, 4, 64]); transpose_50 = None\0A view_144 = torch.ops.aten.view(expand_40, [8, 4, 64]); expand_40 = None\0A expand_41 = torch.ops.aten.expand(transpose_53, [1, 8, 64, 4]); transpose_53 = None\0A view_145 = torch.ops.aten.view(expand_41, [8, 64, 4]); expand_41 
= None\0A bmm_20 = torch.ops.aten.bmm(view_144, view_145); view_144 = view_145 = None\0A _unsafe_view_79 = torch.ops.aten._unsafe_view(bmm_20, [1, 8, 4, 4]); bmm_20 = None\0A add__11 = torch.ops.aten.add_(_unsafe_view_79, add_31); _unsafe_view_79 = None\0A amax_10 = torch.ops.aten.amax(add__11, [-1], True)\0A sub_12 = torch.ops.aten.sub(add__11, amax_10); add__11 = amax_10 = None\0A exp_10 = torch.ops.aten.exp(sub_12); sub_12 = None\0A sum_11 = torch.ops.aten.sum(exp_10, [-1], True)\0A div_14 = torch.ops.aten.div(exp_10, sum_11); exp_10 = sum_11 = None\0A detach_38 = torch.ops.aten.detach(div_14)\0A expand_42 = torch.ops.aten.expand(div_14, [1, 8, 4, 4]); div_14 = None\0A view_146 = torch.ops.aten.view(expand_42, [8, 4, 4]); expand_42 = None\0A expand_43 = torch.ops.aten.expand(transpose_52, [1, 8, 4, 64]); transpose_52 = None\0A view_147 = torch.ops.aten.view(expand_43, [8, 4, 64]); expand_43 = None\0A bmm_21 = torch.ops.aten.bmm(view_146, view_147); view_146 = view_147 = None\0A _unsafe_view_80 = torch.ops.aten._unsafe_view(bmm_21, [1, 8, 4, 64]); bmm_21 = None\0A transpose_54 = torch.ops.aten.transpose(_unsafe_view_80, 1, 2); _unsafe_view_80 = None\0A clone_11 = torch.ops.aten.clone(transpose_54, memory_format = torch.contiguous_format); transpose_54 = None\0A view_148 = torch.ops.aten.view(clone_11, [1, -1, 512]); clone_11 = None\0A _param_constant82 = self._param_constant82\0A t_59 = torch.ops.aten.t(_param_constant82); _param_constant82 = None\0A view_149 = torch.ops.aten.view(view_148, [4, 512]); view_148 = None\0A mm_59 = torch.ops.aten.mm(view_149, t_59); view_149 = t_59 = None\0A _unsafe_view_81 = torch.ops.aten._unsafe_view(mm_59, [1, 4, 512]); mm_59 = None\0A add_45 = torch.ops.aten.add(add_43, _unsafe_view_81); add_43 = _unsafe_view_81 = None\0A pow_21 = torch.ops.aten.pow(add_45, 2)\0A mean_20 = torch.ops.aten.mean(pow_21, [-1], True); pow_21 = None\0A add_46 = torch.ops.aten.add(mean_20, 1e-06); mean_20 = None\0A rsqrt_20 = 
torch.ops.aten.rsqrt(add_46); add_46 = None\0A detach_39 = torch.ops.aten.detach(rsqrt_20)\0A mul_47 = torch.ops.aten.mul(add_45, rsqrt_20); rsqrt_20 = None\0A _param_constant83 = self._param_constant83\0A mul_48 = torch.ops.aten.mul(_param_constant83, mul_47); _param_constant83 = mul_47 = None\0A _param_constant84 = self._param_constant84\0A t_60 = torch.ops.aten.t(_param_constant84); _param_constant84 = None\0A view_150 = torch.ops.aten.view(mul_48, [4, 512]); mul_48 = None\0A mm_60 = torch.ops.aten.mm(view_150, t_60); view_150 = t_60 = None\0A _unsafe_view_82 = torch.ops.aten._unsafe_view(mm_60, [1, 4, 512]); mm_60 = None\0A view_151 = torch.ops.aten.view(_unsafe_view_82, [1, -1, 8, 64]); _unsafe_view_82 = None\0A transpose_55 = torch.ops.aten.transpose(view_151, 1, 2); view_151 = None\0A _param_constant85 = self._param_constant85\0A t_61 = torch.ops.aten.t(_param_constant85); _param_constant85 = None\0A view_152 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_61 = torch.ops.aten.mm(view_152, t_61); view_152 = t_61 = None\0A _unsafe_view_83 = torch.ops.aten._unsafe_view(mm_61, [1, 15, 512]); mm_61 = None\0A view_153 = torch.ops.aten.view(_unsafe_view_83, [1, -1, 8, 64]); _unsafe_view_83 = None\0A transpose_56 = torch.ops.aten.transpose(view_153, 1, 2); view_153 = None\0A _param_constant86 = self._param_constant86\0A t_62 = torch.ops.aten.t(_param_constant86); _param_constant86 = None\0A view_154 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_62 = torch.ops.aten.mm(view_154, t_62); view_154 = t_62 = None\0A _unsafe_view_84 = torch.ops.aten._unsafe_view(mm_62, [1, 15, 512]); mm_62 = None\0A view_155 = torch.ops.aten.view(_unsafe_view_84, [1, -1, 8, 64]); _unsafe_view_84 = None\0A transpose_57 = torch.ops.aten.transpose(view_155, 1, 2); view_155 = None\0A transpose_58 = torch.ops.aten.transpose(transpose_56, 3, 2); transpose_56 = None\0A expand_44 = torch.ops.aten.expand(transpose_55, [1, 8, 4, 64]); transpose_55 = None\0A view_156 = torch.ops.aten.view(expand_44, 
[8, 4, 64]); expand_44 = None\0A expand_45 = torch.ops.aten.expand(transpose_58, [1, 8, 64, 15]); transpose_58 = None\0A view_157 = torch.ops.aten.view(expand_45, [8, 64, 15]); expand_45 = None\0A bmm_22 = torch.ops.aten.bmm(view_156, view_157); view_156 = view_157 = None\0A _unsafe_view_85 = torch.ops.aten._unsafe_view(bmm_22, [1, 8, 4, 15]); bmm_22 = None\0A add__12 = torch.ops.aten.add_(_unsafe_view_85, add_34); _unsafe_view_85 = None\0A amax_11 = torch.ops.aten.amax(add__12, [-1], True)\0A sub_13 = torch.ops.aten.sub(add__12, amax_11); add__12 = amax_11 = None\0A exp_11 = torch.ops.aten.exp(sub_13); sub_13 = None\0A sum_12 = torch.ops.aten.sum(exp_11, [-1], True)\0A div_15 = torch.ops.aten.div(exp_11, sum_12); exp_11 = sum_12 = None\0A detach_40 = torch.ops.aten.detach(div_15)\0A expand_46 = torch.ops.aten.expand(div_15, [1, 8, 4, 15]); div_15 = None\0A view_158 = torch.ops.aten.view(expand_46, [8, 4, 15]); expand_46 = None\0A expand_47 = torch.ops.aten.expand(transpose_57, [1, 8, 15, 64]); transpose_57 = None\0A view_159 = torch.ops.aten.view(expand_47, [8, 15, 64]); expand_47 = None\0A bmm_23 = torch.ops.aten.bmm(view_158, view_159); view_158 = view_159 = None\0A _unsafe_view_86 = torch.ops.aten._unsafe_view(bmm_23, [1, 8, 4, 64]); bmm_23 = None\0A transpose_59 = torch.ops.aten.transpose(_unsafe_view_86, 1, 2); _unsafe_view_86 = None\0A clone_12 = torch.ops.aten.clone(transpose_59, memory_format = torch.contiguous_format); transpose_59 = None\0A view_160 = torch.ops.aten.view(clone_12, [1, -1, 512]); clone_12 = None\0A _param_constant87 = self._param_constant87\0A t_63 = torch.ops.aten.t(_param_constant87); _param_constant87 = None\0A view_161 = torch.ops.aten.view(view_160, [4, 512]); view_160 = None\0A mm_63 = torch.ops.aten.mm(view_161, t_63); view_161 = t_63 = None\0A _unsafe_view_87 = torch.ops.aten._unsafe_view(mm_63, [1, 4, 512]); mm_63 = None\0A add_47 = torch.ops.aten.add(add_45, _unsafe_view_87); add_45 = _unsafe_view_87 = None\0A pow_22 = 
torch.ops.aten.pow(add_47, 2)\0A mean_21 = torch.ops.aten.mean(pow_22, [-1], True); pow_22 = None\0A add_48 = torch.ops.aten.add(mean_21, 1e-06); mean_21 = None\0A rsqrt_21 = torch.ops.aten.rsqrt(add_48); add_48 = None\0A detach_41 = torch.ops.aten.detach(rsqrt_21)\0A mul_49 = torch.ops.aten.mul(add_47, rsqrt_21); rsqrt_21 = None\0A _param_constant88 = self._param_constant88\0A mul_50 = torch.ops.aten.mul(_param_constant88, mul_49); _param_constant88 = mul_49 = None\0A _param_constant89 = self._param_constant89\0A t_64 = torch.ops.aten.t(_param_constant89); _param_constant89 = None\0A view_162 = torch.ops.aten.view(mul_50, [4, 512]); mul_50 = None\0A mm_64 = torch.ops.aten.mm(view_162, t_64); view_162 = t_64 = None\0A _unsafe_view_88 = torch.ops.aten._unsafe_view(mm_64, [1, 4, 2048]); mm_64 = None\0A relu_8 = torch.ops.aten.relu(_unsafe_view_88); _unsafe_view_88 = None\0A detach_42 = torch.ops.aten.detach(relu_8)\0A _param_constant90 = self._param_constant90\0A t_65 = torch.ops.aten.t(_param_constant90); _param_constant90 = None\0A view_163 = torch.ops.aten.view(relu_8, [4, 2048]); relu_8 = None\0A mm_65 = torch.ops.aten.mm(view_163, t_65); view_163 = t_65 = None\0A _unsafe_view_89 = torch.ops.aten._unsafe_view(mm_65, [1, 4, 512]); mm_65 = None\0A add_49 = torch.ops.aten.add(add_47, _unsafe_view_89); add_47 = _unsafe_view_89 = None\0A pow_23 = torch.ops.aten.pow(add_49, 2)\0A mean_22 = torch.ops.aten.mean(pow_23, [-1], True); pow_23 = None\0A add_50 = torch.ops.aten.add(mean_22, 1e-06); mean_22 = None\0A rsqrt_22 = torch.ops.aten.rsqrt(add_50); add_50 = None\0A detach_43 = torch.ops.aten.detach(rsqrt_22)\0A mul_51 = torch.ops.aten.mul(add_49, rsqrt_22); rsqrt_22 = None\0A _param_constant91 = self._param_constant91\0A mul_52 = torch.ops.aten.mul(_param_constant91, mul_51); _param_constant91 = mul_51 = None\0A _param_constant92 = self._param_constant92\0A t_66 = torch.ops.aten.t(_param_constant92); _param_constant92 = None\0A view_164 = torch.ops.aten.view(mul_52, 
[4, 512])\0A mm_66 = torch.ops.aten.mm(view_164, t_66); view_164 = t_66 = None\0A _unsafe_view_90 = torch.ops.aten._unsafe_view(mm_66, [1, 4, 512]); mm_66 = None\0A view_165 = torch.ops.aten.view(_unsafe_view_90, [1, -1, 8, 64]); _unsafe_view_90 = None\0A transpose_60 = torch.ops.aten.transpose(view_165, 1, 2); view_165 = None\0A _param_constant93 = self._param_constant93\0A t_67 = torch.ops.aten.t(_param_constant93); _param_constant93 = None\0A view_166 = torch.ops.aten.view(mul_52, [4, 512])\0A mm_67 = torch.ops.aten.mm(view_166, t_67); view_166 = t_67 = None\0A _unsafe_view_91 = torch.ops.aten._unsafe_view(mm_67, [1, 4, 512]); mm_67 = None\0A view_167 = torch.ops.aten.view(_unsafe_view_91, [1, -1, 8, 64]); _unsafe_view_91 = None\0A transpose_61 = torch.ops.aten.transpose(view_167, 1, 2); view_167 = None\0A _param_constant94 = self._param_constant94\0A t_68 = torch.ops.aten.t(_param_constant94); _param_constant94 = None\0A view_168 = torch.ops.aten.view(mul_52, [4, 512]); mul_52 = None\0A mm_68 = torch.ops.aten.mm(view_168, t_68); view_168 = t_68 = None\0A _unsafe_view_92 = torch.ops.aten._unsafe_view(mm_68, [1, 4, 512]); mm_68 = None\0A view_169 = torch.ops.aten.view(_unsafe_view_92, [1, -1, 8, 64]); _unsafe_view_92 = None\0A transpose_62 = torch.ops.aten.transpose(view_169, 1, 2); view_169 = None\0A transpose_63 = torch.ops.aten.transpose(transpose_61, 3, 2); transpose_61 = None\0A expand_48 = torch.ops.aten.expand(transpose_60, [1, 8, 4, 64]); transpose_60 = None\0A view_170 = torch.ops.aten.view(expand_48, [8, 4, 64]); expand_48 = None\0A expand_49 = torch.ops.aten.expand(transpose_63, [1, 8, 64, 4]); transpose_63 = None\0A view_171 = torch.ops.aten.view(expand_49, [8, 64, 4]); expand_49 = None\0A bmm_24 = torch.ops.aten.bmm(view_170, view_171); view_170 = view_171 = None\0A _unsafe_view_93 = torch.ops.aten._unsafe_view(bmm_24, [1, 8, 4, 4]); bmm_24 = None\0A add__13 = torch.ops.aten.add_(_unsafe_view_93, add_31); _unsafe_view_93 = None\0A amax_12 = 
torch.ops.aten.amax(add__13, [-1], True)\0A sub_14 = torch.ops.aten.sub(add__13, amax_12); add__13 = amax_12 = None\0A exp_12 = torch.ops.aten.exp(sub_14); sub_14 = None\0A sum_13 = torch.ops.aten.sum(exp_12, [-1], True)\0A div_16 = torch.ops.aten.div(exp_12, sum_13); exp_12 = sum_13 = None\0A detach_44 = torch.ops.aten.detach(div_16)\0A expand_50 = torch.ops.aten.expand(div_16, [1, 8, 4, 4]); div_16 = None\0A view_172 = torch.ops.aten.view(expand_50, [8, 4, 4]); expand_50 = None\0A expand_51 = torch.ops.aten.expand(transpose_62, [1, 8, 4, 64]); transpose_62 = None\0A view_173 = torch.ops.aten.view(expand_51, [8, 4, 64]); expand_51 = None\0A bmm_25 = torch.ops.aten.bmm(view_172, view_173); view_172 = view_173 = None\0A _unsafe_view_94 = torch.ops.aten._unsafe_view(bmm_25, [1, 8, 4, 64]); bmm_25 = None\0A transpose_64 = torch.ops.aten.transpose(_unsafe_view_94, 1, 2); _unsafe_view_94 = None\0A clone_13 = torch.ops.aten.clone(transpose_64, memory_format = torch.contiguous_format); transpose_64 = None\0A view_174 = torch.ops.aten.view(clone_13, [1, -1, 512]); clone_13 = None\0A _param_constant95 = self._param_constant95\0A t_69 = torch.ops.aten.t(_param_constant95); _param_constant95 = None\0A view_175 = torch.ops.aten.view(view_174, [4, 512]); view_174 = None\0A mm_69 = torch.ops.aten.mm(view_175, t_69); view_175 = t_69 = None\0A _unsafe_view_95 = torch.ops.aten._unsafe_view(mm_69, [1, 4, 512]); mm_69 = None\0A add_51 = torch.ops.aten.add(add_49, _unsafe_view_95); add_49 = _unsafe_view_95 = None\0A pow_24 = torch.ops.aten.pow(add_51, 2)\0A mean_23 = torch.ops.aten.mean(pow_24, [-1], True); pow_24 = None\0A add_52 = torch.ops.aten.add(mean_23, 1e-06); mean_23 = None\0A rsqrt_23 = torch.ops.aten.rsqrt(add_52); add_52 = None\0A detach_45 = torch.ops.aten.detach(rsqrt_23)\0A mul_53 = torch.ops.aten.mul(add_51, rsqrt_23); rsqrt_23 = None\0A _param_constant96 = self._param_constant96\0A mul_54 = torch.ops.aten.mul(_param_constant96, mul_53); _param_constant96 = mul_53 = 
None\0A _param_constant97 = self._param_constant97\0A t_70 = torch.ops.aten.t(_param_constant97); _param_constant97 = None\0A view_176 = torch.ops.aten.view(mul_54, [4, 512]); mul_54 = None\0A mm_70 = torch.ops.aten.mm(view_176, t_70); view_176 = t_70 = None\0A _unsafe_view_96 = torch.ops.aten._unsafe_view(mm_70, [1, 4, 512]); mm_70 = None\0A view_177 = torch.ops.aten.view(_unsafe_view_96, [1, -1, 8, 64]); _unsafe_view_96 = None\0A transpose_65 = torch.ops.aten.transpose(view_177, 1, 2); view_177 = None\0A _param_constant98 = self._param_constant98\0A t_71 = torch.ops.aten.t(_param_constant98); _param_constant98 = None\0A view_178 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_71 = torch.ops.aten.mm(view_178, t_71); view_178 = t_71 = None\0A _unsafe_view_97 = torch.ops.aten._unsafe_view(mm_71, [1, 15, 512]); mm_71 = None\0A view_179 = torch.ops.aten.view(_unsafe_view_97, [1, -1, 8, 64]); _unsafe_view_97 = None\0A transpose_66 = torch.ops.aten.transpose(view_179, 1, 2); view_179 = None\0A _param_constant99 = self._param_constant99\0A t_72 = torch.ops.aten.t(_param_constant99); _param_constant99 = None\0A view_180 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_72 = torch.ops.aten.mm(view_180, t_72); view_180 = t_72 = None\0A _unsafe_view_98 = torch.ops.aten._unsafe_view(mm_72, [1, 15, 512]); mm_72 = None\0A view_181 = torch.ops.aten.view(_unsafe_view_98, [1, -1, 8, 64]); _unsafe_view_98 = None\0A transpose_67 = torch.ops.aten.transpose(view_181, 1, 2); view_181 = None\0A transpose_68 = torch.ops.aten.transpose(transpose_66, 3, 2); transpose_66 = None\0A expand_52 = torch.ops.aten.expand(transpose_65, [1, 8, 4, 64]); transpose_65 = None\0A view_182 = torch.ops.aten.view(expand_52, [8, 4, 64]); expand_52 = None\0A expand_53 = torch.ops.aten.expand(transpose_68, [1, 8, 64, 15]); transpose_68 = None\0A view_183 = torch.ops.aten.view(expand_53, [8, 64, 15]); expand_53 = None\0A bmm_26 = torch.ops.aten.bmm(view_182, view_183); view_182 = view_183 = None\0A _unsafe_view_99 
= torch.ops.aten._unsafe_view(bmm_26, [1, 8, 4, 15]); bmm_26 = None\0A add__14 = torch.ops.aten.add_(_unsafe_view_99, add_34); _unsafe_view_99 = None\0A amax_13 = torch.ops.aten.amax(add__14, [-1], True)\0A sub_15 = torch.ops.aten.sub(add__14, amax_13); add__14 = amax_13 = None\0A exp_13 = torch.ops.aten.exp(sub_15); sub_15 = None\0A sum_14 = torch.ops.aten.sum(exp_13, [-1], True)\0A div_17 = torch.ops.aten.div(exp_13, sum_14); exp_13 = sum_14 = None\0A detach_46 = torch.ops.aten.detach(div_17)\0A expand_54 = torch.ops.aten.expand(div_17, [1, 8, 4, 15]); div_17 = None\0A view_184 = torch.ops.aten.view(expand_54, [8, 4, 15]); expand_54 = None\0A expand_55 = torch.ops.aten.expand(transpose_67, [1, 8, 15, 64]); transpose_67 = None\0A view_185 = torch.ops.aten.view(expand_55, [8, 15, 64]); expand_55 = None\0A bmm_27 = torch.ops.aten.bmm(view_184, view_185); view_184 = view_185 = None\0A _unsafe_view_100 = torch.ops.aten._unsafe_view(bmm_27, [1, 8, 4, 64]); bmm_27 = None\0A transpose_69 = torch.ops.aten.transpose(_unsafe_view_100, 1, 2); _unsafe_view_100 = None\0A clone_14 = torch.ops.aten.clone(transpose_69, memory_format = torch.contiguous_format); transpose_69 = None\0A view_186 = torch.ops.aten.view(clone_14, [1, -1, 512]); clone_14 = None\0A _param_constant100 = self._param_constant100\0A t_73 = torch.ops.aten.t(_param_constant100); _param_constant100 = None\0A view_187 = torch.ops.aten.view(view_186, [4, 512]); view_186 = None\0A mm_73 = torch.ops.aten.mm(view_187, t_73); view_187 = t_73 = None\0A _unsafe_view_101 = torch.ops.aten._unsafe_view(mm_73, [1, 4, 512]); mm_73 = None\0A add_53 = torch.ops.aten.add(add_51, _unsafe_view_101); add_51 = _unsafe_view_101 = None\0A pow_25 = torch.ops.aten.pow(add_53, 2)\0A mean_24 = torch.ops.aten.mean(pow_25, [-1], True); pow_25 = None\0A add_54 = torch.ops.aten.add(mean_24, 1e-06); mean_24 = None\0A rsqrt_24 = torch.ops.aten.rsqrt(add_54); add_54 = None\0A detach_47 = torch.ops.aten.detach(rsqrt_24)\0A mul_55 = 
torch.ops.aten.mul(add_53, rsqrt_24); rsqrt_24 = None\0A _param_constant101 = self._param_constant101\0A mul_56 = torch.ops.aten.mul(_param_constant101, mul_55); _param_constant101 = mul_55 = None\0A _param_constant102 = self._param_constant102\0A t_74 = torch.ops.aten.t(_param_constant102); _param_constant102 = None\0A view_188 = torch.ops.aten.view(mul_56, [4, 512]); mul_56 = None\0A mm_74 = torch.ops.aten.mm(view_188, t_74); view_188 = t_74 = None\0A _unsafe_view_102 = torch.ops.aten._unsafe_view(mm_74, [1, 4, 2048]); mm_74 = None\0A relu_9 = torch.ops.aten.relu(_unsafe_view_102); _unsafe_view_102 = None\0A detach_48 = torch.ops.aten.detach(relu_9)\0A _param_constant103 = self._param_constant103\0A t_75 = torch.ops.aten.t(_param_constant103); _param_constant103 = None\0A view_189 = torch.ops.aten.view(relu_9, [4, 2048]); relu_9 = None\0A mm_75 = torch.ops.aten.mm(view_189, t_75); view_189 = t_75 = None\0A _unsafe_view_103 = torch.ops.aten._unsafe_view(mm_75, [1, 4, 512]); mm_75 = None\0A add_55 = torch.ops.aten.add(add_53, _unsafe_view_103); add_53 = _unsafe_view_103 = None\0A pow_26 = torch.ops.aten.pow(add_55, 2)\0A mean_25 = torch.ops.aten.mean(pow_26, [-1], True); pow_26 = None\0A add_56 = torch.ops.aten.add(mean_25, 1e-06); mean_25 = None\0A rsqrt_25 = torch.ops.aten.rsqrt(add_56); add_56 = None\0A detach_49 = torch.ops.aten.detach(rsqrt_25)\0A mul_57 = torch.ops.aten.mul(add_55, rsqrt_25); rsqrt_25 = None\0A _param_constant104 = self._param_constant104\0A mul_58 = torch.ops.aten.mul(_param_constant104, mul_57); _param_constant104 = mul_57 = None\0A _param_constant105 = self._param_constant105\0A t_76 = torch.ops.aten.t(_param_constant105); _param_constant105 = None\0A view_190 = torch.ops.aten.view(mul_58, [4, 512])\0A mm_76 = torch.ops.aten.mm(view_190, t_76); view_190 = t_76 = None\0A _unsafe_view_104 = torch.ops.aten._unsafe_view(mm_76, [1, 4, 512]); mm_76 = None\0A view_191 = torch.ops.aten.view(_unsafe_view_104, [1, -1, 8, 64]); _unsafe_view_104 = 
None\0A transpose_70 = torch.ops.aten.transpose(view_191, 1, 2); view_191 = None\0A _param_constant106 = self._param_constant106\0A t_77 = torch.ops.aten.t(_param_constant106); _param_constant106 = None\0A view_192 = torch.ops.aten.view(mul_58, [4, 512])\0A mm_77 = torch.ops.aten.mm(view_192, t_77); view_192 = t_77 = None\0A _unsafe_view_105 = torch.ops.aten._unsafe_view(mm_77, [1, 4, 512]); mm_77 = None\0A view_193 = torch.ops.aten.view(_unsafe_view_105, [1, -1, 8, 64]); _unsafe_view_105 = None\0A transpose_71 = torch.ops.aten.transpose(view_193, 1, 2); view_193 = None\0A _param_constant107 = self._param_constant107\0A t_78 = torch.ops.aten.t(_param_constant107); _param_constant107 = None\0A view_194 = torch.ops.aten.view(mul_58, [4, 512]); mul_58 = None\0A mm_78 = torch.ops.aten.mm(view_194, t_78); view_194 = t_78 = None\0A _unsafe_view_106 = torch.ops.aten._unsafe_view(mm_78, [1, 4, 512]); mm_78 = None\0A view_195 = torch.ops.aten.view(_unsafe_view_106, [1, -1, 8, 64]); _unsafe_view_106 = None\0A transpose_72 = torch.ops.aten.transpose(view_195, 1, 2); view_195 = None\0A transpose_73 = torch.ops.aten.transpose(transpose_71, 3, 2); transpose_71 = None\0A expand_56 = torch.ops.aten.expand(transpose_70, [1, 8, 4, 64]); transpose_70 = None\0A view_196 = torch.ops.aten.view(expand_56, [8, 4, 64]); expand_56 = None\0A expand_57 = torch.ops.aten.expand(transpose_73, [1, 8, 64, 4]); transpose_73 = None\0A view_197 = torch.ops.aten.view(expand_57, [8, 64, 4]); expand_57 = None\0A bmm_28 = torch.ops.aten.bmm(view_196, view_197); view_196 = view_197 = None\0A _unsafe_view_107 = torch.ops.aten._unsafe_view(bmm_28, [1, 8, 4, 4]); bmm_28 = None\0A add__15 = torch.ops.aten.add_(_unsafe_view_107, add_31); _unsafe_view_107 = None\0A amax_14 = torch.ops.aten.amax(add__15, [-1], True)\0A sub_16 = torch.ops.aten.sub(add__15, amax_14); add__15 = amax_14 = None\0A exp_14 = torch.ops.aten.exp(sub_16); sub_16 = None\0A sum_15 = torch.ops.aten.sum(exp_14, [-1], True)\0A div_18 = 
torch.ops.aten.div(exp_14, sum_15); exp_14 = sum_15 = None\0A detach_50 = torch.ops.aten.detach(div_18)\0A expand_58 = torch.ops.aten.expand(div_18, [1, 8, 4, 4]); div_18 = None\0A view_198 = torch.ops.aten.view(expand_58, [8, 4, 4]); expand_58 = None\0A expand_59 = torch.ops.aten.expand(transpose_72, [1, 8, 4, 64]); transpose_72 = None\0A view_199 = torch.ops.aten.view(expand_59, [8, 4, 64]); expand_59 = None\0A bmm_29 = torch.ops.aten.bmm(view_198, view_199); view_198 = view_199 = None\0A _unsafe_view_108 = torch.ops.aten._unsafe_view(bmm_29, [1, 8, 4, 64]); bmm_29 = None\0A transpose_74 = torch.ops.aten.transpose(_unsafe_view_108, 1, 2); _unsafe_view_108 = None\0A clone_15 = torch.ops.aten.clone(transpose_74, memory_format = torch.contiguous_format); transpose_74 = None\0A view_200 = torch.ops.aten.view(clone_15, [1, -1, 512]); clone_15 = None\0A _param_constant108 = self._param_constant108\0A t_79 = torch.ops.aten.t(_param_constant108); _param_constant108 = None\0A view_201 = torch.ops.aten.view(view_200, [4, 512]); view_200 = None\0A mm_79 = torch.ops.aten.mm(view_201, t_79); view_201 = t_79 = None\0A _unsafe_view_109 = torch.ops.aten._unsafe_view(mm_79, [1, 4, 512]); mm_79 = None\0A add_57 = torch.ops.aten.add(add_55, _unsafe_view_109); add_55 = _unsafe_view_109 = None\0A pow_27 = torch.ops.aten.pow(add_57, 2)\0A mean_26 = torch.ops.aten.mean(pow_27, [-1], True); pow_27 = None\0A add_58 = torch.ops.aten.add(mean_26, 1e-06); mean_26 = None\0A rsqrt_26 = torch.ops.aten.rsqrt(add_58); add_58 = None\0A detach_51 = torch.ops.aten.detach(rsqrt_26)\0A mul_59 = torch.ops.aten.mul(add_57, rsqrt_26); rsqrt_26 = None\0A _param_constant109 = self._param_constant109\0A mul_60 = torch.ops.aten.mul(_param_constant109, mul_59); _param_constant109 = mul_59 = None\0A _param_constant110 = self._param_constant110\0A t_80 = torch.ops.aten.t(_param_constant110); _param_constant110 = None\0A view_202 = torch.ops.aten.view(mul_60, [4, 512]); mul_60 = None\0A mm_80 = 
torch.ops.aten.mm(view_202, t_80); view_202 = t_80 = None\0A _unsafe_view_110 = torch.ops.aten._unsafe_view(mm_80, [1, 4, 512]); mm_80 = None\0A view_203 = torch.ops.aten.view(_unsafe_view_110, [1, -1, 8, 64]); _unsafe_view_110 = None\0A transpose_75 = torch.ops.aten.transpose(view_203, 1, 2); view_203 = None\0A _param_constant111 = self._param_constant111\0A t_81 = torch.ops.aten.t(_param_constant111); _param_constant111 = None\0A view_204 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_81 = torch.ops.aten.mm(view_204, t_81); view_204 = t_81 = None\0A _unsafe_view_111 = torch.ops.aten._unsafe_view(mm_81, [1, 15, 512]); mm_81 = None\0A view_205 = torch.ops.aten.view(_unsafe_view_111, [1, -1, 8, 64]); _unsafe_view_111 = None\0A transpose_76 = torch.ops.aten.transpose(view_205, 1, 2); view_205 = None\0A _param_constant112 = self._param_constant112\0A t_82 = torch.ops.aten.t(_param_constant112); _param_constant112 = None\0A view_206 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_82 = torch.ops.aten.mm(view_206, t_82); view_206 = t_82 = None\0A _unsafe_view_112 = torch.ops.aten._unsafe_view(mm_82, [1, 15, 512]); mm_82 = None\0A view_207 = torch.ops.aten.view(_unsafe_view_112, [1, -1, 8, 64]); _unsafe_view_112 = None\0A transpose_77 = torch.ops.aten.transpose(view_207, 1, 2); view_207 = None\0A transpose_78 = torch.ops.aten.transpose(transpose_76, 3, 2); transpose_76 = None\0A expand_60 = torch.ops.aten.expand(transpose_75, [1, 8, 4, 64]); transpose_75 = None\0A view_208 = torch.ops.aten.view(expand_60, [8, 4, 64]); expand_60 = None\0A expand_61 = torch.ops.aten.expand(transpose_78, [1, 8, 64, 15]); transpose_78 = None\0A view_209 = torch.ops.aten.view(expand_61, [8, 64, 15]); expand_61 = None\0A bmm_30 = torch.ops.aten.bmm(view_208, view_209); view_208 = view_209 = None\0A _unsafe_view_113 = torch.ops.aten._unsafe_view(bmm_30, [1, 8, 4, 15]); bmm_30 = None\0A add__16 = torch.ops.aten.add_(_unsafe_view_113, add_34); _unsafe_view_113 = None\0A amax_15 = 
torch.ops.aten.amax(add__16, [-1], True)\0A sub_17 = torch.ops.aten.sub(add__16, amax_15); add__16 = amax_15 = None\0A exp_15 = torch.ops.aten.exp(sub_17); sub_17 = None\0A sum_16 = torch.ops.aten.sum(exp_15, [-1], True)\0A div_19 = torch.ops.aten.div(exp_15, sum_16); exp_15 = sum_16 = None\0A detach_52 = torch.ops.aten.detach(div_19)\0A expand_62 = torch.ops.aten.expand(div_19, [1, 8, 4, 15]); div_19 = None\0A view_210 = torch.ops.aten.view(expand_62, [8, 4, 15]); expand_62 = None\0A expand_63 = torch.ops.aten.expand(transpose_77, [1, 8, 15, 64]); transpose_77 = None\0A view_211 = torch.ops.aten.view(expand_63, [8, 15, 64]); expand_63 = None\0A bmm_31 = torch.ops.aten.bmm(view_210, view_211); view_210 = view_211 = None\0A _unsafe_view_114 = torch.ops.aten._unsafe_view(bmm_31, [1, 8, 4, 64]); bmm_31 = None\0A transpose_79 = torch.ops.aten.transpose(_unsafe_view_114, 1, 2); _unsafe_view_114 = None\0A clone_16 = torch.ops.aten.clone(transpose_79, memory_format = torch.contiguous_format); transpose_79 = None\0A view_212 = torch.ops.aten.view(clone_16, [1, -1, 512]); clone_16 = None\0A _param_constant113 = self._param_constant113\0A t_83 = torch.ops.aten.t(_param_constant113); _param_constant113 = None\0A view_213 = torch.ops.aten.view(view_212, [4, 512]); view_212 = None\0A mm_83 = torch.ops.aten.mm(view_213, t_83); view_213 = t_83 = None\0A _unsafe_view_115 = torch.ops.aten._unsafe_view(mm_83, [1, 4, 512]); mm_83 = None\0A add_59 = torch.ops.aten.add(add_57, _unsafe_view_115); add_57 = _unsafe_view_115 = None\0A pow_28 = torch.ops.aten.pow(add_59, 2)\0A mean_27 = torch.ops.aten.mean(pow_28, [-1], True); pow_28 = None\0A add_60 = torch.ops.aten.add(mean_27, 1e-06); mean_27 = None\0A rsqrt_27 = torch.ops.aten.rsqrt(add_60); add_60 = None\0A detach_53 = torch.ops.aten.detach(rsqrt_27)\0A mul_61 = torch.ops.aten.mul(add_59, rsqrt_27); rsqrt_27 = None\0A _param_constant114 = self._param_constant114\0A mul_62 = torch.ops.aten.mul(_param_constant114, mul_61); 
_param_constant114 = mul_61 = None\0A _param_constant115 = self._param_constant115\0A t_84 = torch.ops.aten.t(_param_constant115); _param_constant115 = None\0A view_214 = torch.ops.aten.view(mul_62, [4, 512]); mul_62 = None\0A mm_84 = torch.ops.aten.mm(view_214, t_84); view_214 = t_84 = None\0A _unsafe_view_116 = torch.ops.aten._unsafe_view(mm_84, [1, 4, 2048]); mm_84 = None\0A relu_10 = torch.ops.aten.relu(_unsafe_view_116); _unsafe_view_116 = None\0A detach_54 = torch.ops.aten.detach(relu_10)\0A _param_constant116 = self._param_constant116\0A t_85 = torch.ops.aten.t(_param_constant116); _param_constant116 = None\0A view_215 = torch.ops.aten.view(relu_10, [4, 2048]); relu_10 = None\0A mm_85 = torch.ops.aten.mm(view_215, t_85); view_215 = t_85 = None\0A _unsafe_view_117 = torch.ops.aten._unsafe_view(mm_85, [1, 4, 512]); mm_85 = None\0A add_61 = torch.ops.aten.add(add_59, _unsafe_view_117); add_59 = _unsafe_view_117 = None\0A pow_29 = torch.ops.aten.pow(add_61, 2)\0A mean_28 = torch.ops.aten.mean(pow_29, [-1], True); pow_29 = None\0A add_62 = torch.ops.aten.add(mean_28, 1e-06); mean_28 = None\0A rsqrt_28 = torch.ops.aten.rsqrt(add_62); add_62 = None\0A detach_55 = torch.ops.aten.detach(rsqrt_28)\0A mul_63 = torch.ops.aten.mul(add_61, rsqrt_28); rsqrt_28 = None\0A _param_constant117 = self._param_constant117\0A mul_64 = torch.ops.aten.mul(_param_constant117, mul_63); _param_constant117 = mul_63 = None\0A _param_constant118 = self._param_constant118\0A t_86 = torch.ops.aten.t(_param_constant118); _param_constant118 = None\0A view_216 = torch.ops.aten.view(mul_64, [4, 512])\0A mm_86 = torch.ops.aten.mm(view_216, t_86); view_216 = t_86 = None\0A _unsafe_view_118 = torch.ops.aten._unsafe_view(mm_86, [1, 4, 512]); mm_86 = None\0A view_217 = torch.ops.aten.view(_unsafe_view_118, [1, -1, 8, 64]); _unsafe_view_118 = None\0A transpose_80 = torch.ops.aten.transpose(view_217, 1, 2); view_217 = None\0A _param_constant119 = self._param_constant119\0A t_87 = 
torch.ops.aten.t(_param_constant119); _param_constant119 = None\0A view_218 = torch.ops.aten.view(mul_64, [4, 512])\0A mm_87 = torch.ops.aten.mm(view_218, t_87); view_218 = t_87 = None\0A _unsafe_view_119 = torch.ops.aten._unsafe_view(mm_87, [1, 4, 512]); mm_87 = None\0A view_219 = torch.ops.aten.view(_unsafe_view_119, [1, -1, 8, 64]); _unsafe_view_119 = None\0A transpose_81 = torch.ops.aten.transpose(view_219, 1, 2); view_219 = None\0A _param_constant120 = self._param_constant120\0A t_88 = torch.ops.aten.t(_param_constant120); _param_constant120 = None\0A view_220 = torch.ops.aten.view(mul_64, [4, 512]); mul_64 = None\0A mm_88 = torch.ops.aten.mm(view_220, t_88); view_220 = t_88 = None\0A _unsafe_view_120 = torch.ops.aten._unsafe_view(mm_88, [1, 4, 512]); mm_88 = None\0A view_221 = torch.ops.aten.view(_unsafe_view_120, [1, -1, 8, 64]); _unsafe_view_120 = None\0A transpose_82 = torch.ops.aten.transpose(view_221, 1, 2); view_221 = None\0A transpose_83 = torch.ops.aten.transpose(transpose_81, 3, 2); transpose_81 = None\0A expand_64 = torch.ops.aten.expand(transpose_80, [1, 8, 4, 64]); transpose_80 = None\0A view_222 = torch.ops.aten.view(expand_64, [8, 4, 64]); expand_64 = None\0A expand_65 = torch.ops.aten.expand(transpose_83, [1, 8, 64, 4]); transpose_83 = None\0A view_223 = torch.ops.aten.view(expand_65, [8, 64, 4]); expand_65 = None\0A bmm_32 = torch.ops.aten.bmm(view_222, view_223); view_222 = view_223 = None\0A _unsafe_view_121 = torch.ops.aten._unsafe_view(bmm_32, [1, 8, 4, 4]); bmm_32 = None\0A add__17 = torch.ops.aten.add_(_unsafe_view_121, add_31); _unsafe_view_121 = add_31 = None\0A amax_16 = torch.ops.aten.amax(add__17, [-1], True)\0A sub_18 = torch.ops.aten.sub(add__17, amax_16); add__17 = amax_16 = None\0A exp_16 = torch.ops.aten.exp(sub_18); sub_18 = None\0A sum_17 = torch.ops.aten.sum(exp_16, [-1], True)\0A div_20 = torch.ops.aten.div(exp_16, sum_17); exp_16 = sum_17 = None\0A detach_56 = torch.ops.aten.detach(div_20)\0A expand_66 = 
torch.ops.aten.expand(div_20, [1, 8, 4, 4]); div_20 = None\0A view_224 = torch.ops.aten.view(expand_66, [8, 4, 4]); expand_66 = None\0A expand_67 = torch.ops.aten.expand(transpose_82, [1, 8, 4, 64]); transpose_82 = None\0A view_225 = torch.ops.aten.view(expand_67, [8, 4, 64]); expand_67 = None\0A bmm_33 = torch.ops.aten.bmm(view_224, view_225); view_224 = view_225 = None\0A _unsafe_view_122 = torch.ops.aten._unsafe_view(bmm_33, [1, 8, 4, 64]); bmm_33 = None\0A transpose_84 = torch.ops.aten.transpose(_unsafe_view_122, 1, 2); _unsafe_view_122 = None\0A clone_17 = torch.ops.aten.clone(transpose_84, memory_format = torch.contiguous_format); transpose_84 = None\0A view_226 = torch.ops.aten.view(clone_17, [1, -1, 512]); clone_17 = None\0A _param_constant121 = self._param_constant121\0A t_89 = torch.ops.aten.t(_param_constant121); _param_constant121 = None\0A view_227 = torch.ops.aten.view(view_226, [4, 512]); view_226 = None\0A mm_89 = torch.ops.aten.mm(view_227, t_89); view_227 = t_89 = None\0A _unsafe_view_123 = torch.ops.aten._unsafe_view(mm_89, [1, 4, 512]); mm_89 = None\0A add_63 = torch.ops.aten.add(add_61, _unsafe_view_123); add_61 = _unsafe_view_123 = None\0A pow_30 = torch.ops.aten.pow(add_63, 2)\0A mean_29 = torch.ops.aten.mean(pow_30, [-1], True); pow_30 = None\0A add_64 = torch.ops.aten.add(mean_29, 1e-06); mean_29 = None\0A rsqrt_29 = torch.ops.aten.rsqrt(add_64); add_64 = None\0A detach_57 = torch.ops.aten.detach(rsqrt_29)\0A mul_65 = torch.ops.aten.mul(add_63, rsqrt_29); rsqrt_29 = None\0A _param_constant122 = self._param_constant122\0A mul_66 = torch.ops.aten.mul(_param_constant122, mul_65); _param_constant122 = mul_65 = None\0A _param_constant123 = self._param_constant123\0A t_90 = torch.ops.aten.t(_param_constant123); _param_constant123 = None\0A view_228 = torch.ops.aten.view(mul_66, [4, 512]); mul_66 = None\0A mm_90 = torch.ops.aten.mm(view_228, t_90); view_228 = t_90 = None\0A _unsafe_view_124 = torch.ops.aten._unsafe_view(mm_90, [1, 4, 512]); mm_90 
= None\0A view_229 = torch.ops.aten.view(_unsafe_view_124, [1, -1, 8, 64]); _unsafe_view_124 = None\0A transpose_85 = torch.ops.aten.transpose(view_229, 1, 2); view_229 = None\0A _param_constant124 = self._param_constant124\0A t_91 = torch.ops.aten.t(_param_constant124); _param_constant124 = None\0A view_230 = torch.ops.aten.view(mul_28, [15, 512])\0A mm_91 = torch.ops.aten.mm(view_230, t_91); view_230 = t_91 = None\0A _unsafe_view_125 = torch.ops.aten._unsafe_view(mm_91, [1, 15, 512]); mm_91 = None\0A view_231 = torch.ops.aten.view(_unsafe_view_125, [1, -1, 8, 64]); _unsafe_view_125 = None\0A transpose_86 = torch.ops.aten.transpose(view_231, 1, 2); view_231 = None\0A _param_constant125 = self._param_constant125\0A t_92 = torch.ops.aten.t(_param_constant125); _param_constant125 = None\0A view_232 = torch.ops.aten.view(mul_28, [15, 512]); mul_28 = None\0A mm_92 = torch.ops.aten.mm(view_232, t_92); view_232 = t_92 = None\0A _unsafe_view_126 = torch.ops.aten._unsafe_view(mm_92, [1, 15, 512]); mm_92 = None\0A view_233 = torch.ops.aten.view(_unsafe_view_126, [1, -1, 8, 64]); _unsafe_view_126 = None\0A transpose_87 = torch.ops.aten.transpose(view_233, 1, 2); view_233 = None\0A transpose_88 = torch.ops.aten.transpose(transpose_86, 3, 2); transpose_86 = None\0A expand_68 = torch.ops.aten.expand(transpose_85, [1, 8, 4, 64]); transpose_85 = None\0A view_234 = torch.ops.aten.view(expand_68, [8, 4, 64]); expand_68 = None\0A expand_69 = torch.ops.aten.expand(transpose_88, [1, 8, 64, 15]); transpose_88 = None\0A view_235 = torch.ops.aten.view(expand_69, [8, 64, 15]); expand_69 = None\0A bmm_34 = torch.ops.aten.bmm(view_234, view_235); view_234 = view_235 = None\0A _unsafe_view_127 = torch.ops.aten._unsafe_view(bmm_34, [1, 8, 4, 15]); bmm_34 = None\0A add__18 = torch.ops.aten.add_(_unsafe_view_127, add_34); _unsafe_view_127 = add_34 = None\0A amax_17 = torch.ops.aten.amax(add__18, [-1], True)\0A sub_19 = torch.ops.aten.sub(add__18, amax_17); add__18 = amax_17 = None\0A exp_17 = 
torch.ops.aten.exp(sub_19); sub_19 = None\0A sum_18 = torch.ops.aten.sum(exp_17, [-1], True)\0A div_21 = torch.ops.aten.div(exp_17, sum_18); exp_17 = sum_18 = None\0A detach_58 = torch.ops.aten.detach(div_21)\0A expand_70 = torch.ops.aten.expand(div_21, [1, 8, 4, 15]); div_21 = None\0A view_236 = torch.ops.aten.view(expand_70, [8, 4, 15]); expand_70 = None\0A expand_71 = torch.ops.aten.expand(transpose_87, [1, 8, 15, 64]); transpose_87 = None\0A view_237 = torch.ops.aten.view(expand_71, [8, 15, 64]); expand_71 = None\0A bmm_35 = torch.ops.aten.bmm(view_236, view_237); view_236 = view_237 = None\0A _unsafe_view_128 = torch.ops.aten._unsafe_view(bmm_35, [1, 8, 4, 64]); bmm_35 = None\0A transpose_89 = torch.ops.aten.transpose(_unsafe_view_128, 1, 2); _unsafe_view_128 = None\0A clone_18 = torch.ops.aten.clone(transpose_89, memory_format = torch.contiguous_format); transpose_89 = None\0A view_238 = torch.ops.aten.view(clone_18, [1, -1, 512]); clone_18 = None\0A _param_constant126 = self._param_constant126\0A t_93 = torch.ops.aten.t(_param_constant126); _param_constant126 = None\0A view_239 = torch.ops.aten.view(view_238, [4, 512]); view_238 = None\0A mm_93 = torch.ops.aten.mm(view_239, t_93); view_239 = t_93 = None\0A _unsafe_view_129 = torch.ops.aten._unsafe_view(mm_93, [1, 4, 512]); mm_93 = None\0A add_65 = torch.ops.aten.add(add_63, _unsafe_view_129); add_63 = _unsafe_view_129 = None\0A pow_31 = torch.ops.aten.pow(add_65, 2)\0A mean_30 = torch.ops.aten.mean(pow_31, [-1], True); pow_31 = None\0A add_66 = torch.ops.aten.add(mean_30, 1e-06); mean_30 = None\0A rsqrt_30 = torch.ops.aten.rsqrt(add_66); add_66 = None\0A detach_59 = torch.ops.aten.detach(rsqrt_30)\0A mul_67 = torch.ops.aten.mul(add_65, rsqrt_30); rsqrt_30 = None\0A _param_constant127 = self._param_constant127\0A mul_68 = torch.ops.aten.mul(_param_constant127, mul_67); _param_constant127 = mul_67 = None\0A _param_constant128 = self._param_constant128\0A t_94 = torch.ops.aten.t(_param_constant128); 
_param_constant128 = None\0A view_240 = torch.ops.aten.view(mul_68, [4, 512]); mul_68 = None\0A mm_94 = torch.ops.aten.mm(view_240, t_94); view_240 = t_94 = None\0A _unsafe_view_130 = torch.ops.aten._unsafe_view(mm_94, [1, 4, 2048]); mm_94 = None\0A relu_11 = torch.ops.aten.relu(_unsafe_view_130); _unsafe_view_130 = None\0A detach_60 = torch.ops.aten.detach(relu_11)\0A _param_constant129 = self._param_constant129\0A t_95 = torch.ops.aten.t(_param_constant129); _param_constant129 = None\0A view_241 = torch.ops.aten.view(relu_11, [4, 2048]); relu_11 = None\0A mm_95 = torch.ops.aten.mm(view_241, t_95); view_241 = t_95 = None\0A _unsafe_view_131 = torch.ops.aten._unsafe_view(mm_95, [1, 4, 512]); mm_95 = None\0A add_67 = torch.ops.aten.add(add_65, _unsafe_view_131); add_65 = _unsafe_view_131 = None\0A pow_32 = torch.ops.aten.pow(add_67, 2)\0A mean_31 = torch.ops.aten.mean(pow_32, [-1], True); pow_32 = None\0A add_68 = torch.ops.aten.add(mean_31, 1e-06); mean_31 = None\0A rsqrt_31 = torch.ops.aten.rsqrt(add_68); add_68 = None\0A detach_61 = torch.ops.aten.detach(rsqrt_31)\0A mul_69 = torch.ops.aten.mul(add_67, rsqrt_31); add_67 = rsqrt_31 = None\0A _param_constant130 = self._param_constant130\0A mul_70 = torch.ops.aten.mul(_param_constant130, mul_69); _param_constant130 = mul_69 = None\0A mul_71 = torch.ops.aten.mul(mul_70, 0.04419417382415922); mul_70 = None\0A _param_constant0_2 = self._param_constant0\0A t_96 = torch.ops.aten.t(_param_constant0_2); _param_constant0_2 = None\0A view_242 = torch.ops.aten.view(mul_71, [4, 512]); mul_71 = None\0A mm_96 = torch.ops.aten.mm(view_242, t_96); view_242 = t_96 = None\0A _unsafe_view_132 = torch.ops.aten._unsafe_view(mm_96, [1, 4, 32128]); mm_96 = None\0A return _unsafe_view_132\0A "
// Instantiation of the scripted module object: binds the constant tensors
// %0-%131 (defined earlier in the file) to named attribute slots of the
// !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda"> type, so the
// func.func private methods above can read them via torch.prim.GetAttr.
// NOTE(review): the slot shapes ([32128,512] embedding, [512,512] attention
// projections, two [32,8] relative-position-bias tables, [2048,512]/[512,2048]
// feed-forward weights, [512] RMS-norm scales) are consistent with a
// T5-small-style encoder/decoder transformer — confirm against the exporter.
%132 = torch.nn_module {
    // Shared token-embedding / output-projection weight (vocab 32128 x d_model 512).
    torch.slot "_param_constant0", %0 : !torch.tensor<[32128,512],f32>
    // First stack (slots 1-50): per layer, a [512] norm scale, four [512,512]
    // attention projections, and [2048,512]/[512,2048] feed-forward weights;
    // slot 5 is its [32,8] relative-position-bias table.
    torch.slot "_param_constant1", %1 : !torch.tensor<[512],f32>
    torch.slot "_param_constant2", %2 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant3", %3 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant4", %4 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant5", %5 : !torch.tensor<[32,8],f32>
    torch.slot "_param_constant6", %6 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant7", %7 : !torch.tensor<[512],f32>
    torch.slot "_param_constant8", %8 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant9", %9 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant10", %10 : !torch.tensor<[512],f32>
    torch.slot "_param_constant11", %11 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant12", %12 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant13", %13 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant14", %14 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant15", %15 : !torch.tensor<[512],f32>
    torch.slot "_param_constant16", %16 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant17", %17 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant18", %18 : !torch.tensor<[512],f32>
    torch.slot "_param_constant19", %19 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant20", %20 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant21", %21 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant22", %22 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant23", %23 : !torch.tensor<[512],f32>
    torch.slot "_param_constant24", %24 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant25", %25 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant26", %26 : !torch.tensor<[512],f32>
    torch.slot "_param_constant27", %27 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant28", %28 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant29", %29 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant30", %30 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant31", %31 : !torch.tensor<[512],f32>
    torch.slot "_param_constant32", %32 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant33", %33 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant34", %34 : !torch.tensor<[512],f32>
    torch.slot "_param_constant35", %35 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant36", %36 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant37", %37 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant38", %38 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant39", %39 : !torch.tensor<[512],f32>
    torch.slot "_param_constant40", %40 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant41", %41 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant42", %42 : !torch.tensor<[512],f32>
    torch.slot "_param_constant43", %43 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant44", %44 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant45", %45 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant46", %46 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant47", %47 : !torch.tensor<[512],f32>
    torch.slot "_param_constant48", %48 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant49", %49 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant50", %50 : !torch.tensor<[512],f32>
    // Second stack (slots 51-130): same layout but with an extra [512,512]
    // projection quartet per layer (self- plus cross-attention); slot 55 is
    // its [32,8] relative-position-bias table.
    torch.slot "_param_constant51", %51 : !torch.tensor<[512],f32>
    torch.slot "_param_constant52", %52 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant53", %53 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant54", %54 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant55", %55 : !torch.tensor<[32,8],f32>
    torch.slot "_param_constant56", %56 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant57", %57 : !torch.tensor<[512],f32>
    torch.slot "_param_constant58", %58 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant59", %59 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant60", %60 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant61", %61 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant62", %62 : !torch.tensor<[512],f32>
    torch.slot "_param_constant63", %63 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant64", %64 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant65", %65 : !torch.tensor<[512],f32>
    torch.slot "_param_constant66", %66 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant67", %67 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant68", %68 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant69", %69 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant70", %70 : !torch.tensor<[512],f32>
    torch.slot "_param_constant71", %71 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant72", %72 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant73", %73 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant74", %74 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant75", %75 : !torch.tensor<[512],f32>
    torch.slot "_param_constant76", %76 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant77", %77 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant78", %78 : !torch.tensor<[512],f32>
    torch.slot "_param_constant79", %79 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant80", %80 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant81", %81 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant82", %82 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant83", %83 : !torch.tensor<[512],f32>
    torch.slot "_param_constant84", %84 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant85", %85 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant86", %86 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant87", %87 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant88", %88 : !torch.tensor<[512],f32>
    torch.slot "_param_constant89", %89 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant90", %90 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant91", %91 : !torch.tensor<[512],f32>
    torch.slot "_param_constant92", %92 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant93", %93 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant94", %94 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant95", %95 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant96", %96 : !torch.tensor<[512],f32>
    torch.slot "_param_constant97", %97 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant98", %98 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant99", %99 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant100", %100 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant101", %101 : !torch.tensor<[512],f32>
    torch.slot "_param_constant102", %102 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant103", %103 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant104", %104 : !torch.tensor<[512],f32>
    torch.slot "_param_constant105", %105 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant106", %106 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant107", %107 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant108", %108 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant109", %109 : !torch.tensor<[512],f32>
    torch.slot "_param_constant110", %110 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant111", %111 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant112", %112 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant113", %113 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant114", %114 : !torch.tensor<[512],f32>
    torch.slot "_param_constant115", %115 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant116", %116 : !torch.tensor<[512,2048],f32>
    torch.slot "_param_constant117", %117 : !torch.tensor<[512],f32>
    torch.slot "_param_constant118", %118 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant119", %119 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant120", %120 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant121", %121 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant122", %122 : !torch.tensor<[512],f32>
    torch.slot "_param_constant123", %123 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant124", %124 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant125", %125 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant126", %126 : !torch.tensor<[512,512],f32>
    torch.slot "_param_constant127", %127 : !torch.tensor<[512],f32>
    torch.slot "_param_constant128", %128 : !torch.tensor<[2048,512],f32>
    torch.slot "_param_constant129", %129 : !torch.tensor<[512,2048],f32>
    // Final [512] norm scale of the second stack (multiplied in just before the
    // 0.0441941... output scaling and the vocab projection in forward()).
    torch.slot "_param_constant130", %130 : !torch.tensor<[512],f32>
    // Scalar si64 tensor constant captured by the traced graph.
    torch.slot "_tensor_constant0", %131 : !torch.tensor<[],si64>
    // Standard nn.Module bookkeeping attributes: training flag, backward-hook
    // marker, and the FX-generated Python source returned by __code_getter.
    torch.slot "training", %true : !torch.bool
    torch.slot "_is_full_backward_hook", %none : !torch.none
    torch.slot "_code", %str : !torch.str
} : !torch.nn.Module<"__torch__.torch.fx.graph_module._lambda">
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment