Created
May 9, 2022 11:02
-
-
Save pashu123/0ec7166408c8f05c20ea7f37ee8b8f7e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module attributes {torch.debug_module_name = "forward"} { | |
// Private getter for the `_code` attribute of the FX graph module.
//
// Argument:
//   %arg0 - module instance of type
//           !torch.nn.Module<"__torch__.torch.fx.graph_module.forward">.
// Result:
//   The value stored in the module's `_code` attribute, typed as !torch.str.
//
// Note: the trailing ` | |` table-cell residue from the web export has been
// stripped from these lines; it is not MLIR syntax.
func private @__torch__.torch.fx.graph_module.forward.__code_getter(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module.forward">) -> !torch.str {
  // Look up the `_code` attribute on the module object.
  %2 = torch.prim.GetAttr %arg0["_code"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.str
  // Return the attribute value unchanged.
  return %2 : !torch.str
}
func private @__torch__.torch.fx.graph_module.forward.forward(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module.forward">, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg2: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg3: !torch.tensor {torch.type_bound = !torch.vtensor<[512,384],f32>}, %arg4: !torch.tensor {torch.type_bound = !torch.vtensor<[2,384],f32>}, %arg5: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,384],f32>}, %arg6: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg7: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg8: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg9: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg10: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg11: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg12: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg13: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg14: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg15: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg16: !torch.tensor {torch.type_bound = !torch.vtensor<[1536],f32>}, %arg17: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg18: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg19: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg20: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg21: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg22: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg23: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg24: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg25: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg26: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg27: 
!torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg28: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg29: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg30: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg31: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg32: !torch.tensor {torch.type_bound = !torch.vtensor<[1536],f32>}, %arg33: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg34: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg35: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg36: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg37: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg38: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg39: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg40: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg41: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg42: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg43: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg44: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg45: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg46: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg47: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg48: !torch.tensor {torch.type_bound = !torch.vtensor<[1536],f32>}, %arg49: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg50: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg51: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg52: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg53: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg54: !torch.tensor {torch.type_bound = 
!torch.vtensor<[384],f32>}, %arg55: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg56: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg57: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg58: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg59: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg60: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg61: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg62: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg63: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg64: !torch.tensor {torch.type_bound = !torch.vtensor<[1536],f32>}, %arg65: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg66: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg67: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg68: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg69: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg70: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg71: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg72: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg73: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg74: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg75: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg76: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg77: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg78: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg79: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg80: !torch.tensor {torch.type_bound = !torch.vtensor<[1536],f32>}, %arg81: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg82: !torch.tensor 
{torch.type_bound = !torch.vtensor<[384],f32>}, %arg83: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg84: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg85: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg86: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg87: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg88: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg89: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg90: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg91: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg92: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg93: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg94: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg95: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg96: !torch.tensor {torch.type_bound = !torch.vtensor<[1536],f32>}, %arg97: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg98: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg99: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg100: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg101: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg102: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg103: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg104: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg105: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg106: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg107: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg108: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg109: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, 
%arg110: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg111: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg112: !torch.tensor {torch.type_bound = !torch.vtensor<[1536],f32>}, %arg113: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg114: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg115: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg116: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg117: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg118: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg119: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg120: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg121: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg122: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg123: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg124: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg125: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg126: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg127: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg128: !torch.tensor {torch.type_bound = !torch.vtensor<[1536],f32>}, %arg129: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg130: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg131: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg132: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg133: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg134: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg135: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg136: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg137: !torch.tensor {torch.type_bound 
= !torch.vtensor<[384,384],f32>}, %arg138: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg139: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg140: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg141: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg142: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg143: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg144: !torch.tensor {torch.type_bound = !torch.vtensor<[1536],f32>}, %arg145: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg146: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg147: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg148: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg149: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg150: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg151: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg152: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg153: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg154: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg155: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg156: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg157: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg158: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg159: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg160: !torch.tensor {torch.type_bound = !torch.vtensor<[1536],f32>}, %arg161: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg162: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg163: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg164: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, 
%arg165: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg166: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg167: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg168: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg169: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg170: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg171: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg172: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg173: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg174: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg175: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg176: !torch.tensor {torch.type_bound = !torch.vtensor<[1536],f32>}, %arg177: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg178: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg179: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg180: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg181: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg182: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg183: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg184: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg185: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg186: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg187: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg188: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg189: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg190: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg191: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg192: !torch.tensor 
{torch.type_bound = !torch.vtensor<[1536],f32>}, %arg193: !torch.tensor {torch.type_bound = !torch.vtensor<[1536,384],f32>}, %arg194: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg195: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg196: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg197: !torch.tensor {torch.type_bound = !torch.vtensor<[384,1536],f32>}, %arg198: !torch.tensor {torch.type_bound = !torch.vtensor<[384],f32>}, %arg199: !torch.tensor {torch.type_bound = !torch.vtensor<[384,384],f32>}, %arg200: !torch.tensor {torch.type_bound = !torch.vtensor<[2],f32>}, %arg201: !torch.tensor {torch.type_bound = !torch.vtensor<[2,384],f32>}, %arg202: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg203: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg204: !torch.tensor {torch.type_bound = !torch.vtensor<[1,128],si64>}) -> !torch.tuple<tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, 
tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor> { | |
%true_0 = torch.constant.bool true | |
%cpu = torch.constant.device "cpu" | |
%none_1 = torch.constant.none | |
%str_2 = torch.constant.str "none" | |
%false = torch.constant.bool false | |
%int0 = torch.constant.int 0 | |
%int9223372036854775807 = torch.constant.int 9223372036854775807 | |
%int1 = torch.constant.int 1 | |
%int128 = torch.constant.int 128 | |
%int384 = torch.constant.int 384 | |
%float9.999990e-13 = torch.constant.float 9.9999999999999998E-13 | |
%int12 = torch.constant.int 12 | |
%int32 = torch.constant.int 32 | |
%int2 = torch.constant.int 2 | |
%int3 = torch.constant.int 3 | |
%int-1 = torch.constant.int -1 | |
%int-2 = torch.constant.int -2 | |
%float5.656850e00 = torch.constant.float 5.6568542494923806 | |
%int16384 = torch.constant.int 16384 | |
%int1536 = torch.constant.int 1536 | |
%int6 = torch.constant.int 6 | |
%int196608 = torch.constant.int 196608 | |
%int49152 = torch.constant.int 49152 | |
%int4096 = torch.constant.int 4096 | |
%int512 = torch.constant.int 512 | |
%int30522 = torch.constant.int 30522 | |
%float-1.000000e-02 = torch.constant.float -1.000000e-02 | |
%2 = torch.aten.slice.Tensor %arg203, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor | |
%3 = torch.aten.slice.Tensor %2, %int1, %int0, %int128, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor | |
%4 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%5 = torch.aten.expand %3, %4, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%6 = torch.aten.slice.Tensor %arg202, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor | |
%7 = torch.aten.slice.Tensor %6, %int1, %int0, %int128, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor | |
%8 = torch.aten.embedding %arg5, %arg204, %int0, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor | |
%9 = torch.aten.embedding %arg4, %5, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor | |
%10 = torch.aten.add.Tensor %8, %9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%11 = torch.aten.embedding %arg3, %7, %int-1, %false, %false : !torch.tensor, !torch.tensor, !torch.int, !torch.bool, !torch.bool -> !torch.tensor | |
%12 = torch.aten.add_.Tensor %10, %11, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%13 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0, %result1, %result2 = torch.aten.native_layer_norm %12, %13, %arg2, %arg1, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%14 = torch.aten.t %arg13 : !torch.tensor -> !torch.tensor | |
%15 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%16 = torch.aten.view %result0, %15 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%17 = torch.aten.addmm %arg12, %16, %14, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%18 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%19 = torch.aten.view %17, %18 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%20 = torch.aten.t %arg11 : !torch.tensor -> !torch.tensor | |
%21 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%22 = torch.aten.view %result0, %21 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%23 = torch.aten.addmm %arg10, %22, %20, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%24 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%25 = torch.aten.view %23, %24 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%26 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%27 = torch.aten.view %25, %26 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%28 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%29 = torch.aten.permute %27, %28 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%30 = torch.aten.t %arg15 : !torch.tensor -> !torch.tensor | |
%31 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%32 = torch.aten.view %result0, %31 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%33 = torch.aten.addmm %arg14, %32, %30, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%34 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%35 = torch.aten.view %33, %34 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%36 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%37 = torch.aten.view %35, %36 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%38 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%39 = torch.aten.permute %37, %38 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%40 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%41 = torch.aten.view %19, %40 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%42 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%43 = torch.aten.permute %41, %42 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%44 = torch.aten.transpose.int %29, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%45 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%46 = torch.aten.expand %43, %45, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%47 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%48 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%49 = torch.aten._reshape_alias %46, %47, %48 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%50 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%51 = torch.aten.expand %44, %50, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%52 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%53 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%54 = torch.aten._reshape_alias %51, %52, %53 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%55 = torch.aten.bmm %49, %54 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%56 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%57 = torch.aten._unsafe_view %55, %56 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%58 = torch.aten.div.Scalar %57, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%59 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%60 = torch.aten.add.Tensor %58, %59, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%61 = torch.aten._softmax %60, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%62 = torch.aten.detach %61 : !torch.tensor -> !torch.tensor | |
%63 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%64 = torch.aten.expand %61, %63, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%65 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%66 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%67 = torch.aten._reshape_alias %64, %65, %66 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%68 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%69 = torch.aten.expand %39, %68, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%70 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%71 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%72 = torch.aten._reshape_alias %69, %70, %71 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%73 = torch.aten.bmm %67, %72 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%74 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%75 = torch.aten._unsafe_view %73, %74 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%76 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%77 = torch.aten.permute %75, %76 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%78 = torch.aten.clone %77, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%79 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%80 = torch.aten.view %78, %79 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%81 = torch.aten.t %arg9 : !torch.tensor -> !torch.tensor | |
%82 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%83 = torch.aten.view %80, %82 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%84 = torch.aten.addmm %arg8, %83, %81, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%85 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%86 = torch.aten.view %84, %85 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%87 = torch.aten.add.Tensor %86, %result0, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%88 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %87, %88, %arg7, %arg6, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%89 = torch.aten.t %arg17 : !torch.tensor -> !torch.tensor | |
%90 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%91 = torch.aten.view %result0_3, %90 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%92 = torch.aten.addmm %arg16, %91, %89, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%93 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%94 = torch.aten.view %92, %93 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%95 = torch.aten.gelu %94, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%96 = torch.aten.t %arg21 : !torch.tensor -> !torch.tensor | |
%97 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%98 = torch.aten.view %95, %97 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%99 = torch.aten.addmm %arg20, %98, %96, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%100 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%101 = torch.aten.view %99, %100 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%102 = torch.aten.add.Tensor %101, %result0_3, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%103 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_6, %result1_7, %result2_8 = torch.aten.native_layer_norm %102, %103, %arg19, %arg18, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%104 = torch.aten.t %arg29 : !torch.tensor -> !torch.tensor | |
%105 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%106 = torch.aten.view %result0_6, %105 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%107 = torch.aten.addmm %arg28, %106, %104, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%108 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%109 = torch.aten.view %107, %108 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%110 = torch.aten.t %arg27 : !torch.tensor -> !torch.tensor | |
%111 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%112 = torch.aten.view %result0_6, %111 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%113 = torch.aten.addmm %arg26, %112, %110, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%114 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%115 = torch.aten.view %113, %114 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%116 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%117 = torch.aten.view %115, %116 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%118 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%119 = torch.aten.permute %117, %118 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%120 = torch.aten.t %arg31 : !torch.tensor -> !torch.tensor | |
%121 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%122 = torch.aten.view %result0_6, %121 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%123 = torch.aten.addmm %arg30, %122, %120, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%124 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%125 = torch.aten.view %123, %124 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%126 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%127 = torch.aten.view %125, %126 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%128 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%129 = torch.aten.permute %127, %128 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%130 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%131 = torch.aten.view %109, %130 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%132 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%133 = torch.aten.permute %131, %132 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%134 = torch.aten.transpose.int %119, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%135 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%136 = torch.aten.expand %133, %135, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%137 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%138 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%139 = torch.aten._reshape_alias %136, %137, %138 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%140 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%141 = torch.aten.expand %134, %140, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%142 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%143 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%144 = torch.aten._reshape_alias %141, %142, %143 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%145 = torch.aten.bmm %139, %144 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%146 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%147 = torch.aten._unsafe_view %145, %146 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%148 = torch.aten.div.Scalar %147, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%149 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%150 = torch.aten.add.Tensor %148, %149, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%151 = torch.aten._softmax %150, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%152 = torch.aten.detach %151 : !torch.tensor -> !torch.tensor | |
%153 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%154 = torch.aten.expand %151, %153, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%155 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%156 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%157 = torch.aten._reshape_alias %154, %155, %156 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%158 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%159 = torch.aten.expand %129, %158, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%160 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%161 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%162 = torch.aten._reshape_alias %159, %160, %161 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%163 = torch.aten.bmm %157, %162 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%164 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%165 = torch.aten._unsafe_view %163, %164 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%166 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%167 = torch.aten.permute %165, %166 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%168 = torch.aten.clone %167, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%169 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%170 = torch.aten.view %168, %169 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%171 = torch.aten.t %arg25 : !torch.tensor -> !torch.tensor | |
%172 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%173 = torch.aten.view %170, %172 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%174 = torch.aten.addmm %arg24, %173, %171, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%175 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%176 = torch.aten.view %174, %175 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%177 = torch.aten.add.Tensor %176, %result0_6, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%178 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_9, %result1_10, %result2_11 = torch.aten.native_layer_norm %177, %178, %arg23, %arg22, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%179 = torch.aten.t %arg33 : !torch.tensor -> !torch.tensor | |
%180 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%181 = torch.aten.view %result0_9, %180 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%182 = torch.aten.addmm %arg32, %181, %179, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%183 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%184 = torch.aten.view %182, %183 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%185 = torch.aten.gelu %184, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%186 = torch.aten.t %arg37 : !torch.tensor -> !torch.tensor | |
%187 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%188 = torch.aten.view %185, %187 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%189 = torch.aten.addmm %arg36, %188, %186, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%190 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%191 = torch.aten.view %189, %190 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%192 = torch.aten.add.Tensor %191, %result0_9, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%193 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_12, %result1_13, %result2_14 = torch.aten.native_layer_norm %192, %193, %arg35, %arg34, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%194 = torch.aten.t %arg77 : !torch.tensor -> !torch.tensor | |
%195 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%196 = torch.aten.view %result0_12, %195 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%197 = torch.aten.addmm %arg76, %196, %194, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%198 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%199 = torch.aten.view %197, %198 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%200 = torch.aten.t %arg75 : !torch.tensor -> !torch.tensor | |
%201 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%202 = torch.aten.view %result0_12, %201 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%203 = torch.aten.addmm %arg74, %202, %200, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%204 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%205 = torch.aten.view %203, %204 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%206 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%207 = torch.aten.view %205, %206 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%208 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%209 = torch.aten.permute %207, %208 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%210 = torch.aten.t %arg79 : !torch.tensor -> !torch.tensor | |
%211 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%212 = torch.aten.view %result0_12, %211 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%213 = torch.aten.addmm %arg78, %212, %210, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%214 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%215 = torch.aten.view %213, %214 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%216 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%217 = torch.aten.view %215, %216 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%218 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%219 = torch.aten.permute %217, %218 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%220 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%221 = torch.aten.view %199, %220 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%222 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%223 = torch.aten.permute %221, %222 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%224 = torch.aten.transpose.int %209, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%225 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%226 = torch.aten.expand %223, %225, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%227 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%228 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%229 = torch.aten._reshape_alias %226, %227, %228 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%230 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%231 = torch.aten.expand %224, %230, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%232 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%233 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%234 = torch.aten._reshape_alias %231, %232, %233 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%235 = torch.aten.bmm %229, %234 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%236 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%237 = torch.aten._unsafe_view %235, %236 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%238 = torch.aten.div.Scalar %237, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%239 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%240 = torch.aten.add.Tensor %238, %239, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%241 = torch.aten._softmax %240, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%242 = torch.aten.detach %241 : !torch.tensor -> !torch.tensor | |
%243 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%244 = torch.aten.expand %241, %243, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%245 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%246 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%247 = torch.aten._reshape_alias %244, %245, %246 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%248 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%249 = torch.aten.expand %219, %248, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%250 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%251 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%252 = torch.aten._reshape_alias %249, %250, %251 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%253 = torch.aten.bmm %247, %252 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%254 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%255 = torch.aten._unsafe_view %253, %254 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%256 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%257 = torch.aten.permute %255, %256 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%258 = torch.aten.clone %257, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%259 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%260 = torch.aten.view %258, %259 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%261 = torch.aten.t %arg73 : !torch.tensor -> !torch.tensor | |
%262 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%263 = torch.aten.view %260, %262 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%264 = torch.aten.addmm %arg72, %263, %261, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%265 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%266 = torch.aten.view %264, %265 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%267 = torch.aten.add.Tensor %266, %result0_12, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%268 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_15, %result1_16, %result2_17 = torch.aten.native_layer_norm %267, %268, %arg71, %arg70, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%269 = torch.aten.t %arg81 : !torch.tensor -> !torch.tensor | |
%270 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%271 = torch.aten.view %result0_15, %270 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%272 = torch.aten.addmm %arg80, %271, %269, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%273 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%274 = torch.aten.view %272, %273 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%275 = torch.aten.gelu %274, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%276 = torch.aten.t %arg85 : !torch.tensor -> !torch.tensor | |
%277 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%278 = torch.aten.view %275, %277 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%279 = torch.aten.addmm %arg84, %278, %276, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%280 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%281 = torch.aten.view %279, %280 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%282 = torch.aten.add.Tensor %281, %result0_15, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%283 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_18, %result1_19, %result2_20 = torch.aten.native_layer_norm %282, %283, %arg83, %arg82, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%284 = torch.aten.t %arg93 : !torch.tensor -> !torch.tensor | |
%285 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%286 = torch.aten.view %result0_18, %285 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%287 = torch.aten.addmm %arg92, %286, %284, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%288 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%289 = torch.aten.view %287, %288 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%290 = torch.aten.t %arg91 : !torch.tensor -> !torch.tensor | |
%291 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%292 = torch.aten.view %result0_18, %291 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%293 = torch.aten.addmm %arg90, %292, %290, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%294 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%295 = torch.aten.view %293, %294 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%296 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%297 = torch.aten.view %295, %296 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%298 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%299 = torch.aten.permute %297, %298 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%300 = torch.aten.t %arg95 : !torch.tensor -> !torch.tensor | |
%301 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%302 = torch.aten.view %result0_18, %301 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%303 = torch.aten.addmm %arg94, %302, %300, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%304 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%305 = torch.aten.view %303, %304 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%306 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%307 = torch.aten.view %305, %306 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%308 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%309 = torch.aten.permute %307, %308 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%310 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%311 = torch.aten.view %289, %310 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%312 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%313 = torch.aten.permute %311, %312 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%314 = torch.aten.transpose.int %299, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%315 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%316 = torch.aten.expand %313, %315, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%317 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%318 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%319 = torch.aten._reshape_alias %316, %317, %318 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%320 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%321 = torch.aten.expand %314, %320, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%322 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%323 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%324 = torch.aten._reshape_alias %321, %322, %323 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%325 = torch.aten.bmm %319, %324 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%326 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%327 = torch.aten._unsafe_view %325, %326 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%328 = torch.aten.div.Scalar %327, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%329 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%330 = torch.aten.add.Tensor %328, %329, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%331 = torch.aten._softmax %330, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%332 = torch.aten.detach %331 : !torch.tensor -> !torch.tensor | |
%333 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%334 = torch.aten.expand %331, %333, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%335 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%336 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%337 = torch.aten._reshape_alias %334, %335, %336 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%338 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%339 = torch.aten.expand %309, %338, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%340 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%341 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%342 = torch.aten._reshape_alias %339, %340, %341 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%343 = torch.aten.bmm %337, %342 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%344 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%345 = torch.aten._unsafe_view %343, %344 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%346 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%347 = torch.aten.permute %345, %346 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%348 = torch.aten.clone %347, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%349 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%350 = torch.aten.view %348, %349 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%351 = torch.aten.t %arg89 : !torch.tensor -> !torch.tensor | |
%352 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%353 = torch.aten.view %350, %352 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%354 = torch.aten.addmm %arg88, %353, %351, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%355 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%356 = torch.aten.view %354, %355 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%357 = torch.aten.add.Tensor %356, %result0_18, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%358 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_21, %result1_22, %result2_23 = torch.aten.native_layer_norm %357, %358, %arg87, %arg86, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%359 = torch.aten.t %arg97 : !torch.tensor -> !torch.tensor | |
%360 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%361 = torch.aten.view %result0_21, %360 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%362 = torch.aten.addmm %arg96, %361, %359, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%363 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%364 = torch.aten.view %362, %363 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%365 = torch.aten.gelu %364, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%366 = torch.aten.t %arg101 : !torch.tensor -> !torch.tensor | |
%367 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%368 = torch.aten.view %365, %367 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%369 = torch.aten.addmm %arg100, %368, %366, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%370 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%371 = torch.aten.view %369, %370 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%372 = torch.aten.add.Tensor %371, %result0_21, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%373 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %372, %373, %arg99, %arg98, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%374 = torch.aten.t %arg109 : !torch.tensor -> !torch.tensor | |
%375 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%376 = torch.aten.view %result0_24, %375 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%377 = torch.aten.addmm %arg108, %376, %374, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%378 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%379 = torch.aten.view %377, %378 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%380 = torch.aten.t %arg107 : !torch.tensor -> !torch.tensor | |
%381 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%382 = torch.aten.view %result0_24, %381 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%383 = torch.aten.addmm %arg106, %382, %380, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%384 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%385 = torch.aten.view %383, %384 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%386 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%387 = torch.aten.view %385, %386 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%388 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%389 = torch.aten.permute %387, %388 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%390 = torch.aten.t %arg111 : !torch.tensor -> !torch.tensor | |
%391 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%392 = torch.aten.view %result0_24, %391 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%393 = torch.aten.addmm %arg110, %392, %390, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%394 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%395 = torch.aten.view %393, %394 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%396 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%397 = torch.aten.view %395, %396 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%398 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%399 = torch.aten.permute %397, %398 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%400 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%401 = torch.aten.view %379, %400 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%402 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%403 = torch.aten.permute %401, %402 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%404 = torch.aten.transpose.int %389, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%405 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%406 = torch.aten.expand %403, %405, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%407 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%408 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%409 = torch.aten._reshape_alias %406, %407, %408 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%410 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%411 = torch.aten.expand %404, %410, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%412 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%413 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%414 = torch.aten._reshape_alias %411, %412, %413 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%415 = torch.aten.bmm %409, %414 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%416 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%417 = torch.aten._unsafe_view %415, %416 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%418 = torch.aten.div.Scalar %417, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%419 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%420 = torch.aten.add.Tensor %418, %419, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%421 = torch.aten._softmax %420, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%422 = torch.aten.detach %421 : !torch.tensor -> !torch.tensor | |
%423 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%424 = torch.aten.expand %421, %423, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%425 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%426 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%427 = torch.aten._reshape_alias %424, %425, %426 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%428 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%429 = torch.aten.expand %399, %428, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%430 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%431 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%432 = torch.aten._reshape_alias %429, %430, %431 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%433 = torch.aten.bmm %427, %432 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%434 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%435 = torch.aten._unsafe_view %433, %434 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%436 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%437 = torch.aten.permute %435, %436 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%438 = torch.aten.clone %437, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%439 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%440 = torch.aten.view %438, %439 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%441 = torch.aten.t %arg105 : !torch.tensor -> !torch.tensor | |
%442 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%443 = torch.aten.view %440, %442 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%444 = torch.aten.addmm %arg104, %443, %441, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%445 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%446 = torch.aten.view %444, %445 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%447 = torch.aten.add.Tensor %446, %result0_24, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%448 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %447, %448, %arg103, %arg102, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%449 = torch.aten.t %arg113 : !torch.tensor -> !torch.tensor | |
%450 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%451 = torch.aten.view %result0_27, %450 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%452 = torch.aten.addmm %arg112, %451, %449, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%453 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%454 = torch.aten.view %452, %453 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%455 = torch.aten.gelu %454, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%456 = torch.aten.t %arg117 : !torch.tensor -> !torch.tensor | |
%457 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%458 = torch.aten.view %455, %457 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%459 = torch.aten.addmm %arg116, %458, %456, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%460 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%461 = torch.aten.view %459, %460 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%462 = torch.aten.add.Tensor %461, %result0_27, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%463 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_30, %result1_31, %result2_32 = torch.aten.native_layer_norm %462, %463, %arg115, %arg114, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%464 = torch.aten.t %arg125 : !torch.tensor -> !torch.tensor | |
%465 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%466 = torch.aten.view %result0_30, %465 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%467 = torch.aten.addmm %arg124, %466, %464, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%468 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%469 = torch.aten.view %467, %468 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%470 = torch.aten.t %arg123 : !torch.tensor -> !torch.tensor | |
%471 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%472 = torch.aten.view %result0_30, %471 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%473 = torch.aten.addmm %arg122, %472, %470, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%474 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%475 = torch.aten.view %473, %474 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%476 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%477 = torch.aten.view %475, %476 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%478 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%479 = torch.aten.permute %477, %478 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%480 = torch.aten.t %arg127 : !torch.tensor -> !torch.tensor | |
%481 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%482 = torch.aten.view %result0_30, %481 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%483 = torch.aten.addmm %arg126, %482, %480, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%484 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%485 = torch.aten.view %483, %484 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%486 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%487 = torch.aten.view %485, %486 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%488 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%489 = torch.aten.permute %487, %488 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%490 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%491 = torch.aten.view %469, %490 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%492 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%493 = torch.aten.permute %491, %492 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%494 = torch.aten.transpose.int %479, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%495 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%496 = torch.aten.expand %493, %495, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%497 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%498 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%499 = torch.aten._reshape_alias %496, %497, %498 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%500 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%501 = torch.aten.expand %494, %500, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%502 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%503 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%504 = torch.aten._reshape_alias %501, %502, %503 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%505 = torch.aten.bmm %499, %504 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%506 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%507 = torch.aten._unsafe_view %505, %506 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%508 = torch.aten.div.Scalar %507, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%509 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%510 = torch.aten.add.Tensor %508, %509, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%511 = torch.aten._softmax %510, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%512 = torch.aten.detach %511 : !torch.tensor -> !torch.tensor | |
%513 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%514 = torch.aten.expand %511, %513, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%515 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%516 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%517 = torch.aten._reshape_alias %514, %515, %516 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%518 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%519 = torch.aten.expand %489, %518, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%520 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%521 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%522 = torch.aten._reshape_alias %519, %520, %521 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%523 = torch.aten.bmm %517, %522 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%524 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%525 = torch.aten._unsafe_view %523, %524 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%526 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%527 = torch.aten.permute %525, %526 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%528 = torch.aten.clone %527, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%529 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%530 = torch.aten.view %528, %529 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%531 = torch.aten.t %arg121 : !torch.tensor -> !torch.tensor | |
%532 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%533 = torch.aten.view %530, %532 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%534 = torch.aten.addmm %arg120, %533, %531, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%535 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%536 = torch.aten.view %534, %535 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%537 = torch.aten.add.Tensor %536, %result0_30, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%538 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_33, %result1_34, %result2_35 = torch.aten.native_layer_norm %537, %538, %arg119, %arg118, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%539 = torch.aten.t %arg129 : !torch.tensor -> !torch.tensor | |
%540 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%541 = torch.aten.view %result0_33, %540 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%542 = torch.aten.addmm %arg128, %541, %539, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%543 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%544 = torch.aten.view %542, %543 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%545 = torch.aten.gelu %544, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%546 = torch.aten.t %arg133 : !torch.tensor -> !torch.tensor | |
%547 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%548 = torch.aten.view %545, %547 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%549 = torch.aten.addmm %arg132, %548, %546, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%550 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%551 = torch.aten.view %549, %550 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%552 = torch.aten.add.Tensor %551, %result0_33, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%553 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_36, %result1_37, %result2_38 = torch.aten.native_layer_norm %552, %553, %arg131, %arg130, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%554 = torch.aten.t %arg141 : !torch.tensor -> !torch.tensor | |
%555 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%556 = torch.aten.view %result0_36, %555 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%557 = torch.aten.addmm %arg140, %556, %554, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%558 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%559 = torch.aten.view %557, %558 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%560 = torch.aten.t %arg139 : !torch.tensor -> !torch.tensor | |
%561 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%562 = torch.aten.view %result0_36, %561 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%563 = torch.aten.addmm %arg138, %562, %560, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%564 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%565 = torch.aten.view %563, %564 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%566 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%567 = torch.aten.view %565, %566 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%568 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%569 = torch.aten.permute %567, %568 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%570 = torch.aten.t %arg143 : !torch.tensor -> !torch.tensor | |
%571 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%572 = torch.aten.view %result0_36, %571 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%573 = torch.aten.addmm %arg142, %572, %570, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%574 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%575 = torch.aten.view %573, %574 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%576 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%577 = torch.aten.view %575, %576 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%578 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%579 = torch.aten.permute %577, %578 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%580 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%581 = torch.aten.view %559, %580 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%582 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%583 = torch.aten.permute %581, %582 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%584 = torch.aten.transpose.int %569, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%585 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%586 = torch.aten.expand %583, %585, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%587 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%588 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%589 = torch.aten._reshape_alias %586, %587, %588 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%590 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%591 = torch.aten.expand %584, %590, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%592 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%593 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%594 = torch.aten._reshape_alias %591, %592, %593 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%595 = torch.aten.bmm %589, %594 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%596 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%597 = torch.aten._unsafe_view %595, %596 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%598 = torch.aten.div.Scalar %597, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%599 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%600 = torch.aten.add.Tensor %598, %599, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%601 = torch.aten._softmax %600, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%602 = torch.aten.detach %601 : !torch.tensor -> !torch.tensor | |
%603 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%604 = torch.aten.expand %601, %603, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%605 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%606 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%607 = torch.aten._reshape_alias %604, %605, %606 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%608 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%609 = torch.aten.expand %579, %608, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%610 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%611 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%612 = torch.aten._reshape_alias %609, %610, %611 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%613 = torch.aten.bmm %607, %612 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%614 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%615 = torch.aten._unsafe_view %613, %614 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%616 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%617 = torch.aten.permute %615, %616 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%618 = torch.aten.clone %617, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%619 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%620 = torch.aten.view %618, %619 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%621 = torch.aten.t %arg137 : !torch.tensor -> !torch.tensor | |
%622 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%623 = torch.aten.view %620, %622 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%624 = torch.aten.addmm %arg136, %623, %621, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%625 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%626 = torch.aten.view %624, %625 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%627 = torch.aten.add.Tensor %626, %result0_36, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%628 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_39, %result1_40, %result2_41 = torch.aten.native_layer_norm %627, %628, %arg135, %arg134, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%629 = torch.aten.t %arg145 : !torch.tensor -> !torch.tensor | |
%630 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%631 = torch.aten.view %result0_39, %630 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%632 = torch.aten.addmm %arg144, %631, %629, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%633 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%634 = torch.aten.view %632, %633 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%635 = torch.aten.gelu %634, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%636 = torch.aten.t %arg149 : !torch.tensor -> !torch.tensor | |
%637 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%638 = torch.aten.view %635, %637 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%639 = torch.aten.addmm %arg148, %638, %636, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%640 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%641 = torch.aten.view %639, %640 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%642 = torch.aten.add.Tensor %641, %result0_39, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%643 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_42, %result1_43, %result2_44 = torch.aten.native_layer_norm %642, %643, %arg147, %arg146, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%644 = torch.aten.t %arg157 : !torch.tensor -> !torch.tensor | |
%645 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%646 = torch.aten.view %result0_42, %645 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%647 = torch.aten.addmm %arg156, %646, %644, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%648 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%649 = torch.aten.view %647, %648 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%650 = torch.aten.t %arg155 : !torch.tensor -> !torch.tensor | |
%651 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%652 = torch.aten.view %result0_42, %651 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%653 = torch.aten.addmm %arg154, %652, %650, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%654 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%655 = torch.aten.view %653, %654 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%656 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%657 = torch.aten.view %655, %656 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%658 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%659 = torch.aten.permute %657, %658 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%660 = torch.aten.t %arg159 : !torch.tensor -> !torch.tensor | |
%661 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%662 = torch.aten.view %result0_42, %661 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%663 = torch.aten.addmm %arg158, %662, %660, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%664 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%665 = torch.aten.view %663, %664 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%666 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%667 = torch.aten.view %665, %666 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%668 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%669 = torch.aten.permute %667, %668 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%670 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%671 = torch.aten.view %649, %670 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%672 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%673 = torch.aten.permute %671, %672 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%674 = torch.aten.transpose.int %659, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%675 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%676 = torch.aten.expand %673, %675, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%677 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%678 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%679 = torch.aten._reshape_alias %676, %677, %678 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%680 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%681 = torch.aten.expand %674, %680, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%682 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%683 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%684 = torch.aten._reshape_alias %681, %682, %683 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%685 = torch.aten.bmm %679, %684 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%686 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%687 = torch.aten._unsafe_view %685, %686 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%688 = torch.aten.div.Scalar %687, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%689 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%690 = torch.aten.add.Tensor %688, %689, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%691 = torch.aten._softmax %690, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%692 = torch.aten.detach %691 : !torch.tensor -> !torch.tensor | |
%693 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%694 = torch.aten.expand %691, %693, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%695 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%696 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%697 = torch.aten._reshape_alias %694, %695, %696 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%698 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%699 = torch.aten.expand %669, %698, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%700 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%701 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%702 = torch.aten._reshape_alias %699, %700, %701 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%703 = torch.aten.bmm %697, %702 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%704 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%705 = torch.aten._unsafe_view %703, %704 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%706 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%707 = torch.aten.permute %705, %706 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%708 = torch.aten.clone %707, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%709 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%710 = torch.aten.view %708, %709 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%711 = torch.aten.t %arg153 : !torch.tensor -> !torch.tensor | |
%712 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%713 = torch.aten.view %710, %712 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%714 = torch.aten.addmm %arg152, %713, %711, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%715 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%716 = torch.aten.view %714, %715 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%717 = torch.aten.add.Tensor %716, %result0_42, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%718 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_45, %result1_46, %result2_47 = torch.aten.native_layer_norm %717, %718, %arg151, %arg150, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%719 = torch.aten.t %arg161 : !torch.tensor -> !torch.tensor | |
%720 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%721 = torch.aten.view %result0_45, %720 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%722 = torch.aten.addmm %arg160, %721, %719, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%723 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%724 = torch.aten.view %722, %723 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%725 = torch.aten.gelu %724, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%726 = torch.aten.t %arg165 : !torch.tensor -> !torch.tensor | |
%727 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%728 = torch.aten.view %725, %727 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%729 = torch.aten.addmm %arg164, %728, %726, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%730 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%731 = torch.aten.view %729, %730 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%732 = torch.aten.add.Tensor %731, %result0_45, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%733 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %732, %733, %arg163, %arg162, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%734 = torch.aten.t %arg173 : !torch.tensor -> !torch.tensor | |
%735 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%736 = torch.aten.view %result0_48, %735 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%737 = torch.aten.addmm %arg172, %736, %734, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%738 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%739 = torch.aten.view %737, %738 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%740 = torch.aten.t %arg171 : !torch.tensor -> !torch.tensor | |
%741 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%742 = torch.aten.view %result0_48, %741 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%743 = torch.aten.addmm %arg170, %742, %740, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%744 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%745 = torch.aten.view %743, %744 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%746 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%747 = torch.aten.view %745, %746 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%748 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%749 = torch.aten.permute %747, %748 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%750 = torch.aten.t %arg175 : !torch.tensor -> !torch.tensor | |
%751 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%752 = torch.aten.view %result0_48, %751 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%753 = torch.aten.addmm %arg174, %752, %750, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%754 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%755 = torch.aten.view %753, %754 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%756 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%757 = torch.aten.view %755, %756 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%758 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%759 = torch.aten.permute %757, %758 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%760 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%761 = torch.aten.view %739, %760 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%762 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%763 = torch.aten.permute %761, %762 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%764 = torch.aten.transpose.int %749, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%765 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%766 = torch.aten.expand %763, %765, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%767 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%768 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%769 = torch.aten._reshape_alias %766, %767, %768 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%770 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%771 = torch.aten.expand %764, %770, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%772 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%773 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%774 = torch.aten._reshape_alias %771, %772, %773 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%775 = torch.aten.bmm %769, %774 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%776 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%777 = torch.aten._unsafe_view %775, %776 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%778 = torch.aten.div.Scalar %777, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%779 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%780 = torch.aten.add.Tensor %778, %779, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%781 = torch.aten._softmax %780, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%782 = torch.aten.detach %781 : !torch.tensor -> !torch.tensor | |
%783 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%784 = torch.aten.expand %781, %783, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%785 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%786 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%787 = torch.aten._reshape_alias %784, %785, %786 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%788 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%789 = torch.aten.expand %759, %788, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%790 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%791 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%792 = torch.aten._reshape_alias %789, %790, %791 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%793 = torch.aten.bmm %787, %792 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%794 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%795 = torch.aten._unsafe_view %793, %794 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%796 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%797 = torch.aten.permute %795, %796 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%798 = torch.aten.clone %797, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%799 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%800 = torch.aten.view %798, %799 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%801 = torch.aten.t %arg169 : !torch.tensor -> !torch.tensor | |
%802 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%803 = torch.aten.view %800, %802 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%804 = torch.aten.addmm %arg168, %803, %801, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%805 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%806 = torch.aten.view %804, %805 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%807 = torch.aten.add.Tensor %806, %result0_48, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%808 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %807, %808, %arg167, %arg166, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%809 = torch.aten.t %arg177 : !torch.tensor -> !torch.tensor | |
%810 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%811 = torch.aten.view %result0_51, %810 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%812 = torch.aten.addmm %arg176, %811, %809, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%813 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%814 = torch.aten.view %812, %813 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%815 = torch.aten.gelu %814, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%816 = torch.aten.t %arg181 : !torch.tensor -> !torch.tensor | |
%817 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%818 = torch.aten.view %815, %817 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%819 = torch.aten.addmm %arg180, %818, %816, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%820 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%821 = torch.aten.view %819, %820 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%822 = torch.aten.add.Tensor %821, %result0_51, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%823 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_54, %result1_55, %result2_56 = torch.aten.native_layer_norm %822, %823, %arg179, %arg178, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%824 = torch.aten.t %arg189 : !torch.tensor -> !torch.tensor | |
%825 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%826 = torch.aten.view %result0_54, %825 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%827 = torch.aten.addmm %arg188, %826, %824, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%828 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%829 = torch.aten.view %827, %828 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%830 = torch.aten.t %arg187 : !torch.tensor -> !torch.tensor | |
%831 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%832 = torch.aten.view %result0_54, %831 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%833 = torch.aten.addmm %arg186, %832, %830, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%834 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%835 = torch.aten.view %833, %834 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%836 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%837 = torch.aten.view %835, %836 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%838 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%839 = torch.aten.permute %837, %838 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%840 = torch.aten.t %arg191 : !torch.tensor -> !torch.tensor | |
%841 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%842 = torch.aten.view %result0_54, %841 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%843 = torch.aten.addmm %arg190, %842, %840, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%844 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%845 = torch.aten.view %843, %844 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%846 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%847 = torch.aten.view %845, %846 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%848 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%849 = torch.aten.permute %847, %848 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%850 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%851 = torch.aten.view %829, %850 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%852 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%853 = torch.aten.permute %851, %852 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%854 = torch.aten.transpose.int %839, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%855 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%856 = torch.aten.expand %853, %855, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%857 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%858 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%859 = torch.aten._reshape_alias %856, %857, %858 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%860 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%861 = torch.aten.expand %854, %860, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%862 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%863 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%864 = torch.aten._reshape_alias %861, %862, %863 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%865 = torch.aten.bmm %859, %864 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%866 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%867 = torch.aten._unsafe_view %865, %866 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%868 = torch.aten.div.Scalar %867, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%869 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%870 = torch.aten.add.Tensor %868, %869, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%871 = torch.aten._softmax %870, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%872 = torch.aten.detach %871 : !torch.tensor -> !torch.tensor | |
%873 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%874 = torch.aten.expand %871, %873, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%875 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%876 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%877 = torch.aten._reshape_alias %874, %875, %876 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%878 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%879 = torch.aten.expand %849, %878, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%880 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%881 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%882 = torch.aten._reshape_alias %879, %880, %881 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%883 = torch.aten.bmm %877, %882 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%884 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%885 = torch.aten._unsafe_view %883, %884 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%886 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%887 = torch.aten.permute %885, %886 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%888 = torch.aten.clone %887, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%889 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%890 = torch.aten.view %888, %889 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%891 = torch.aten.t %arg185 : !torch.tensor -> !torch.tensor | |
%892 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%893 = torch.aten.view %890, %892 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%894 = torch.aten.addmm %arg184, %893, %891, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%895 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%896 = torch.aten.view %894, %895 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%897 = torch.aten.add.Tensor %896, %result0_54, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%898 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_57, %result1_58, %result2_59 = torch.aten.native_layer_norm %897, %898, %arg183, %arg182, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%899 = torch.aten.t %arg193 : !torch.tensor -> !torch.tensor | |
%900 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%901 = torch.aten.view %result0_57, %900 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%902 = torch.aten.addmm %arg192, %901, %899, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%903 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%904 = torch.aten.view %902, %903 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%905 = torch.aten.gelu %904, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%906 = torch.aten.t %arg197 : !torch.tensor -> !torch.tensor | |
%907 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%908 = torch.aten.view %905, %907 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%909 = torch.aten.addmm %arg196, %908, %906, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%910 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%911 = torch.aten.view %909, %910 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%912 = torch.aten.add.Tensor %911, %result0_57, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%913 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_60, %result1_61, %result2_62 = torch.aten.native_layer_norm %912, %913, %arg195, %arg194, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%914 = torch.aten.t %arg45 : !torch.tensor -> !torch.tensor | |
%915 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%916 = torch.aten.view %result0_60, %915 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%917 = torch.aten.addmm %arg44, %916, %914, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%918 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%919 = torch.aten.view %917, %918 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%920 = torch.aten.t %arg43 : !torch.tensor -> !torch.tensor | |
%921 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%922 = torch.aten.view %result0_60, %921 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%923 = torch.aten.addmm %arg42, %922, %920, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%924 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%925 = torch.aten.view %923, %924 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%926 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%927 = torch.aten.view %925, %926 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%928 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%929 = torch.aten.permute %927, %928 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%930 = torch.aten.t %arg47 : !torch.tensor -> !torch.tensor | |
%931 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%932 = torch.aten.view %result0_60, %931 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%933 = torch.aten.addmm %arg46, %932, %930, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%934 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%935 = torch.aten.view %933, %934 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%936 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%937 = torch.aten.view %935, %936 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%938 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%939 = torch.aten.permute %937, %938 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%940 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%941 = torch.aten.view %919, %940 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%942 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%943 = torch.aten.permute %941, %942 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%944 = torch.aten.transpose.int %929, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%945 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%946 = torch.aten.expand %943, %945, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%947 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%948 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%949 = torch.aten._reshape_alias %946, %947, %948 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%950 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%951 = torch.aten.expand %944, %950, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%952 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%953 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%954 = torch.aten._reshape_alias %951, %952, %953 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%955 = torch.aten.bmm %949, %954 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%956 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%957 = torch.aten._unsafe_view %955, %956 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%958 = torch.aten.div.Scalar %957, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%959 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%960 = torch.aten.add.Tensor %958, %959, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%961 = torch.aten._softmax %960, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%962 = torch.aten.detach %961 : !torch.tensor -> !torch.tensor | |
%963 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%964 = torch.aten.expand %961, %963, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%965 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%966 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%967 = torch.aten._reshape_alias %964, %965, %966 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%968 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%969 = torch.aten.expand %939, %968, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%970 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%971 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%972 = torch.aten._reshape_alias %969, %970, %971 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%973 = torch.aten.bmm %967, %972 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%974 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%975 = torch.aten._unsafe_view %973, %974 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%976 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%977 = torch.aten.permute %975, %976 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%978 = torch.aten.clone %977, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%979 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%980 = torch.aten.view %978, %979 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%981 = torch.aten.t %arg41 : !torch.tensor -> !torch.tensor | |
%982 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%983 = torch.aten.view %980, %982 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%984 = torch.aten.addmm %arg40, %983, %981, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%985 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%986 = torch.aten.view %984, %985 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%987 = torch.aten.add.Tensor %986, %result0_60, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%988 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_63, %result1_64, %result2_65 = torch.aten.native_layer_norm %987, %988, %arg39, %arg38, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%989 = torch.aten.t %arg49 : !torch.tensor -> !torch.tensor | |
%990 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%991 = torch.aten.view %result0_63, %990 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%992 = torch.aten.addmm %arg48, %991, %989, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%993 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%994 = torch.aten.view %992, %993 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%995 = torch.aten.gelu %994, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%996 = torch.aten.t %arg53 : !torch.tensor -> !torch.tensor | |
%997 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%998 = torch.aten.view %995, %997 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%999 = torch.aten.addmm %arg52, %998, %996, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1000 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1001 = torch.aten.view %999, %1000 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1002 = torch.aten.add.Tensor %1001, %result0_63, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1003 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_66, %result1_67, %result2_68 = torch.aten.native_layer_norm %1002, %1003, %arg51, %arg50, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%1004 = torch.aten.t %arg61 : !torch.tensor -> !torch.tensor | |
%1005 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1006 = torch.aten.view %result0_66, %1005 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1007 = torch.aten.addmm %arg60, %1006, %1004, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1008 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1009 = torch.aten.view %1007, %1008 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1010 = torch.aten.t %arg59 : !torch.tensor -> !torch.tensor | |
%1011 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1012 = torch.aten.view %result0_66, %1011 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1013 = torch.aten.addmm %arg58, %1012, %1010, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1014 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1015 = torch.aten.view %1013, %1014 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1016 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1017 = torch.aten.view %1015, %1016 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1018 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1019 = torch.aten.permute %1017, %1018 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1020 = torch.aten.t %arg63 : !torch.tensor -> !torch.tensor | |
%1021 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1022 = torch.aten.view %result0_66, %1021 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1023 = torch.aten.addmm %arg62, %1022, %1020, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1024 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1025 = torch.aten.view %1023, %1024 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1026 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1027 = torch.aten.view %1025, %1026 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1028 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1029 = torch.aten.permute %1027, %1028 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1030 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1031 = torch.aten.view %1009, %1030 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1032 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1033 = torch.aten.permute %1031, %1032 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1034 = torch.aten.transpose.int %1019, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1035 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1036 = torch.aten.expand %1033, %1035, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%1037 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1038 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1039 = torch.aten._reshape_alias %1036, %1037, %1038 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1040 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1041 = torch.aten.expand %1034, %1040, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%1042 = torch.prim.ListConstruct %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1043 = torch.prim.ListConstruct %int32, %int1, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1044 = torch.aten._reshape_alias %1041, %1042, %1043 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1045 = torch.aten.bmm %1039, %1044 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1046 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1047 = torch.aten._unsafe_view %1045, %1046 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1048 = torch.aten.div.Scalar %1047, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%1049 = torch.prim.GetAttr %arg0["_tensor_constant0"] : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> -> !torch.tensor | |
%1050 = torch.aten.add.Tensor %1048, %1049, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1051 = torch.aten._softmax %1050, %int-1, %false : !torch.tensor, !torch.int, !torch.bool -> !torch.tensor | |
%1052 = torch.aten.detach %1051 : !torch.tensor -> !torch.tensor | |
%1053 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1054 = torch.aten.expand %1051, %1053, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%1055 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1056 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1057 = torch.aten._reshape_alias %1054, %1055, %1056 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1058 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1059 = torch.aten.expand %1029, %1058, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%1060 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1061 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1062 = torch.aten._reshape_alias %1059, %1060, %1061 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1063 = torch.aten.bmm %1057, %1062 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1064 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1065 = torch.aten._unsafe_view %1063, %1064 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1066 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1067 = torch.aten.permute %1065, %1066 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1068 = torch.aten.clone %1067, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%1069 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1070 = torch.aten.view %1068, %1069 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1071 = torch.aten.t %arg57 : !torch.tensor -> !torch.tensor | |
%1072 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1073 = torch.aten.view %1070, %1072 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1074 = torch.aten.addmm %arg56, %1073, %1071, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1075 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1076 = torch.aten.view %1074, %1075 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1077 = torch.aten.add.Tensor %1076, %result0_66, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1078 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_69, %result1_70, %result2_71 = torch.aten.native_layer_norm %1077, %1078, %arg55, %arg54, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%1079 = torch.aten.t %arg65 : !torch.tensor -> !torch.tensor | |
%1080 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1081 = torch.aten.view %result0_69, %1080 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1082 = torch.aten.addmm %arg64, %1081, %1079, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1083 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1084 = torch.aten.view %1082, %1083 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1085 = torch.aten.gelu %1084, %str_2 : !torch.tensor, !torch.str -> !torch.tensor | |
%1086 = torch.aten.t %arg69 : !torch.tensor -> !torch.tensor | |
%1087 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1088 = torch.aten.view %1085, %1087 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1089 = torch.aten.addmm %arg68, %1088, %1086, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1090 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1091 = torch.aten.view %1089, %1090 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1092 = torch.aten.add.Tensor %1091, %result0_69, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1093 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %1092, %1093, %arg67, %arg66, %float9.999990e-13 : !torch.tensor, !torch.list<int>, !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor, !torch.tensor, !torch.tensor | |
%1094 = torch.aten.slice.Tensor %result0_72, %int0, %int0, %int9223372036854775807, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.tensor | |
%1095 = torch.aten.select.int %1094, %int1, %int0 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1096 = torch.aten.t %arg199 : !torch.tensor -> !torch.tensor | |
%1097 = torch.aten.addmm %arg198, %1095, %1096, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1098 = torch.aten.tanh %1097 : !torch.tensor -> !torch.tensor | |
%1099 = torch.aten.detach %1098 : !torch.tensor -> !torch.tensor | |
%1100 = torch.aten.t %arg201 : !torch.tensor -> !torch.tensor | |
%1101 = torch.aten.addmm %arg200, %1098, %1100, %int1, %int1 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1102 = torch.aten.sum %1101, %none_1 : !torch.tensor, !torch.none -> !torch.tensor | |
%1103 = torch.aten.ones_like %1102, %int6, %int0, %cpu, %false, %int1 : !torch.tensor, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.int -> !torch.tensor | |
%1104 = torch.prim.ListConstruct %int1, %int2 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1105 = torch.aten.expand %1103, %1104, %false : !torch.tensor, !torch.list<int>, !torch.bool -> !torch.tensor | |
%1106 = torch.aten.t %1100 : !torch.tensor -> !torch.tensor | |
%1107 = torch.aten.mm %1105, %1106 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1108 = torch.aten.t %1105 : !torch.tensor -> !torch.tensor | |
%1109 = torch.aten.mm %1108, %1098 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1110 = torch.aten.t %1109 : !torch.tensor -> !torch.tensor | |
%1111 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1112 = torch.aten.sum.dim_IntList %1105, %1111, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1113 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1114 = torch.aten.view %1112, %1113 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1115 = torch.aten.detach %1114 : !torch.tensor -> !torch.tensor | |
%1116 = torch.aten.detach %1115 : !torch.tensor -> !torch.tensor | |
%1117 = torch.aten.t %1110 : !torch.tensor -> !torch.tensor | |
%1118 = torch.aten.detach %1117 : !torch.tensor -> !torch.tensor | |
%1119 = torch.aten.detach %1118 : !torch.tensor -> !torch.tensor | |
%1120 = torch.aten.detach %1099 : !torch.tensor -> !torch.tensor | |
%1121 = torch.aten.tanh_backward %1107, %1120 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1122 = torch.aten.t %1096 : !torch.tensor -> !torch.tensor | |
%1123 = torch.aten.mm %1121, %1122 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1124 = torch.aten.t %1121 : !torch.tensor -> !torch.tensor | |
%1125 = torch.aten.mm %1124, %1095 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1126 = torch.aten.t %1125 : !torch.tensor -> !torch.tensor | |
%1127 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1128 = torch.aten.sum.dim_IntList %1121, %1127, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1129 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1130 = torch.aten.view %1128, %1129 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1131 = torch.aten.detach %1130 : !torch.tensor -> !torch.tensor | |
%1132 = torch.aten.detach %1131 : !torch.tensor -> !torch.tensor | |
%1133 = torch.aten.t %1126 : !torch.tensor -> !torch.tensor | |
%1134 = torch.aten.detach %1133 : !torch.tensor -> !torch.tensor | |
%1135 = torch.aten.detach %1134 : !torch.tensor -> !torch.tensor | |
%1136 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1137 = torch.aten.new_zeros %1123, %1136, %int6, %int0, %cpu, %false : !torch.tensor, !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool -> !torch.tensor | |
%1138 = torch.operator "aten.select_scatter"(%1137, %1123, %int1, %int0) : (!torch.tensor, !torch.tensor, !torch.int, !torch.int) -> !torch.tensor | |
%1139 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1140 = torch.aten.new_zeros %1138, %1139, %int6, %int0, %cpu, %false : !torch.tensor, !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool -> !torch.tensor | |
%1141 = torch.operator "aten.slice_scatter"(%1140, %1138, %int0, %int0, %int9223372036854775807, %int1) : (!torch.tensor, !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.tensor | |
%1142 = torch.aten.sub.Tensor %1092, %result1_73, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1143 = torch.aten.mul.Tensor %1142, %result2_74 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1144 = torch.aten.mul.Tensor %1141, %arg67 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1145 = torch.aten.mul.Scalar %1144, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1146 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1147 = torch.aten.sum.dim_IntList %1144, %1146, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1148 = torch.aten.mul.Tensor %1144, %1143 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1149 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1150 = torch.aten.sum.dim_IntList %1148, %1149, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1151 = torch.aten.mul.Tensor %1143, %1150 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1152 = torch.aten.sub.Tensor %1145, %1147, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1153 = torch.aten.sub.Tensor %1152, %1151, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1154 = torch.aten.div.Scalar %result2_74, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1155 = torch.aten.mul.Tensor %1154, %1153 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1156 = torch.aten.mul.Tensor %1141, %1143 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1157 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1158 = torch.aten.sum.dim_IntList %1156, %1157, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1159 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1160 = torch.aten.sum.dim_IntList %1141, %1159, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1161 = torch.aten.detach %1158 : !torch.tensor -> !torch.tensor | |
%1162 = torch.aten.detach %1161 : !torch.tensor -> !torch.tensor | |
%1163 = torch.aten.detach %1160 : !torch.tensor -> !torch.tensor | |
%1164 = torch.aten.detach %1163 : !torch.tensor -> !torch.tensor | |
%1165 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1166 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1167 = torch.aten._reshape_alias %1155, %1165, %1166 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1168 = torch.aten.t %1086 : !torch.tensor -> !torch.tensor | |
%1169 = torch.aten.mm %1167, %1168 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1170 = torch.aten.t %1167 : !torch.tensor -> !torch.tensor | |
%1171 = torch.aten.mm %1170, %1088 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1172 = torch.aten.t %1171 : !torch.tensor -> !torch.tensor | |
%1173 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1174 = torch.aten.sum.dim_IntList %1167, %1173, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1175 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1176 = torch.aten.view %1174, %1175 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1177 = torch.aten.detach %1176 : !torch.tensor -> !torch.tensor | |
%1178 = torch.aten.detach %1177 : !torch.tensor -> !torch.tensor | |
%1179 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1180 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1181 = torch.aten._reshape_alias %1169, %1179, %1180 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1182 = torch.aten.t %1172 : !torch.tensor -> !torch.tensor | |
%1183 = torch.aten.detach %1182 : !torch.tensor -> !torch.tensor | |
%1184 = torch.aten.detach %1183 : !torch.tensor -> !torch.tensor | |
%1185 = torch.aten.gelu_backward %1181, %1084, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%1186 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1187 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1188 = torch.aten._reshape_alias %1185, %1186, %1187 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1189 = torch.aten.t %1079 : !torch.tensor -> !torch.tensor | |
%1190 = torch.aten.mm %1188, %1189 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1191 = torch.aten.t %1188 : !torch.tensor -> !torch.tensor | |
%1192 = torch.aten.mm %1191, %1081 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1193 = torch.aten.t %1192 : !torch.tensor -> !torch.tensor | |
%1194 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1195 = torch.aten.sum.dim_IntList %1188, %1194, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1196 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%1197 = torch.aten.view %1195, %1196 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1198 = torch.aten.detach %1197 : !torch.tensor -> !torch.tensor | |
%1199 = torch.aten.detach %1198 : !torch.tensor -> !torch.tensor | |
%1200 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1201 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1202 = torch.aten._reshape_alias %1190, %1200, %1201 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1203 = torch.aten.add.Tensor %1155, %1202, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1204 = torch.aten.t %1193 : !torch.tensor -> !torch.tensor | |
%1205 = torch.aten.detach %1204 : !torch.tensor -> !torch.tensor | |
%1206 = torch.aten.detach %1205 : !torch.tensor -> !torch.tensor | |
%1207 = torch.aten.sub.Tensor %1077, %result1_70, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1208 = torch.aten.mul.Tensor %1207, %result2_71 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1209 = torch.aten.mul.Tensor %1203, %arg55 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1210 = torch.aten.mul.Scalar %1209, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1211 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1212 = torch.aten.sum.dim_IntList %1209, %1211, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1213 = torch.aten.mul.Tensor %1209, %1208 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1214 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1215 = torch.aten.sum.dim_IntList %1213, %1214, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1216 = torch.aten.mul.Tensor %1208, %1215 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1217 = torch.aten.sub.Tensor %1210, %1212, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1218 = torch.aten.sub.Tensor %1217, %1216, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1219 = torch.aten.div.Scalar %result2_71, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1220 = torch.aten.mul.Tensor %1219, %1218 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1221 = torch.aten.mul.Tensor %1203, %1208 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1222 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1223 = torch.aten.sum.dim_IntList %1221, %1222, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1224 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1225 = torch.aten.sum.dim_IntList %1203, %1224, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1226 = torch.aten.detach %1223 : !torch.tensor -> !torch.tensor | |
%1227 = torch.aten.detach %1226 : !torch.tensor -> !torch.tensor | |
%1228 = torch.aten.detach %1225 : !torch.tensor -> !torch.tensor | |
%1229 = torch.aten.detach %1228 : !torch.tensor -> !torch.tensor | |
%1230 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1231 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1232 = torch.aten._reshape_alias %1220, %1230, %1231 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1233 = torch.aten.t %1071 : !torch.tensor -> !torch.tensor | |
%1234 = torch.aten.mm %1232, %1233 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1235 = torch.aten.t %1232 : !torch.tensor -> !torch.tensor | |
%1236 = torch.aten.mm %1235, %1073 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1237 = torch.aten.t %1236 : !torch.tensor -> !torch.tensor | |
%1238 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1239 = torch.aten.sum.dim_IntList %1232, %1238, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1240 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1241 = torch.aten.view %1239, %1240 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1242 = torch.aten.detach %1241 : !torch.tensor -> !torch.tensor | |
%1243 = torch.aten.detach %1242 : !torch.tensor -> !torch.tensor | |
%1244 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1245 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1246 = torch.aten._reshape_alias %1234, %1244, %1245 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1247 = torch.aten.t %1237 : !torch.tensor -> !torch.tensor | |
%1248 = torch.aten.detach %1247 : !torch.tensor -> !torch.tensor | |
%1249 = torch.aten.detach %1248 : !torch.tensor -> !torch.tensor | |
%1250 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1251 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1252 = torch.aten._reshape_alias %1246, %1250, %1251 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1253 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1254 = torch.aten.permute %1252, %1253 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1255 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1256 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1257 = torch.aten._reshape_alias %1254, %1255, %1256 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1258 = torch.aten.transpose.int %1057, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1259 = torch.aten.bmm %1258, %1257 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1260 = torch.aten.transpose.int %1062, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1261 = torch.aten.bmm %1257, %1260 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1262 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1263 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1264 = torch.aten._reshape_alias %1259, %1262, %1263 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1265 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1266 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1267 = torch.aten._reshape_alias %1261, %1265, %1266 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1268 = torch.aten.detach %1052 : !torch.tensor -> !torch.tensor | |
%1269 = torch.aten._softmax_backward_data %1267, %1268, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1270 = torch.aten.div.Scalar %1269, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%1271 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1272 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1273 = torch.aten._reshape_alias %1270, %1271, %1272 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1274 = torch.aten.transpose.int %1039, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1275 = torch.aten.bmm %1274, %1273 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1276 = torch.aten.transpose.int %1044, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1277 = torch.aten.bmm %1273, %1276 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1278 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1279 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1280 = torch.aten._reshape_alias %1275, %1278, %1279 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1281 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1282 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1283 = torch.aten._reshape_alias %1277, %1281, %1282 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1284 = torch.aten.transpose.int %1280, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1285 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1286 = torch.aten.permute %1283, %1285 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1287 = torch.aten.clone %1286, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%1288 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1289 = torch.aten._unsafe_view %1287, %1288 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1290 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1291 = torch.aten.permute %1264, %1290 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1292 = torch.aten.clone %1291, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%1293 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1294 = torch.aten._unsafe_view %1292, %1293 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1295 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1296 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1297 = torch.aten._reshape_alias %1294, %1295, %1296 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1298 = torch.aten.t %1020 : !torch.tensor -> !torch.tensor | |
%1299 = torch.aten.mm %1297, %1298 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1300 = torch.aten.t %1297 : !torch.tensor -> !torch.tensor | |
%1301 = torch.aten.mm %1300, %1022 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1302 = torch.aten.t %1301 : !torch.tensor -> !torch.tensor | |
%1303 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1304 = torch.aten.sum.dim_IntList %1297, %1303, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1305 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1306 = torch.aten.view %1304, %1305 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1307 = torch.aten.detach %1306 : !torch.tensor -> !torch.tensor | |
%1308 = torch.aten.detach %1307 : !torch.tensor -> !torch.tensor | |
%1309 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1310 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1311 = torch.aten._reshape_alias %1299, %1309, %1310 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1312 = torch.aten.add.Tensor %1220, %1311, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1313 = torch.aten.t %1302 : !torch.tensor -> !torch.tensor | |
%1314 = torch.aten.detach %1313 : !torch.tensor -> !torch.tensor | |
%1315 = torch.aten.detach %1314 : !torch.tensor -> !torch.tensor | |
%1316 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1317 = torch.aten.permute %1284, %1316 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1318 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1319 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1320 = torch.aten._reshape_alias %1317, %1318, %1319 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1321 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1322 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1323 = torch.aten._reshape_alias %1320, %1321, %1322 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1324 = torch.aten.t %1010 : !torch.tensor -> !torch.tensor | |
%1325 = torch.aten.mm %1323, %1324 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1326 = torch.aten.t %1323 : !torch.tensor -> !torch.tensor | |
%1327 = torch.aten.mm %1326, %1012 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1328 = torch.aten.t %1327 : !torch.tensor -> !torch.tensor | |
%1329 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1330 = torch.aten.sum.dim_IntList %1323, %1329, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1331 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1332 = torch.aten.view %1330, %1331 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1333 = torch.aten.detach %1332 : !torch.tensor -> !torch.tensor | |
%1334 = torch.aten.detach %1333 : !torch.tensor -> !torch.tensor | |
%1335 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1336 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1337 = torch.aten._reshape_alias %1325, %1335, %1336 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1338 = torch.aten.add.Tensor %1312, %1337, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1339 = torch.aten.t %1328 : !torch.tensor -> !torch.tensor | |
%1340 = torch.aten.detach %1339 : !torch.tensor -> !torch.tensor | |
%1341 = torch.aten.detach %1340 : !torch.tensor -> !torch.tensor | |
%1342 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1343 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1344 = torch.aten._reshape_alias %1289, %1342, %1343 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1345 = torch.aten.t %1004 : !torch.tensor -> !torch.tensor | |
%1346 = torch.aten.mm %1344, %1345 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1347 = torch.aten.t %1344 : !torch.tensor -> !torch.tensor | |
%1348 = torch.aten.mm %1347, %1006 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1349 = torch.aten.t %1348 : !torch.tensor -> !torch.tensor | |
%1350 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1351 = torch.aten.sum.dim_IntList %1344, %1350, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1352 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1353 = torch.aten.view %1351, %1352 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1354 = torch.aten.detach %1353 : !torch.tensor -> !torch.tensor | |
%1355 = torch.aten.detach %1354 : !torch.tensor -> !torch.tensor | |
%1356 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1357 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1358 = torch.aten._reshape_alias %1346, %1356, %1357 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1359 = torch.aten.add.Tensor %1338, %1358, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1360 = torch.aten.t %1349 : !torch.tensor -> !torch.tensor | |
%1361 = torch.aten.detach %1360 : !torch.tensor -> !torch.tensor | |
%1362 = torch.aten.detach %1361 : !torch.tensor -> !torch.tensor | |
%1363 = torch.aten.sub.Tensor %1002, %result1_67, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1364 = torch.aten.mul.Tensor %1363, %result2_68 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1365 = torch.aten.mul.Tensor %1359, %arg51 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1366 = torch.aten.mul.Scalar %1365, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1367 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1368 = torch.aten.sum.dim_IntList %1365, %1367, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1369 = torch.aten.mul.Tensor %1365, %1364 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1370 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1371 = torch.aten.sum.dim_IntList %1369, %1370, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1372 = torch.aten.mul.Tensor %1364, %1371 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1373 = torch.aten.sub.Tensor %1366, %1368, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1374 = torch.aten.sub.Tensor %1373, %1372, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1375 = torch.aten.div.Scalar %result2_68, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1376 = torch.aten.mul.Tensor %1375, %1374 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1377 = torch.aten.mul.Tensor %1359, %1364 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1378 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1379 = torch.aten.sum.dim_IntList %1377, %1378, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1380 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1381 = torch.aten.sum.dim_IntList %1359, %1380, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1382 = torch.aten.detach %1379 : !torch.tensor -> !torch.tensor | |
%1383 = torch.aten.detach %1382 : !torch.tensor -> !torch.tensor | |
%1384 = torch.aten.detach %1381 : !torch.tensor -> !torch.tensor | |
%1385 = torch.aten.detach %1384 : !torch.tensor -> !torch.tensor | |
%1386 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1387 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1388 = torch.aten._reshape_alias %1376, %1386, %1387 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1389 = torch.aten.t %996 : !torch.tensor -> !torch.tensor | |
%1390 = torch.aten.mm %1388, %1389 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1391 = torch.aten.t %1388 : !torch.tensor -> !torch.tensor | |
%1392 = torch.aten.mm %1391, %998 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1393 = torch.aten.t %1392 : !torch.tensor -> !torch.tensor | |
%1394 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1395 = torch.aten.sum.dim_IntList %1388, %1394, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1396 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1397 = torch.aten.view %1395, %1396 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1398 = torch.aten.detach %1397 : !torch.tensor -> !torch.tensor | |
%1399 = torch.aten.detach %1398 : !torch.tensor -> !torch.tensor | |
%1400 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1401 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1402 = torch.aten._reshape_alias %1390, %1400, %1401 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1403 = torch.aten.t %1393 : !torch.tensor -> !torch.tensor | |
%1404 = torch.aten.detach %1403 : !torch.tensor -> !torch.tensor | |
%1405 = torch.aten.detach %1404 : !torch.tensor -> !torch.tensor | |
%1406 = torch.aten.gelu_backward %1402, %994, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%1407 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1408 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1409 = torch.aten._reshape_alias %1406, %1407, %1408 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1410 = torch.aten.t %989 : !torch.tensor -> !torch.tensor | |
%1411 = torch.aten.mm %1409, %1410 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1412 = torch.aten.t %1409 : !torch.tensor -> !torch.tensor | |
%1413 = torch.aten.mm %1412, %991 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1414 = torch.aten.t %1413 : !torch.tensor -> !torch.tensor | |
%1415 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1416 = torch.aten.sum.dim_IntList %1409, %1415, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1417 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%1418 = torch.aten.view %1416, %1417 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1419 = torch.aten.detach %1418 : !torch.tensor -> !torch.tensor | |
%1420 = torch.aten.detach %1419 : !torch.tensor -> !torch.tensor | |
%1421 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1422 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1423 = torch.aten._reshape_alias %1411, %1421, %1422 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1424 = torch.aten.add.Tensor %1376, %1423, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1425 = torch.aten.t %1414 : !torch.tensor -> !torch.tensor | |
%1426 = torch.aten.detach %1425 : !torch.tensor -> !torch.tensor | |
%1427 = torch.aten.detach %1426 : !torch.tensor -> !torch.tensor | |
%1428 = torch.aten.sub.Tensor %987, %result1_64, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1429 = torch.aten.mul.Tensor %1428, %result2_65 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1430 = torch.aten.mul.Tensor %1424, %arg39 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1431 = torch.aten.mul.Scalar %1430, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1432 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1433 = torch.aten.sum.dim_IntList %1430, %1432, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1434 = torch.aten.mul.Tensor %1430, %1429 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1435 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1436 = torch.aten.sum.dim_IntList %1434, %1435, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1437 = torch.aten.mul.Tensor %1429, %1436 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1438 = torch.aten.sub.Tensor %1431, %1433, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1439 = torch.aten.sub.Tensor %1438, %1437, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1440 = torch.aten.div.Scalar %result2_65, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1441 = torch.aten.mul.Tensor %1440, %1439 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1442 = torch.aten.mul.Tensor %1424, %1429 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1443 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1444 = torch.aten.sum.dim_IntList %1442, %1443, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1445 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1446 = torch.aten.sum.dim_IntList %1424, %1445, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1447 = torch.aten.detach %1444 : !torch.tensor -> !torch.tensor | |
%1448 = torch.aten.detach %1447 : !torch.tensor -> !torch.tensor | |
%1449 = torch.aten.detach %1446 : !torch.tensor -> !torch.tensor | |
%1450 = torch.aten.detach %1449 : !torch.tensor -> !torch.tensor | |
%1451 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1452 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1453 = torch.aten._reshape_alias %1441, %1451, %1452 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1454 = torch.aten.t %981 : !torch.tensor -> !torch.tensor | |
%1455 = torch.aten.mm %1453, %1454 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1456 = torch.aten.t %1453 : !torch.tensor -> !torch.tensor | |
%1457 = torch.aten.mm %1456, %983 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1458 = torch.aten.t %1457 : !torch.tensor -> !torch.tensor | |
%1459 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1460 = torch.aten.sum.dim_IntList %1453, %1459, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1461 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1462 = torch.aten.view %1460, %1461 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1463 = torch.aten.detach %1462 : !torch.tensor -> !torch.tensor | |
%1464 = torch.aten.detach %1463 : !torch.tensor -> !torch.tensor | |
%1465 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1466 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1467 = torch.aten._reshape_alias %1455, %1465, %1466 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1468 = torch.aten.t %1458 : !torch.tensor -> !torch.tensor | |
%1469 = torch.aten.detach %1468 : !torch.tensor -> !torch.tensor | |
%1470 = torch.aten.detach %1469 : !torch.tensor -> !torch.tensor | |
%1471 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1472 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1473 = torch.aten._reshape_alias %1467, %1471, %1472 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1474 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1475 = torch.aten.permute %1473, %1474 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1476 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1477 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1478 = torch.aten._reshape_alias %1475, %1476, %1477 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1479 = torch.aten.transpose.int %967, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1480 = torch.aten.bmm %1479, %1478 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1481 = torch.aten.transpose.int %972, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1482 = torch.aten.bmm %1478, %1481 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1483 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1484 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1485 = torch.aten._reshape_alias %1480, %1483, %1484 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1486 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1487 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1488 = torch.aten._reshape_alias %1482, %1486, %1487 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1489 = torch.aten.detach %962 : !torch.tensor -> !torch.tensor | |
%1490 = torch.aten._softmax_backward_data %1488, %1489, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1491 = torch.aten.div.Scalar %1490, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%1492 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1493 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1494 = torch.aten._reshape_alias %1491, %1492, %1493 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1495 = torch.aten.transpose.int %949, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1496 = torch.aten.bmm %1495, %1494 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1497 = torch.aten.transpose.int %954, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1498 = torch.aten.bmm %1494, %1497 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1499 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1500 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1501 = torch.aten._reshape_alias %1496, %1499, %1500 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1502 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1503 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1504 = torch.aten._reshape_alias %1498, %1502, %1503 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1505 = torch.aten.transpose.int %1501, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1506 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1507 = torch.aten.permute %1504, %1506 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1508 = torch.aten.clone %1507, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%1509 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1510 = torch.aten._unsafe_view %1508, %1509 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1511 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1512 = torch.aten.permute %1485, %1511 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1513 = torch.aten.clone %1512, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%1514 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1515 = torch.aten._unsafe_view %1513, %1514 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1516 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1517 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1518 = torch.aten._reshape_alias %1515, %1516, %1517 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1519 = torch.aten.t %930 : !torch.tensor -> !torch.tensor | |
%1520 = torch.aten.mm %1518, %1519 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1521 = torch.aten.t %1518 : !torch.tensor -> !torch.tensor | |
%1522 = torch.aten.mm %1521, %932 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1523 = torch.aten.t %1522 : !torch.tensor -> !torch.tensor | |
%1524 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1525 = torch.aten.sum.dim_IntList %1518, %1524, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1526 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1527 = torch.aten.view %1525, %1526 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1528 = torch.aten.detach %1527 : !torch.tensor -> !torch.tensor | |
%1529 = torch.aten.detach %1528 : !torch.tensor -> !torch.tensor | |
%1530 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1531 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1532 = torch.aten._reshape_alias %1520, %1530, %1531 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1533 = torch.aten.add.Tensor %1441, %1532, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1534 = torch.aten.t %1523 : !torch.tensor -> !torch.tensor | |
%1535 = torch.aten.detach %1534 : !torch.tensor -> !torch.tensor | |
%1536 = torch.aten.detach %1535 : !torch.tensor -> !torch.tensor | |
%1537 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1538 = torch.aten.permute %1505, %1537 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1539 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1540 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1541 = torch.aten._reshape_alias %1538, %1539, %1540 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1542 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1543 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1544 = torch.aten._reshape_alias %1541, %1542, %1543 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1545 = torch.aten.t %920 : !torch.tensor -> !torch.tensor | |
%1546 = torch.aten.mm %1544, %1545 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1547 = torch.aten.t %1544 : !torch.tensor -> !torch.tensor | |
%1548 = torch.aten.mm %1547, %922 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1549 = torch.aten.t %1548 : !torch.tensor -> !torch.tensor | |
%1550 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1551 = torch.aten.sum.dim_IntList %1544, %1550, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1552 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1553 = torch.aten.view %1551, %1552 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1554 = torch.aten.detach %1553 : !torch.tensor -> !torch.tensor | |
%1555 = torch.aten.detach %1554 : !torch.tensor -> !torch.tensor | |
%1556 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1557 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1558 = torch.aten._reshape_alias %1546, %1556, %1557 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1559 = torch.aten.add.Tensor %1533, %1558, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1560 = torch.aten.t %1549 : !torch.tensor -> !torch.tensor | |
%1561 = torch.aten.detach %1560 : !torch.tensor -> !torch.tensor | |
%1562 = torch.aten.detach %1561 : !torch.tensor -> !torch.tensor | |
%1563 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1564 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1565 = torch.aten._reshape_alias %1510, %1563, %1564 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1566 = torch.aten.t %914 : !torch.tensor -> !torch.tensor | |
%1567 = torch.aten.mm %1565, %1566 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1568 = torch.aten.t %1565 : !torch.tensor -> !torch.tensor | |
%1569 = torch.aten.mm %1568, %916 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1570 = torch.aten.t %1569 : !torch.tensor -> !torch.tensor | |
%1571 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1572 = torch.aten.sum.dim_IntList %1565, %1571, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1573 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1574 = torch.aten.view %1572, %1573 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1575 = torch.aten.detach %1574 : !torch.tensor -> !torch.tensor | |
%1576 = torch.aten.detach %1575 : !torch.tensor -> !torch.tensor | |
%1577 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1578 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1579 = torch.aten._reshape_alias %1567, %1577, %1578 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1580 = torch.aten.add.Tensor %1559, %1579, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1581 = torch.aten.t %1570 : !torch.tensor -> !torch.tensor | |
%1582 = torch.aten.detach %1581 : !torch.tensor -> !torch.tensor | |
%1583 = torch.aten.detach %1582 : !torch.tensor -> !torch.tensor | |
%1584 = torch.aten.sub.Tensor %912, %result1_61, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1585 = torch.aten.mul.Tensor %1584, %result2_62 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1586 = torch.aten.mul.Tensor %1580, %arg195 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1587 = torch.aten.mul.Scalar %1586, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1588 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1589 = torch.aten.sum.dim_IntList %1586, %1588, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1590 = torch.aten.mul.Tensor %1586, %1585 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1591 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1592 = torch.aten.sum.dim_IntList %1590, %1591, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1593 = torch.aten.mul.Tensor %1585, %1592 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1594 = torch.aten.sub.Tensor %1587, %1589, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1595 = torch.aten.sub.Tensor %1594, %1593, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1596 = torch.aten.div.Scalar %result2_62, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1597 = torch.aten.mul.Tensor %1596, %1595 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1598 = torch.aten.mul.Tensor %1580, %1585 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1599 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1600 = torch.aten.sum.dim_IntList %1598, %1599, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1601 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1602 = torch.aten.sum.dim_IntList %1580, %1601, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1603 = torch.aten.detach %1600 : !torch.tensor -> !torch.tensor | |
%1604 = torch.aten.detach %1603 : !torch.tensor -> !torch.tensor | |
%1605 = torch.aten.detach %1602 : !torch.tensor -> !torch.tensor | |
%1606 = torch.aten.detach %1605 : !torch.tensor -> !torch.tensor | |
%1607 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1608 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1609 = torch.aten._reshape_alias %1597, %1607, %1608 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1610 = torch.aten.t %906 : !torch.tensor -> !torch.tensor | |
%1611 = torch.aten.mm %1609, %1610 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1612 = torch.aten.t %1609 : !torch.tensor -> !torch.tensor | |
%1613 = torch.aten.mm %1612, %908 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1614 = torch.aten.t %1613 : !torch.tensor -> !torch.tensor | |
%1615 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1616 = torch.aten.sum.dim_IntList %1609, %1615, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1617 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1618 = torch.aten.view %1616, %1617 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1619 = torch.aten.detach %1618 : !torch.tensor -> !torch.tensor | |
%1620 = torch.aten.detach %1619 : !torch.tensor -> !torch.tensor | |
%1621 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1622 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1623 = torch.aten._reshape_alias %1611, %1621, %1622 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1624 = torch.aten.t %1614 : !torch.tensor -> !torch.tensor | |
%1625 = torch.aten.detach %1624 : !torch.tensor -> !torch.tensor | |
%1626 = torch.aten.detach %1625 : !torch.tensor -> !torch.tensor | |
%1627 = torch.aten.gelu_backward %1623, %904, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%1628 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1629 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1630 = torch.aten._reshape_alias %1627, %1628, %1629 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1631 = torch.aten.t %899 : !torch.tensor -> !torch.tensor | |
%1632 = torch.aten.mm %1630, %1631 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1633 = torch.aten.t %1630 : !torch.tensor -> !torch.tensor | |
%1634 = torch.aten.mm %1633, %901 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1635 = torch.aten.t %1634 : !torch.tensor -> !torch.tensor | |
%1636 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1637 = torch.aten.sum.dim_IntList %1630, %1636, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1638 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%1639 = torch.aten.view %1637, %1638 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1640 = torch.aten.detach %1639 : !torch.tensor -> !torch.tensor | |
%1641 = torch.aten.detach %1640 : !torch.tensor -> !torch.tensor | |
%1642 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1643 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1644 = torch.aten._reshape_alias %1632, %1642, %1643 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1645 = torch.aten.add.Tensor %1597, %1644, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1646 = torch.aten.t %1635 : !torch.tensor -> !torch.tensor | |
%1647 = torch.aten.detach %1646 : !torch.tensor -> !torch.tensor | |
%1648 = torch.aten.detach %1647 : !torch.tensor -> !torch.tensor | |
%1649 = torch.aten.sub.Tensor %897, %result1_58, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1650 = torch.aten.mul.Tensor %1649, %result2_59 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1651 = torch.aten.mul.Tensor %1645, %arg183 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1652 = torch.aten.mul.Scalar %1651, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1653 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1654 = torch.aten.sum.dim_IntList %1651, %1653, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1655 = torch.aten.mul.Tensor %1651, %1650 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1656 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1657 = torch.aten.sum.dim_IntList %1655, %1656, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1658 = torch.aten.mul.Tensor %1650, %1657 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1659 = torch.aten.sub.Tensor %1652, %1654, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1660 = torch.aten.sub.Tensor %1659, %1658, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1661 = torch.aten.div.Scalar %result2_59, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1662 = torch.aten.mul.Tensor %1661, %1660 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1663 = torch.aten.mul.Tensor %1645, %1650 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1664 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1665 = torch.aten.sum.dim_IntList %1663, %1664, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1666 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1667 = torch.aten.sum.dim_IntList %1645, %1666, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1668 = torch.aten.detach %1665 : !torch.tensor -> !torch.tensor | |
%1669 = torch.aten.detach %1668 : !torch.tensor -> !torch.tensor | |
%1670 = torch.aten.detach %1667 : !torch.tensor -> !torch.tensor | |
%1671 = torch.aten.detach %1670 : !torch.tensor -> !torch.tensor | |
%1672 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1673 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1674 = torch.aten._reshape_alias %1662, %1672, %1673 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1675 = torch.aten.t %891 : !torch.tensor -> !torch.tensor | |
%1676 = torch.aten.mm %1674, %1675 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1677 = torch.aten.t %1674 : !torch.tensor -> !torch.tensor | |
%1678 = torch.aten.mm %1677, %893 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1679 = torch.aten.t %1678 : !torch.tensor -> !torch.tensor | |
%1680 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1681 = torch.aten.sum.dim_IntList %1674, %1680, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1682 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1683 = torch.aten.view %1681, %1682 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1684 = torch.aten.detach %1683 : !torch.tensor -> !torch.tensor | |
%1685 = torch.aten.detach %1684 : !torch.tensor -> !torch.tensor | |
%1686 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1687 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1688 = torch.aten._reshape_alias %1676, %1686, %1687 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1689 = torch.aten.t %1679 : !torch.tensor -> !torch.tensor | |
%1690 = torch.aten.detach %1689 : !torch.tensor -> !torch.tensor | |
%1691 = torch.aten.detach %1690 : !torch.tensor -> !torch.tensor | |
%1692 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1693 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1694 = torch.aten._reshape_alias %1688, %1692, %1693 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1695 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1696 = torch.aten.permute %1694, %1695 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1697 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1698 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1699 = torch.aten._reshape_alias %1696, %1697, %1698 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1700 = torch.aten.transpose.int %877, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1701 = torch.aten.bmm %1700, %1699 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1702 = torch.aten.transpose.int %882, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1703 = torch.aten.bmm %1699, %1702 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1704 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1705 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1706 = torch.aten._reshape_alias %1701, %1704, %1705 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1707 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1708 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1709 = torch.aten._reshape_alias %1703, %1707, %1708 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1710 = torch.aten.detach %872 : !torch.tensor -> !torch.tensor | |
%1711 = torch.aten._softmax_backward_data %1709, %1710, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1712 = torch.aten.div.Scalar %1711, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%1713 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1714 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1715 = torch.aten._reshape_alias %1712, %1713, %1714 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1716 = torch.aten.transpose.int %859, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1717 = torch.aten.bmm %1716, %1715 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1718 = torch.aten.transpose.int %864, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1719 = torch.aten.bmm %1715, %1718 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1720 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1721 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1722 = torch.aten._reshape_alias %1717, %1720, %1721 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1723 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1724 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1725 = torch.aten._reshape_alias %1719, %1723, %1724 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1726 = torch.aten.transpose.int %1722, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1727 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1728 = torch.aten.permute %1725, %1727 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1729 = torch.aten.clone %1728, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%1730 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1731 = torch.aten._unsafe_view %1729, %1730 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1732 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1733 = torch.aten.permute %1706, %1732 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1734 = torch.aten.clone %1733, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%1735 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1736 = torch.aten._unsafe_view %1734, %1735 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1737 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1738 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1739 = torch.aten._reshape_alias %1736, %1737, %1738 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1740 = torch.aten.t %840 : !torch.tensor -> !torch.tensor | |
%1741 = torch.aten.mm %1739, %1740 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1742 = torch.aten.t %1739 : !torch.tensor -> !torch.tensor | |
%1743 = torch.aten.mm %1742, %842 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1744 = torch.aten.t %1743 : !torch.tensor -> !torch.tensor | |
%1745 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1746 = torch.aten.sum.dim_IntList %1739, %1745, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1747 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1748 = torch.aten.view %1746, %1747 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1749 = torch.aten.detach %1748 : !torch.tensor -> !torch.tensor | |
%1750 = torch.aten.detach %1749 : !torch.tensor -> !torch.tensor | |
%1751 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1752 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1753 = torch.aten._reshape_alias %1741, %1751, %1752 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1754 = torch.aten.add.Tensor %1662, %1753, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1755 = torch.aten.t %1744 : !torch.tensor -> !torch.tensor | |
%1756 = torch.aten.detach %1755 : !torch.tensor -> !torch.tensor | |
%1757 = torch.aten.detach %1756 : !torch.tensor -> !torch.tensor | |
%1758 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1759 = torch.aten.permute %1726, %1758 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1760 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1761 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1762 = torch.aten._reshape_alias %1759, %1760, %1761 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1763 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1764 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1765 = torch.aten._reshape_alias %1762, %1763, %1764 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1766 = torch.aten.t %830 : !torch.tensor -> !torch.tensor | |
%1767 = torch.aten.mm %1765, %1766 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1768 = torch.aten.t %1765 : !torch.tensor -> !torch.tensor | |
%1769 = torch.aten.mm %1768, %832 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1770 = torch.aten.t %1769 : !torch.tensor -> !torch.tensor | |
%1771 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1772 = torch.aten.sum.dim_IntList %1765, %1771, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1773 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1774 = torch.aten.view %1772, %1773 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1775 = torch.aten.detach %1774 : !torch.tensor -> !torch.tensor | |
%1776 = torch.aten.detach %1775 : !torch.tensor -> !torch.tensor | |
%1777 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1778 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1779 = torch.aten._reshape_alias %1767, %1777, %1778 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1780 = torch.aten.add.Tensor %1754, %1779, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1781 = torch.aten.t %1770 : !torch.tensor -> !torch.tensor | |
%1782 = torch.aten.detach %1781 : !torch.tensor -> !torch.tensor | |
%1783 = torch.aten.detach %1782 : !torch.tensor -> !torch.tensor | |
%1784 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1785 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1786 = torch.aten._reshape_alias %1731, %1784, %1785 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1787 = torch.aten.t %824 : !torch.tensor -> !torch.tensor | |
%1788 = torch.aten.mm %1786, %1787 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1789 = torch.aten.t %1786 : !torch.tensor -> !torch.tensor | |
%1790 = torch.aten.mm %1789, %826 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1791 = torch.aten.t %1790 : !torch.tensor -> !torch.tensor | |
%1792 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1793 = torch.aten.sum.dim_IntList %1786, %1792, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1794 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1795 = torch.aten.view %1793, %1794 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1796 = torch.aten.detach %1795 : !torch.tensor -> !torch.tensor | |
%1797 = torch.aten.detach %1796 : !torch.tensor -> !torch.tensor | |
%1798 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1799 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1800 = torch.aten._reshape_alias %1788, %1798, %1799 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1801 = torch.aten.add.Tensor %1780, %1800, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1802 = torch.aten.t %1791 : !torch.tensor -> !torch.tensor | |
%1803 = torch.aten.detach %1802 : !torch.tensor -> !torch.tensor | |
%1804 = torch.aten.detach %1803 : !torch.tensor -> !torch.tensor | |
%1805 = torch.aten.sub.Tensor %822, %result1_55, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1806 = torch.aten.mul.Tensor %1805, %result2_56 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1807 = torch.aten.mul.Tensor %1801, %arg179 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1808 = torch.aten.mul.Scalar %1807, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1809 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1810 = torch.aten.sum.dim_IntList %1807, %1809, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1811 = torch.aten.mul.Tensor %1807, %1806 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1812 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1813 = torch.aten.sum.dim_IntList %1811, %1812, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1814 = torch.aten.mul.Tensor %1806, %1813 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1815 = torch.aten.sub.Tensor %1808, %1810, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1816 = torch.aten.sub.Tensor %1815, %1814, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1817 = torch.aten.div.Scalar %result2_56, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1818 = torch.aten.mul.Tensor %1817, %1816 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1819 = torch.aten.mul.Tensor %1801, %1806 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1820 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1821 = torch.aten.sum.dim_IntList %1819, %1820, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1822 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1823 = torch.aten.sum.dim_IntList %1801, %1822, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1824 = torch.aten.detach %1821 : !torch.tensor -> !torch.tensor | |
%1825 = torch.aten.detach %1824 : !torch.tensor -> !torch.tensor | |
%1826 = torch.aten.detach %1823 : !torch.tensor -> !torch.tensor | |
%1827 = torch.aten.detach %1826 : !torch.tensor -> !torch.tensor | |
%1828 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1829 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1830 = torch.aten._reshape_alias %1818, %1828, %1829 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1831 = torch.aten.t %816 : !torch.tensor -> !torch.tensor | |
%1832 = torch.aten.mm %1830, %1831 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1833 = torch.aten.t %1830 : !torch.tensor -> !torch.tensor | |
%1834 = torch.aten.mm %1833, %818 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1835 = torch.aten.t %1834 : !torch.tensor -> !torch.tensor | |
%1836 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1837 = torch.aten.sum.dim_IntList %1830, %1836, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1838 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1839 = torch.aten.view %1837, %1838 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1840 = torch.aten.detach %1839 : !torch.tensor -> !torch.tensor | |
%1841 = torch.aten.detach %1840 : !torch.tensor -> !torch.tensor | |
%1842 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1843 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1844 = torch.aten._reshape_alias %1832, %1842, %1843 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1845 = torch.aten.t %1835 : !torch.tensor -> !torch.tensor | |
%1846 = torch.aten.detach %1845 : !torch.tensor -> !torch.tensor | |
%1847 = torch.aten.detach %1846 : !torch.tensor -> !torch.tensor | |
%1848 = torch.aten.gelu_backward %1844, %814, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%1849 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1850 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1851 = torch.aten._reshape_alias %1848, %1849, %1850 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1852 = torch.aten.t %809 : !torch.tensor -> !torch.tensor | |
%1853 = torch.aten.mm %1851, %1852 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1854 = torch.aten.t %1851 : !torch.tensor -> !torch.tensor | |
%1855 = torch.aten.mm %1854, %811 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1856 = torch.aten.t %1855 : !torch.tensor -> !torch.tensor | |
%1857 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1858 = torch.aten.sum.dim_IntList %1851, %1857, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1859 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%1860 = torch.aten.view %1858, %1859 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1861 = torch.aten.detach %1860 : !torch.tensor -> !torch.tensor | |
%1862 = torch.aten.detach %1861 : !torch.tensor -> !torch.tensor | |
%1863 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1864 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1865 = torch.aten._reshape_alias %1853, %1863, %1864 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1866 = torch.aten.add.Tensor %1818, %1865, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1867 = torch.aten.t %1856 : !torch.tensor -> !torch.tensor | |
%1868 = torch.aten.detach %1867 : !torch.tensor -> !torch.tensor | |
%1869 = torch.aten.detach %1868 : !torch.tensor -> !torch.tensor | |
%1870 = torch.aten.sub.Tensor %807, %result1_52, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1871 = torch.aten.mul.Tensor %1870, %result2_53 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1872 = torch.aten.mul.Tensor %1866, %arg167 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1873 = torch.aten.mul.Scalar %1872, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1874 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1875 = torch.aten.sum.dim_IntList %1872, %1874, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1876 = torch.aten.mul.Tensor %1872, %1871 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1877 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%1878 = torch.aten.sum.dim_IntList %1876, %1877, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1879 = torch.aten.mul.Tensor %1871, %1878 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1880 = torch.aten.sub.Tensor %1873, %1875, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1881 = torch.aten.sub.Tensor %1880, %1879, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1882 = torch.aten.div.Scalar %result2_53, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%1883 = torch.aten.mul.Tensor %1882, %1881 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1884 = torch.aten.mul.Tensor %1866, %1871 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1885 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1886 = torch.aten.sum.dim_IntList %1884, %1885, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1887 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1888 = torch.aten.sum.dim_IntList %1866, %1887, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1889 = torch.aten.detach %1886 : !torch.tensor -> !torch.tensor | |
%1890 = torch.aten.detach %1889 : !torch.tensor -> !torch.tensor | |
%1891 = torch.aten.detach %1888 : !torch.tensor -> !torch.tensor | |
%1892 = torch.aten.detach %1891 : !torch.tensor -> !torch.tensor | |
%1893 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1894 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1895 = torch.aten._reshape_alias %1883, %1893, %1894 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1896 = torch.aten.t %801 : !torch.tensor -> !torch.tensor | |
%1897 = torch.aten.mm %1895, %1896 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1898 = torch.aten.t %1895 : !torch.tensor -> !torch.tensor | |
%1899 = torch.aten.mm %1898, %803 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1900 = torch.aten.t %1899 : !torch.tensor -> !torch.tensor | |
%1901 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1902 = torch.aten.sum.dim_IntList %1895, %1901, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1903 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1904 = torch.aten.view %1902, %1903 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1905 = torch.aten.detach %1904 : !torch.tensor -> !torch.tensor | |
%1906 = torch.aten.detach %1905 : !torch.tensor -> !torch.tensor | |
%1907 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1908 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1909 = torch.aten._reshape_alias %1897, %1907, %1908 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1910 = torch.aten.t %1900 : !torch.tensor -> !torch.tensor | |
%1911 = torch.aten.detach %1910 : !torch.tensor -> !torch.tensor | |
%1912 = torch.aten.detach %1911 : !torch.tensor -> !torch.tensor | |
%1913 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1914 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1915 = torch.aten._reshape_alias %1909, %1913, %1914 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1916 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1917 = torch.aten.permute %1915, %1916 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1918 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1919 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1920 = torch.aten._reshape_alias %1917, %1918, %1919 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1921 = torch.aten.transpose.int %787, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1922 = torch.aten.bmm %1921, %1920 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1923 = torch.aten.transpose.int %792, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1924 = torch.aten.bmm %1920, %1923 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1925 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1926 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1927 = torch.aten._reshape_alias %1922, %1925, %1926 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1928 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1929 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1930 = torch.aten._reshape_alias %1924, %1928, %1929 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1931 = torch.aten.detach %782 : !torch.tensor -> !torch.tensor | |
%1932 = torch.aten._softmax_backward_data %1930, %1931, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1933 = torch.aten.div.Scalar %1932, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%1934 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1935 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1936 = torch.aten._reshape_alias %1933, %1934, %1935 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1937 = torch.aten.transpose.int %769, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1938 = torch.aten.bmm %1937, %1936 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1939 = torch.aten.transpose.int %774, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1940 = torch.aten.bmm %1936, %1939 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1941 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1942 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1943 = torch.aten._reshape_alias %1938, %1941, %1942 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1944 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1945 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1946 = torch.aten._reshape_alias %1940, %1944, %1945 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1947 = torch.aten.transpose.int %1943, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1948 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1949 = torch.aten.permute %1946, %1948 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1950 = torch.aten.clone %1949, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%1951 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1952 = torch.aten._unsafe_view %1950, %1951 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1953 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1954 = torch.aten.permute %1927, %1953 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1955 = torch.aten.clone %1954, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%1956 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1957 = torch.aten._unsafe_view %1955, %1956 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1958 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1959 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1960 = torch.aten._reshape_alias %1957, %1958, %1959 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1961 = torch.aten.t %750 : !torch.tensor -> !torch.tensor | |
%1962 = torch.aten.mm %1960, %1961 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1963 = torch.aten.t %1960 : !torch.tensor -> !torch.tensor | |
%1964 = torch.aten.mm %1963, %752 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1965 = torch.aten.t %1964 : !torch.tensor -> !torch.tensor | |
%1966 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1967 = torch.aten.sum.dim_IntList %1960, %1966, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1968 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1969 = torch.aten.view %1967, %1968 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1970 = torch.aten.detach %1969 : !torch.tensor -> !torch.tensor | |
%1971 = torch.aten.detach %1970 : !torch.tensor -> !torch.tensor | |
%1972 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1973 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1974 = torch.aten._reshape_alias %1962, %1972, %1973 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1975 = torch.aten.add.Tensor %1883, %1974, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1976 = torch.aten.t %1965 : !torch.tensor -> !torch.tensor | |
%1977 = torch.aten.detach %1976 : !torch.tensor -> !torch.tensor | |
%1978 = torch.aten.detach %1977 : !torch.tensor -> !torch.tensor | |
%1979 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1980 = torch.aten.permute %1947, %1979 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1981 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1982 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1983 = torch.aten._reshape_alias %1980, %1981, %1982 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1984 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1985 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1986 = torch.aten._reshape_alias %1983, %1984, %1985 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%1987 = torch.aten.t %740 : !torch.tensor -> !torch.tensor | |
%1988 = torch.aten.mm %1986, %1987 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1989 = torch.aten.t %1986 : !torch.tensor -> !torch.tensor | |
%1990 = torch.aten.mm %1989, %742 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1991 = torch.aten.t %1990 : !torch.tensor -> !torch.tensor | |
%1992 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%1993 = torch.aten.sum.dim_IntList %1986, %1992, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%1994 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%1995 = torch.aten.view %1993, %1994 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%1996 = torch.aten.detach %1995 : !torch.tensor -> !torch.tensor | |
%1997 = torch.aten.detach %1996 : !torch.tensor -> !torch.tensor | |
%1998 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1999 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2000 = torch.aten._reshape_alias %1988, %1998, %1999 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2001 = torch.aten.add.Tensor %1975, %2000, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2002 = torch.aten.t %1991 : !torch.tensor -> !torch.tensor | |
%2003 = torch.aten.detach %2002 : !torch.tensor -> !torch.tensor | |
%2004 = torch.aten.detach %2003 : !torch.tensor -> !torch.tensor | |
%2005 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2006 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2007 = torch.aten._reshape_alias %1952, %2005, %2006 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2008 = torch.aten.t %734 : !torch.tensor -> !torch.tensor | |
%2009 = torch.aten.mm %2007, %2008 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2010 = torch.aten.t %2007 : !torch.tensor -> !torch.tensor | |
%2011 = torch.aten.mm %2010, %736 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2012 = torch.aten.t %2011 : !torch.tensor -> !torch.tensor | |
%2013 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2014 = torch.aten.sum.dim_IntList %2007, %2013, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2015 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2016 = torch.aten.view %2014, %2015 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2017 = torch.aten.detach %2016 : !torch.tensor -> !torch.tensor | |
%2018 = torch.aten.detach %2017 : !torch.tensor -> !torch.tensor | |
%2019 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2020 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2021 = torch.aten._reshape_alias %2009, %2019, %2020 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2022 = torch.aten.add.Tensor %2001, %2021, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2023 = torch.aten.t %2012 : !torch.tensor -> !torch.tensor | |
%2024 = torch.aten.detach %2023 : !torch.tensor -> !torch.tensor | |
%2025 = torch.aten.detach %2024 : !torch.tensor -> !torch.tensor | |
%2026 = torch.aten.sub.Tensor %732, %result1_49, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2027 = torch.aten.mul.Tensor %2026, %result2_50 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2028 = torch.aten.mul.Tensor %2022, %arg163 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2029 = torch.aten.mul.Scalar %2028, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2030 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2031 = torch.aten.sum.dim_IntList %2028, %2030, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2032 = torch.aten.mul.Tensor %2028, %2027 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2033 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2034 = torch.aten.sum.dim_IntList %2032, %2033, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2035 = torch.aten.mul.Tensor %2027, %2034 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2036 = torch.aten.sub.Tensor %2029, %2031, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2037 = torch.aten.sub.Tensor %2036, %2035, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2038 = torch.aten.div.Scalar %result2_50, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2039 = torch.aten.mul.Tensor %2038, %2037 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2040 = torch.aten.mul.Tensor %2022, %2027 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2041 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2042 = torch.aten.sum.dim_IntList %2040, %2041, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2043 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2044 = torch.aten.sum.dim_IntList %2022, %2043, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2045 = torch.aten.detach %2042 : !torch.tensor -> !torch.tensor | |
%2046 = torch.aten.detach %2045 : !torch.tensor -> !torch.tensor | |
%2047 = torch.aten.detach %2044 : !torch.tensor -> !torch.tensor | |
%2048 = torch.aten.detach %2047 : !torch.tensor -> !torch.tensor | |
%2049 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2050 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2051 = torch.aten._reshape_alias %2039, %2049, %2050 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2052 = torch.aten.t %726 : !torch.tensor -> !torch.tensor | |
%2053 = torch.aten.mm %2051, %2052 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2054 = torch.aten.t %2051 : !torch.tensor -> !torch.tensor | |
%2055 = torch.aten.mm %2054, %728 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2056 = torch.aten.t %2055 : !torch.tensor -> !torch.tensor | |
%2057 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2058 = torch.aten.sum.dim_IntList %2051, %2057, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2059 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2060 = torch.aten.view %2058, %2059 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2061 = torch.aten.detach %2060 : !torch.tensor -> !torch.tensor | |
%2062 = torch.aten.detach %2061 : !torch.tensor -> !torch.tensor | |
%2063 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2064 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2065 = torch.aten._reshape_alias %2053, %2063, %2064 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2066 = torch.aten.t %2056 : !torch.tensor -> !torch.tensor | |
%2067 = torch.aten.detach %2066 : !torch.tensor -> !torch.tensor | |
%2068 = torch.aten.detach %2067 : !torch.tensor -> !torch.tensor | |
%2069 = torch.aten.gelu_backward %2065, %724, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%2070 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2071 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2072 = torch.aten._reshape_alias %2069, %2070, %2071 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2073 = torch.aten.t %719 : !torch.tensor -> !torch.tensor | |
%2074 = torch.aten.mm %2072, %2073 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2075 = torch.aten.t %2072 : !torch.tensor -> !torch.tensor | |
%2076 = torch.aten.mm %2075, %721 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2077 = torch.aten.t %2076 : !torch.tensor -> !torch.tensor | |
%2078 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2079 = torch.aten.sum.dim_IntList %2072, %2078, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2080 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%2081 = torch.aten.view %2079, %2080 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2082 = torch.aten.detach %2081 : !torch.tensor -> !torch.tensor | |
%2083 = torch.aten.detach %2082 : !torch.tensor -> !torch.tensor | |
%2084 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2085 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2086 = torch.aten._reshape_alias %2074, %2084, %2085 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2087 = torch.aten.add.Tensor %2039, %2086, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2088 = torch.aten.t %2077 : !torch.tensor -> !torch.tensor | |
%2089 = torch.aten.detach %2088 : !torch.tensor -> !torch.tensor | |
%2090 = torch.aten.detach %2089 : !torch.tensor -> !torch.tensor | |
%2091 = torch.aten.sub.Tensor %717, %result1_46, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2092 = torch.aten.mul.Tensor %2091, %result2_47 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2093 = torch.aten.mul.Tensor %2087, %arg151 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2094 = torch.aten.mul.Scalar %2093, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2095 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2096 = torch.aten.sum.dim_IntList %2093, %2095, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2097 = torch.aten.mul.Tensor %2093, %2092 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2098 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2099 = torch.aten.sum.dim_IntList %2097, %2098, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2100 = torch.aten.mul.Tensor %2092, %2099 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2101 = torch.aten.sub.Tensor %2094, %2096, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2102 = torch.aten.sub.Tensor %2101, %2100, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2103 = torch.aten.div.Scalar %result2_47, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2104 = torch.aten.mul.Tensor %2103, %2102 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2105 = torch.aten.mul.Tensor %2087, %2092 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2106 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2107 = torch.aten.sum.dim_IntList %2105, %2106, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2108 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2109 = torch.aten.sum.dim_IntList %2087, %2108, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2110 = torch.aten.detach %2107 : !torch.tensor -> !torch.tensor | |
%2111 = torch.aten.detach %2110 : !torch.tensor -> !torch.tensor | |
%2112 = torch.aten.detach %2109 : !torch.tensor -> !torch.tensor | |
%2113 = torch.aten.detach %2112 : !torch.tensor -> !torch.tensor | |
%2114 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2115 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2116 = torch.aten._reshape_alias %2104, %2114, %2115 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2117 = torch.aten.t %711 : !torch.tensor -> !torch.tensor | |
%2118 = torch.aten.mm %2116, %2117 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2119 = torch.aten.t %2116 : !torch.tensor -> !torch.tensor | |
%2120 = torch.aten.mm %2119, %713 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2121 = torch.aten.t %2120 : !torch.tensor -> !torch.tensor | |
%2122 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2123 = torch.aten.sum.dim_IntList %2116, %2122, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2124 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2125 = torch.aten.view %2123, %2124 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2126 = torch.aten.detach %2125 : !torch.tensor -> !torch.tensor | |
%2127 = torch.aten.detach %2126 : !torch.tensor -> !torch.tensor | |
%2128 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2129 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2130 = torch.aten._reshape_alias %2118, %2128, %2129 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2131 = torch.aten.t %2121 : !torch.tensor -> !torch.tensor | |
%2132 = torch.aten.detach %2131 : !torch.tensor -> !torch.tensor | |
%2133 = torch.aten.detach %2132 : !torch.tensor -> !torch.tensor | |
%2134 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2135 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2136 = torch.aten._reshape_alias %2130, %2134, %2135 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2137 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2138 = torch.aten.permute %2136, %2137 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2139 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2140 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2141 = torch.aten._reshape_alias %2138, %2139, %2140 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2142 = torch.aten.transpose.int %697, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2143 = torch.aten.bmm %2142, %2141 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2144 = torch.aten.transpose.int %702, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2145 = torch.aten.bmm %2141, %2144 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2146 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2147 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2148 = torch.aten._reshape_alias %2143, %2146, %2147 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2149 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2150 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2151 = torch.aten._reshape_alias %2145, %2149, %2150 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2152 = torch.aten.detach %692 : !torch.tensor -> !torch.tensor | |
%2153 = torch.aten._softmax_backward_data %2151, %2152, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2154 = torch.aten.div.Scalar %2153, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%2155 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2156 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2157 = torch.aten._reshape_alias %2154, %2155, %2156 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2158 = torch.aten.transpose.int %679, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2159 = torch.aten.bmm %2158, %2157 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2160 = torch.aten.transpose.int %684, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2161 = torch.aten.bmm %2157, %2160 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2162 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2163 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2164 = torch.aten._reshape_alias %2159, %2162, %2163 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2165 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2166 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2167 = torch.aten._reshape_alias %2161, %2165, %2166 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2168 = torch.aten.transpose.int %2164, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2169 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2170 = torch.aten.permute %2167, %2169 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2171 = torch.aten.clone %2170, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%2172 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2173 = torch.aten._unsafe_view %2171, %2172 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2174 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2175 = torch.aten.permute %2148, %2174 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2176 = torch.aten.clone %2175, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%2177 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2178 = torch.aten._unsafe_view %2176, %2177 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2179 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2180 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2181 = torch.aten._reshape_alias %2178, %2179, %2180 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2182 = torch.aten.t %660 : !torch.tensor -> !torch.tensor | |
%2183 = torch.aten.mm %2181, %2182 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2184 = torch.aten.t %2181 : !torch.tensor -> !torch.tensor | |
%2185 = torch.aten.mm %2184, %662 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2186 = torch.aten.t %2185 : !torch.tensor -> !torch.tensor | |
%2187 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2188 = torch.aten.sum.dim_IntList %2181, %2187, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2189 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2190 = torch.aten.view %2188, %2189 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2191 = torch.aten.detach %2190 : !torch.tensor -> !torch.tensor | |
%2192 = torch.aten.detach %2191 : !torch.tensor -> !torch.tensor | |
%2193 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2194 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2195 = torch.aten._reshape_alias %2183, %2193, %2194 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2196 = torch.aten.add.Tensor %2104, %2195, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2197 = torch.aten.t %2186 : !torch.tensor -> !torch.tensor | |
%2198 = torch.aten.detach %2197 : !torch.tensor -> !torch.tensor | |
%2199 = torch.aten.detach %2198 : !torch.tensor -> !torch.tensor | |
%2200 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2201 = torch.aten.permute %2168, %2200 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2202 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2203 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2204 = torch.aten._reshape_alias %2201, %2202, %2203 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2205 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2206 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2207 = torch.aten._reshape_alias %2204, %2205, %2206 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2208 = torch.aten.t %650 : !torch.tensor -> !torch.tensor | |
%2209 = torch.aten.mm %2207, %2208 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2210 = torch.aten.t %2207 : !torch.tensor -> !torch.tensor | |
%2211 = torch.aten.mm %2210, %652 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2212 = torch.aten.t %2211 : !torch.tensor -> !torch.tensor | |
%2213 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2214 = torch.aten.sum.dim_IntList %2207, %2213, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2215 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2216 = torch.aten.view %2214, %2215 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2217 = torch.aten.detach %2216 : !torch.tensor -> !torch.tensor | |
%2218 = torch.aten.detach %2217 : !torch.tensor -> !torch.tensor | |
%2219 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2220 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2221 = torch.aten._reshape_alias %2209, %2219, %2220 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2222 = torch.aten.add.Tensor %2196, %2221, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2223 = torch.aten.t %2212 : !torch.tensor -> !torch.tensor | |
%2224 = torch.aten.detach %2223 : !torch.tensor -> !torch.tensor | |
%2225 = torch.aten.detach %2224 : !torch.tensor -> !torch.tensor | |
%2226 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2227 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2228 = torch.aten._reshape_alias %2173, %2226, %2227 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2229 = torch.aten.t %644 : !torch.tensor -> !torch.tensor | |
%2230 = torch.aten.mm %2228, %2229 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2231 = torch.aten.t %2228 : !torch.tensor -> !torch.tensor | |
%2232 = torch.aten.mm %2231, %646 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2233 = torch.aten.t %2232 : !torch.tensor -> !torch.tensor | |
%2234 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2235 = torch.aten.sum.dim_IntList %2228, %2234, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2236 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2237 = torch.aten.view %2235, %2236 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2238 = torch.aten.detach %2237 : !torch.tensor -> !torch.tensor | |
%2239 = torch.aten.detach %2238 : !torch.tensor -> !torch.tensor | |
%2240 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2241 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2242 = torch.aten._reshape_alias %2230, %2240, %2241 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2243 = torch.aten.add.Tensor %2222, %2242, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2244 = torch.aten.t %2233 : !torch.tensor -> !torch.tensor | |
%2245 = torch.aten.detach %2244 : !torch.tensor -> !torch.tensor | |
%2246 = torch.aten.detach %2245 : !torch.tensor -> !torch.tensor | |
%2247 = torch.aten.sub.Tensor %642, %result1_43, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2248 = torch.aten.mul.Tensor %2247, %result2_44 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2249 = torch.aten.mul.Tensor %2243, %arg147 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2250 = torch.aten.mul.Scalar %2249, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2251 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2252 = torch.aten.sum.dim_IntList %2249, %2251, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2253 = torch.aten.mul.Tensor %2249, %2248 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2254 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2255 = torch.aten.sum.dim_IntList %2253, %2254, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2256 = torch.aten.mul.Tensor %2248, %2255 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2257 = torch.aten.sub.Tensor %2250, %2252, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2258 = torch.aten.sub.Tensor %2257, %2256, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2259 = torch.aten.div.Scalar %result2_44, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2260 = torch.aten.mul.Tensor %2259, %2258 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2261 = torch.aten.mul.Tensor %2243, %2248 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2262 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2263 = torch.aten.sum.dim_IntList %2261, %2262, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2264 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2265 = torch.aten.sum.dim_IntList %2243, %2264, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2266 = torch.aten.detach %2263 : !torch.tensor -> !torch.tensor | |
%2267 = torch.aten.detach %2266 : !torch.tensor -> !torch.tensor | |
%2268 = torch.aten.detach %2265 : !torch.tensor -> !torch.tensor | |
%2269 = torch.aten.detach %2268 : !torch.tensor -> !torch.tensor | |
%2270 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2271 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2272 = torch.aten._reshape_alias %2260, %2270, %2271 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2273 = torch.aten.t %636 : !torch.tensor -> !torch.tensor | |
%2274 = torch.aten.mm %2272, %2273 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2275 = torch.aten.t %2272 : !torch.tensor -> !torch.tensor | |
%2276 = torch.aten.mm %2275, %638 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2277 = torch.aten.t %2276 : !torch.tensor -> !torch.tensor | |
%2278 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2279 = torch.aten.sum.dim_IntList %2272, %2278, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2280 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2281 = torch.aten.view %2279, %2280 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2282 = torch.aten.detach %2281 : !torch.tensor -> !torch.tensor | |
%2283 = torch.aten.detach %2282 : !torch.tensor -> !torch.tensor | |
%2284 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2285 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2286 = torch.aten._reshape_alias %2274, %2284, %2285 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2287 = torch.aten.t %2277 : !torch.tensor -> !torch.tensor | |
%2288 = torch.aten.detach %2287 : !torch.tensor -> !torch.tensor | |
%2289 = torch.aten.detach %2288 : !torch.tensor -> !torch.tensor | |
%2290 = torch.aten.gelu_backward %2286, %634, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%2291 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2292 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2293 = torch.aten._reshape_alias %2290, %2291, %2292 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2294 = torch.aten.t %629 : !torch.tensor -> !torch.tensor | |
%2295 = torch.aten.mm %2293, %2294 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2296 = torch.aten.t %2293 : !torch.tensor -> !torch.tensor | |
%2297 = torch.aten.mm %2296, %631 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2298 = torch.aten.t %2297 : !torch.tensor -> !torch.tensor | |
%2299 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2300 = torch.aten.sum.dim_IntList %2293, %2299, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2301 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%2302 = torch.aten.view %2300, %2301 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2303 = torch.aten.detach %2302 : !torch.tensor -> !torch.tensor | |
%2304 = torch.aten.detach %2303 : !torch.tensor -> !torch.tensor | |
%2305 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2306 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2307 = torch.aten._reshape_alias %2295, %2305, %2306 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2308 = torch.aten.add.Tensor %2260, %2307, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2309 = torch.aten.t %2298 : !torch.tensor -> !torch.tensor | |
%2310 = torch.aten.detach %2309 : !torch.tensor -> !torch.tensor | |
%2311 = torch.aten.detach %2310 : !torch.tensor -> !torch.tensor | |
%2312 = torch.aten.sub.Tensor %627, %result1_40, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2313 = torch.aten.mul.Tensor %2312, %result2_41 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2314 = torch.aten.mul.Tensor %2308, %arg135 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2315 = torch.aten.mul.Scalar %2314, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2316 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2317 = torch.aten.sum.dim_IntList %2314, %2316, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2318 = torch.aten.mul.Tensor %2314, %2313 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2319 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2320 = torch.aten.sum.dim_IntList %2318, %2319, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2321 = torch.aten.mul.Tensor %2313, %2320 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2322 = torch.aten.sub.Tensor %2315, %2317, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2323 = torch.aten.sub.Tensor %2322, %2321, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2324 = torch.aten.div.Scalar %result2_41, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2325 = torch.aten.mul.Tensor %2324, %2323 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2326 = torch.aten.mul.Tensor %2308, %2313 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2327 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2328 = torch.aten.sum.dim_IntList %2326, %2327, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2329 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2330 = torch.aten.sum.dim_IntList %2308, %2329, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2331 = torch.aten.detach %2328 : !torch.tensor -> !torch.tensor | |
%2332 = torch.aten.detach %2331 : !torch.tensor -> !torch.tensor | |
%2333 = torch.aten.detach %2330 : !torch.tensor -> !torch.tensor | |
%2334 = torch.aten.detach %2333 : !torch.tensor -> !torch.tensor | |
%2335 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2336 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2337 = torch.aten._reshape_alias %2325, %2335, %2336 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2338 = torch.aten.t %621 : !torch.tensor -> !torch.tensor | |
%2339 = torch.aten.mm %2337, %2338 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2340 = torch.aten.t %2337 : !torch.tensor -> !torch.tensor | |
%2341 = torch.aten.mm %2340, %623 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2342 = torch.aten.t %2341 : !torch.tensor -> !torch.tensor | |
%2343 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2344 = torch.aten.sum.dim_IntList %2337, %2343, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2345 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2346 = torch.aten.view %2344, %2345 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2347 = torch.aten.detach %2346 : !torch.tensor -> !torch.tensor | |
%2348 = torch.aten.detach %2347 : !torch.tensor -> !torch.tensor | |
%2349 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2350 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2351 = torch.aten._reshape_alias %2339, %2349, %2350 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2352 = torch.aten.t %2342 : !torch.tensor -> !torch.tensor | |
%2353 = torch.aten.detach %2352 : !torch.tensor -> !torch.tensor | |
%2354 = torch.aten.detach %2353 : !torch.tensor -> !torch.tensor | |
%2355 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2356 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2357 = torch.aten._reshape_alias %2351, %2355, %2356 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2358 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2359 = torch.aten.permute %2357, %2358 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2360 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2361 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2362 = torch.aten._reshape_alias %2359, %2360, %2361 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2363 = torch.aten.transpose.int %607, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2364 = torch.aten.bmm %2363, %2362 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2365 = torch.aten.transpose.int %612, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2366 = torch.aten.bmm %2362, %2365 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2367 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2368 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2369 = torch.aten._reshape_alias %2364, %2367, %2368 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2370 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2371 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2372 = torch.aten._reshape_alias %2366, %2370, %2371 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2373 = torch.aten.detach %602 : !torch.tensor -> !torch.tensor | |
%2374 = torch.aten._softmax_backward_data %2372, %2373, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2375 = torch.aten.div.Scalar %2374, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%2376 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2377 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2378 = torch.aten._reshape_alias %2375, %2376, %2377 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2379 = torch.aten.transpose.int %589, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2380 = torch.aten.bmm %2379, %2378 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2381 = torch.aten.transpose.int %594, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2382 = torch.aten.bmm %2378, %2381 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2383 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2384 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2385 = torch.aten._reshape_alias %2380, %2383, %2384 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2386 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2387 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2388 = torch.aten._reshape_alias %2382, %2386, %2387 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2389 = torch.aten.transpose.int %2385, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2390 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2391 = torch.aten.permute %2388, %2390 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2392 = torch.aten.clone %2391, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%2393 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2394 = torch.aten._unsafe_view %2392, %2393 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2395 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2396 = torch.aten.permute %2369, %2395 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2397 = torch.aten.clone %2396, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%2398 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2399 = torch.aten._unsafe_view %2397, %2398 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2400 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2401 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2402 = torch.aten._reshape_alias %2399, %2400, %2401 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2403 = torch.aten.t %570 : !torch.tensor -> !torch.tensor | |
%2404 = torch.aten.mm %2402, %2403 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2405 = torch.aten.t %2402 : !torch.tensor -> !torch.tensor | |
%2406 = torch.aten.mm %2405, %572 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2407 = torch.aten.t %2406 : !torch.tensor -> !torch.tensor | |
%2408 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2409 = torch.aten.sum.dim_IntList %2402, %2408, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2410 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2411 = torch.aten.view %2409, %2410 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2412 = torch.aten.detach %2411 : !torch.tensor -> !torch.tensor | |
%2413 = torch.aten.detach %2412 : !torch.tensor -> !torch.tensor | |
%2414 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2415 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2416 = torch.aten._reshape_alias %2404, %2414, %2415 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2417 = torch.aten.add.Tensor %2325, %2416, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2418 = torch.aten.t %2407 : !torch.tensor -> !torch.tensor | |
%2419 = torch.aten.detach %2418 : !torch.tensor -> !torch.tensor | |
%2420 = torch.aten.detach %2419 : !torch.tensor -> !torch.tensor | |
%2421 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2422 = torch.aten.permute %2389, %2421 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2423 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2424 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2425 = torch.aten._reshape_alias %2422, %2423, %2424 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2426 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2427 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2428 = torch.aten._reshape_alias %2425, %2426, %2427 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2429 = torch.aten.t %560 : !torch.tensor -> !torch.tensor | |
%2430 = torch.aten.mm %2428, %2429 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2431 = torch.aten.t %2428 : !torch.tensor -> !torch.tensor | |
%2432 = torch.aten.mm %2431, %562 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2433 = torch.aten.t %2432 : !torch.tensor -> !torch.tensor | |
%2434 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2435 = torch.aten.sum.dim_IntList %2428, %2434, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2436 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2437 = torch.aten.view %2435, %2436 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2438 = torch.aten.detach %2437 : !torch.tensor -> !torch.tensor | |
%2439 = torch.aten.detach %2438 : !torch.tensor -> !torch.tensor | |
%2440 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2441 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2442 = torch.aten._reshape_alias %2430, %2440, %2441 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2443 = torch.aten.add.Tensor %2417, %2442, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2444 = torch.aten.t %2433 : !torch.tensor -> !torch.tensor | |
%2445 = torch.aten.detach %2444 : !torch.tensor -> !torch.tensor | |
%2446 = torch.aten.detach %2445 : !torch.tensor -> !torch.tensor | |
%2447 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2448 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2449 = torch.aten._reshape_alias %2394, %2447, %2448 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2450 = torch.aten.t %554 : !torch.tensor -> !torch.tensor | |
%2451 = torch.aten.mm %2449, %2450 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2452 = torch.aten.t %2449 : !torch.tensor -> !torch.tensor | |
%2453 = torch.aten.mm %2452, %556 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2454 = torch.aten.t %2453 : !torch.tensor -> !torch.tensor | |
%2455 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2456 = torch.aten.sum.dim_IntList %2449, %2455, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2457 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2458 = torch.aten.view %2456, %2457 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2459 = torch.aten.detach %2458 : !torch.tensor -> !torch.tensor | |
%2460 = torch.aten.detach %2459 : !torch.tensor -> !torch.tensor | |
%2461 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2462 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2463 = torch.aten._reshape_alias %2451, %2461, %2462 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2464 = torch.aten.add.Tensor %2443, %2463, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2465 = torch.aten.t %2454 : !torch.tensor -> !torch.tensor | |
%2466 = torch.aten.detach %2465 : !torch.tensor -> !torch.tensor | |
%2467 = torch.aten.detach %2466 : !torch.tensor -> !torch.tensor | |
%2468 = torch.aten.sub.Tensor %552, %result1_37, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2469 = torch.aten.mul.Tensor %2468, %result2_38 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2470 = torch.aten.mul.Tensor %2464, %arg131 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2471 = torch.aten.mul.Scalar %2470, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2472 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2473 = torch.aten.sum.dim_IntList %2470, %2472, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2474 = torch.aten.mul.Tensor %2470, %2469 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2475 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2476 = torch.aten.sum.dim_IntList %2474, %2475, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2477 = torch.aten.mul.Tensor %2469, %2476 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2478 = torch.aten.sub.Tensor %2471, %2473, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2479 = torch.aten.sub.Tensor %2478, %2477, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2480 = torch.aten.div.Scalar %result2_38, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2481 = torch.aten.mul.Tensor %2480, %2479 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2482 = torch.aten.mul.Tensor %2464, %2469 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2483 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2484 = torch.aten.sum.dim_IntList %2482, %2483, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2485 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2486 = torch.aten.sum.dim_IntList %2464, %2485, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2487 = torch.aten.detach %2484 : !torch.tensor -> !torch.tensor | |
%2488 = torch.aten.detach %2487 : !torch.tensor -> !torch.tensor | |
%2489 = torch.aten.detach %2486 : !torch.tensor -> !torch.tensor | |
%2490 = torch.aten.detach %2489 : !torch.tensor -> !torch.tensor | |
%2491 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2492 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2493 = torch.aten._reshape_alias %2481, %2491, %2492 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2494 = torch.aten.t %546 : !torch.tensor -> !torch.tensor | |
%2495 = torch.aten.mm %2493, %2494 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2496 = torch.aten.t %2493 : !torch.tensor -> !torch.tensor | |
%2497 = torch.aten.mm %2496, %548 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2498 = torch.aten.t %2497 : !torch.tensor -> !torch.tensor | |
%2499 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2500 = torch.aten.sum.dim_IntList %2493, %2499, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2501 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2502 = torch.aten.view %2500, %2501 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2503 = torch.aten.detach %2502 : !torch.tensor -> !torch.tensor | |
%2504 = torch.aten.detach %2503 : !torch.tensor -> !torch.tensor | |
%2505 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2506 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2507 = torch.aten._reshape_alias %2495, %2505, %2506 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2508 = torch.aten.t %2498 : !torch.tensor -> !torch.tensor | |
%2509 = torch.aten.detach %2508 : !torch.tensor -> !torch.tensor | |
%2510 = torch.aten.detach %2509 : !torch.tensor -> !torch.tensor | |
%2511 = torch.aten.gelu_backward %2507, %544, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%2512 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2513 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2514 = torch.aten._reshape_alias %2511, %2512, %2513 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2515 = torch.aten.t %539 : !torch.tensor -> !torch.tensor | |
%2516 = torch.aten.mm %2514, %2515 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2517 = torch.aten.t %2514 : !torch.tensor -> !torch.tensor | |
%2518 = torch.aten.mm %2517, %541 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2519 = torch.aten.t %2518 : !torch.tensor -> !torch.tensor | |
%2520 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2521 = torch.aten.sum.dim_IntList %2514, %2520, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2522 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%2523 = torch.aten.view %2521, %2522 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2524 = torch.aten.detach %2523 : !torch.tensor -> !torch.tensor | |
%2525 = torch.aten.detach %2524 : !torch.tensor -> !torch.tensor | |
%2526 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2527 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2528 = torch.aten._reshape_alias %2516, %2526, %2527 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2529 = torch.aten.add.Tensor %2481, %2528, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2530 = torch.aten.t %2519 : !torch.tensor -> !torch.tensor | |
%2531 = torch.aten.detach %2530 : !torch.tensor -> !torch.tensor | |
%2532 = torch.aten.detach %2531 : !torch.tensor -> !torch.tensor | |
%2533 = torch.aten.sub.Tensor %537, %result1_34, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2534 = torch.aten.mul.Tensor %2533, %result2_35 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2535 = torch.aten.mul.Tensor %2529, %arg119 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2536 = torch.aten.mul.Scalar %2535, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2537 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2538 = torch.aten.sum.dim_IntList %2535, %2537, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2539 = torch.aten.mul.Tensor %2535, %2534 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2540 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2541 = torch.aten.sum.dim_IntList %2539, %2540, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2542 = torch.aten.mul.Tensor %2534, %2541 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2543 = torch.aten.sub.Tensor %2536, %2538, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2544 = torch.aten.sub.Tensor %2543, %2542, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2545 = torch.aten.div.Scalar %result2_35, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2546 = torch.aten.mul.Tensor %2545, %2544 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2547 = torch.aten.mul.Tensor %2529, %2534 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2548 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2549 = torch.aten.sum.dim_IntList %2547, %2548, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2550 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2551 = torch.aten.sum.dim_IntList %2529, %2550, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2552 = torch.aten.detach %2549 : !torch.tensor -> !torch.tensor | |
%2553 = torch.aten.detach %2552 : !torch.tensor -> !torch.tensor | |
%2554 = torch.aten.detach %2551 : !torch.tensor -> !torch.tensor | |
%2555 = torch.aten.detach %2554 : !torch.tensor -> !torch.tensor | |
%2556 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2557 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2558 = torch.aten._reshape_alias %2546, %2556, %2557 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2559 = torch.aten.t %531 : !torch.tensor -> !torch.tensor | |
%2560 = torch.aten.mm %2558, %2559 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2561 = torch.aten.t %2558 : !torch.tensor -> !torch.tensor | |
%2562 = torch.aten.mm %2561, %533 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2563 = torch.aten.t %2562 : !torch.tensor -> !torch.tensor | |
%2564 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2565 = torch.aten.sum.dim_IntList %2558, %2564, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2566 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2567 = torch.aten.view %2565, %2566 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2568 = torch.aten.detach %2567 : !torch.tensor -> !torch.tensor | |
%2569 = torch.aten.detach %2568 : !torch.tensor -> !torch.tensor | |
%2570 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2571 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2572 = torch.aten._reshape_alias %2560, %2570, %2571 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2573 = torch.aten.t %2563 : !torch.tensor -> !torch.tensor | |
%2574 = torch.aten.detach %2573 : !torch.tensor -> !torch.tensor | |
%2575 = torch.aten.detach %2574 : !torch.tensor -> !torch.tensor | |
%2576 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2577 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2578 = torch.aten._reshape_alias %2572, %2576, %2577 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2579 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2580 = torch.aten.permute %2578, %2579 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2581 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2582 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2583 = torch.aten._reshape_alias %2580, %2581, %2582 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2584 = torch.aten.transpose.int %517, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2585 = torch.aten.bmm %2584, %2583 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2586 = torch.aten.transpose.int %522, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2587 = torch.aten.bmm %2583, %2586 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2588 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2589 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2590 = torch.aten._reshape_alias %2585, %2588, %2589 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2591 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2592 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2593 = torch.aten._reshape_alias %2587, %2591, %2592 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2594 = torch.aten.detach %512 : !torch.tensor -> !torch.tensor | |
%2595 = torch.aten._softmax_backward_data %2593, %2594, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2596 = torch.aten.div.Scalar %2595, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%2597 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2598 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2599 = torch.aten._reshape_alias %2596, %2597, %2598 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2600 = torch.aten.transpose.int %499, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2601 = torch.aten.bmm %2600, %2599 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2602 = torch.aten.transpose.int %504, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2603 = torch.aten.bmm %2599, %2602 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2604 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2605 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2606 = torch.aten._reshape_alias %2601, %2604, %2605 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2607 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2608 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2609 = torch.aten._reshape_alias %2603, %2607, %2608 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2610 = torch.aten.transpose.int %2606, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2611 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2612 = torch.aten.permute %2609, %2611 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2613 = torch.aten.clone %2612, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%2614 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2615 = torch.aten._unsafe_view %2613, %2614 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2616 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2617 = torch.aten.permute %2590, %2616 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2618 = torch.aten.clone %2617, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%2619 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2620 = torch.aten._unsafe_view %2618, %2619 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2621 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2622 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2623 = torch.aten._reshape_alias %2620, %2621, %2622 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2624 = torch.aten.t %480 : !torch.tensor -> !torch.tensor | |
%2625 = torch.aten.mm %2623, %2624 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2626 = torch.aten.t %2623 : !torch.tensor -> !torch.tensor | |
%2627 = torch.aten.mm %2626, %482 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2628 = torch.aten.t %2627 : !torch.tensor -> !torch.tensor | |
%2629 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2630 = torch.aten.sum.dim_IntList %2623, %2629, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2631 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2632 = torch.aten.view %2630, %2631 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2633 = torch.aten.detach %2632 : !torch.tensor -> !torch.tensor | |
%2634 = torch.aten.detach %2633 : !torch.tensor -> !torch.tensor | |
%2635 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2636 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2637 = torch.aten._reshape_alias %2625, %2635, %2636 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2638 = torch.aten.add.Tensor %2546, %2637, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2639 = torch.aten.t %2628 : !torch.tensor -> !torch.tensor | |
%2640 = torch.aten.detach %2639 : !torch.tensor -> !torch.tensor | |
%2641 = torch.aten.detach %2640 : !torch.tensor -> !torch.tensor | |
%2642 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2643 = torch.aten.permute %2610, %2642 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2644 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2645 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2646 = torch.aten._reshape_alias %2643, %2644, %2645 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2647 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2648 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2649 = torch.aten._reshape_alias %2646, %2647, %2648 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2650 = torch.aten.t %470 : !torch.tensor -> !torch.tensor | |
%2651 = torch.aten.mm %2649, %2650 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2652 = torch.aten.t %2649 : !torch.tensor -> !torch.tensor | |
%2653 = torch.aten.mm %2652, %472 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2654 = torch.aten.t %2653 : !torch.tensor -> !torch.tensor | |
%2655 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2656 = torch.aten.sum.dim_IntList %2649, %2655, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2657 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2658 = torch.aten.view %2656, %2657 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2659 = torch.aten.detach %2658 : !torch.tensor -> !torch.tensor | |
%2660 = torch.aten.detach %2659 : !torch.tensor -> !torch.tensor | |
%2661 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2662 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2663 = torch.aten._reshape_alias %2651, %2661, %2662 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2664 = torch.aten.add.Tensor %2638, %2663, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2665 = torch.aten.t %2654 : !torch.tensor -> !torch.tensor | |
%2666 = torch.aten.detach %2665 : !torch.tensor -> !torch.tensor | |
%2667 = torch.aten.detach %2666 : !torch.tensor -> !torch.tensor | |
%2668 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2669 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2670 = torch.aten._reshape_alias %2615, %2668, %2669 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2671 = torch.aten.t %464 : !torch.tensor -> !torch.tensor | |
%2672 = torch.aten.mm %2670, %2671 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2673 = torch.aten.t %2670 : !torch.tensor -> !torch.tensor | |
%2674 = torch.aten.mm %2673, %466 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2675 = torch.aten.t %2674 : !torch.tensor -> !torch.tensor | |
%2676 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2677 = torch.aten.sum.dim_IntList %2670, %2676, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2678 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2679 = torch.aten.view %2677, %2678 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2680 = torch.aten.detach %2679 : !torch.tensor -> !torch.tensor | |
%2681 = torch.aten.detach %2680 : !torch.tensor -> !torch.tensor | |
%2682 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2683 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2684 = torch.aten._reshape_alias %2672, %2682, %2683 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2685 = torch.aten.add.Tensor %2664, %2684, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2686 = torch.aten.t %2675 : !torch.tensor -> !torch.tensor | |
%2687 = torch.aten.detach %2686 : !torch.tensor -> !torch.tensor | |
%2688 = torch.aten.detach %2687 : !torch.tensor -> !torch.tensor | |
%2689 = torch.aten.sub.Tensor %462, %result1_31, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2690 = torch.aten.mul.Tensor %2689, %result2_32 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2691 = torch.aten.mul.Tensor %2685, %arg115 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2692 = torch.aten.mul.Scalar %2691, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2693 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2694 = torch.aten.sum.dim_IntList %2691, %2693, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2695 = torch.aten.mul.Tensor %2691, %2690 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2696 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2697 = torch.aten.sum.dim_IntList %2695, %2696, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2698 = torch.aten.mul.Tensor %2690, %2697 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2699 = torch.aten.sub.Tensor %2692, %2694, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2700 = torch.aten.sub.Tensor %2699, %2698, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2701 = torch.aten.div.Scalar %result2_32, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2702 = torch.aten.mul.Tensor %2701, %2700 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2703 = torch.aten.mul.Tensor %2685, %2690 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2704 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2705 = torch.aten.sum.dim_IntList %2703, %2704, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2706 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2707 = torch.aten.sum.dim_IntList %2685, %2706, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2708 = torch.aten.detach %2705 : !torch.tensor -> !torch.tensor | |
%2709 = torch.aten.detach %2708 : !torch.tensor -> !torch.tensor | |
%2710 = torch.aten.detach %2707 : !torch.tensor -> !torch.tensor | |
%2711 = torch.aten.detach %2710 : !torch.tensor -> !torch.tensor | |
%2712 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2713 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2714 = torch.aten._reshape_alias %2702, %2712, %2713 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2715 = torch.aten.t %456 : !torch.tensor -> !torch.tensor | |
%2716 = torch.aten.mm %2714, %2715 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2717 = torch.aten.t %2714 : !torch.tensor -> !torch.tensor | |
%2718 = torch.aten.mm %2717, %458 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2719 = torch.aten.t %2718 : !torch.tensor -> !torch.tensor | |
%2720 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2721 = torch.aten.sum.dim_IntList %2714, %2720, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2722 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2723 = torch.aten.view %2721, %2722 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2724 = torch.aten.detach %2723 : !torch.tensor -> !torch.tensor | |
%2725 = torch.aten.detach %2724 : !torch.tensor -> !torch.tensor | |
%2726 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2727 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2728 = torch.aten._reshape_alias %2716, %2726, %2727 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2729 = torch.aten.t %2719 : !torch.tensor -> !torch.tensor | |
%2730 = torch.aten.detach %2729 : !torch.tensor -> !torch.tensor | |
%2731 = torch.aten.detach %2730 : !torch.tensor -> !torch.tensor | |
%2732 = torch.aten.gelu_backward %2728, %454, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%2733 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2734 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2735 = torch.aten._reshape_alias %2732, %2733, %2734 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2736 = torch.aten.t %449 : !torch.tensor -> !torch.tensor | |
%2737 = torch.aten.mm %2735, %2736 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2738 = torch.aten.t %2735 : !torch.tensor -> !torch.tensor | |
%2739 = torch.aten.mm %2738, %451 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2740 = torch.aten.t %2739 : !torch.tensor -> !torch.tensor | |
%2741 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2742 = torch.aten.sum.dim_IntList %2735, %2741, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2743 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%2744 = torch.aten.view %2742, %2743 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2745 = torch.aten.detach %2744 : !torch.tensor -> !torch.tensor | |
%2746 = torch.aten.detach %2745 : !torch.tensor -> !torch.tensor | |
%2747 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2748 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2749 = torch.aten._reshape_alias %2737, %2747, %2748 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2750 = torch.aten.add.Tensor %2702, %2749, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2751 = torch.aten.t %2740 : !torch.tensor -> !torch.tensor | |
%2752 = torch.aten.detach %2751 : !torch.tensor -> !torch.tensor | |
%2753 = torch.aten.detach %2752 : !torch.tensor -> !torch.tensor | |
%2754 = torch.aten.sub.Tensor %447, %result1_28, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2755 = torch.aten.mul.Tensor %2754, %result2_29 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2756 = torch.aten.mul.Tensor %2750, %arg103 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2757 = torch.aten.mul.Scalar %2756, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2758 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2759 = torch.aten.sum.dim_IntList %2756, %2758, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2760 = torch.aten.mul.Tensor %2756, %2755 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2761 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2762 = torch.aten.sum.dim_IntList %2760, %2761, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2763 = torch.aten.mul.Tensor %2755, %2762 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2764 = torch.aten.sub.Tensor %2757, %2759, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2765 = torch.aten.sub.Tensor %2764, %2763, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2766 = torch.aten.div.Scalar %result2_29, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2767 = torch.aten.mul.Tensor %2766, %2765 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2768 = torch.aten.mul.Tensor %2750, %2755 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2769 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2770 = torch.aten.sum.dim_IntList %2768, %2769, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2771 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2772 = torch.aten.sum.dim_IntList %2750, %2771, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2773 = torch.aten.detach %2770 : !torch.tensor -> !torch.tensor | |
%2774 = torch.aten.detach %2773 : !torch.tensor -> !torch.tensor | |
%2775 = torch.aten.detach %2772 : !torch.tensor -> !torch.tensor | |
%2776 = torch.aten.detach %2775 : !torch.tensor -> !torch.tensor | |
%2777 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2778 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2779 = torch.aten._reshape_alias %2767, %2777, %2778 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2780 = torch.aten.t %441 : !torch.tensor -> !torch.tensor | |
%2781 = torch.aten.mm %2779, %2780 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2782 = torch.aten.t %2779 : !torch.tensor -> !torch.tensor | |
%2783 = torch.aten.mm %2782, %443 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2784 = torch.aten.t %2783 : !torch.tensor -> !torch.tensor | |
%2785 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2786 = torch.aten.sum.dim_IntList %2779, %2785, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2787 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2788 = torch.aten.view %2786, %2787 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2789 = torch.aten.detach %2788 : !torch.tensor -> !torch.tensor | |
%2790 = torch.aten.detach %2789 : !torch.tensor -> !torch.tensor | |
%2791 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2792 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2793 = torch.aten._reshape_alias %2781, %2791, %2792 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2794 = torch.aten.t %2784 : !torch.tensor -> !torch.tensor | |
%2795 = torch.aten.detach %2794 : !torch.tensor -> !torch.tensor | |
%2796 = torch.aten.detach %2795 : !torch.tensor -> !torch.tensor | |
%2797 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2798 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2799 = torch.aten._reshape_alias %2793, %2797, %2798 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2800 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2801 = torch.aten.permute %2799, %2800 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2802 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2803 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2804 = torch.aten._reshape_alias %2801, %2802, %2803 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2805 = torch.aten.transpose.int %427, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2806 = torch.aten.bmm %2805, %2804 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2807 = torch.aten.transpose.int %432, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2808 = torch.aten.bmm %2804, %2807 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2809 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2810 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2811 = torch.aten._reshape_alias %2806, %2809, %2810 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2812 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2813 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2814 = torch.aten._reshape_alias %2808, %2812, %2813 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2815 = torch.aten.detach %422 : !torch.tensor -> !torch.tensor | |
%2816 = torch.aten._softmax_backward_data %2814, %2815, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2817 = torch.aten.div.Scalar %2816, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%2818 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2819 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2820 = torch.aten._reshape_alias %2817, %2818, %2819 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2821 = torch.aten.transpose.int %409, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2822 = torch.aten.bmm %2821, %2820 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2823 = torch.aten.transpose.int %414, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2824 = torch.aten.bmm %2820, %2823 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2825 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2826 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2827 = torch.aten._reshape_alias %2822, %2825, %2826 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2828 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2829 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2830 = torch.aten._reshape_alias %2824, %2828, %2829 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2831 = torch.aten.transpose.int %2827, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%2832 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2833 = torch.aten.permute %2830, %2832 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2834 = torch.aten.clone %2833, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%2835 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2836 = torch.aten._unsafe_view %2834, %2835 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2837 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2838 = torch.aten.permute %2811, %2837 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2839 = torch.aten.clone %2838, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%2840 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2841 = torch.aten._unsafe_view %2839, %2840 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2842 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2843 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2844 = torch.aten._reshape_alias %2841, %2842, %2843 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2845 = torch.aten.t %390 : !torch.tensor -> !torch.tensor | |
%2846 = torch.aten.mm %2844, %2845 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2847 = torch.aten.t %2844 : !torch.tensor -> !torch.tensor | |
%2848 = torch.aten.mm %2847, %392 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2849 = torch.aten.t %2848 : !torch.tensor -> !torch.tensor | |
%2850 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2851 = torch.aten.sum.dim_IntList %2844, %2850, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2852 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2853 = torch.aten.view %2851, %2852 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2854 = torch.aten.detach %2853 : !torch.tensor -> !torch.tensor | |
%2855 = torch.aten.detach %2854 : !torch.tensor -> !torch.tensor | |
%2856 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2857 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2858 = torch.aten._reshape_alias %2846, %2856, %2857 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2859 = torch.aten.add.Tensor %2767, %2858, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2860 = torch.aten.t %2849 : !torch.tensor -> !torch.tensor | |
%2861 = torch.aten.detach %2860 : !torch.tensor -> !torch.tensor | |
%2862 = torch.aten.detach %2861 : !torch.tensor -> !torch.tensor | |
%2863 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2864 = torch.aten.permute %2831, %2863 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2865 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2866 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2867 = torch.aten._reshape_alias %2864, %2865, %2866 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2868 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2869 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2870 = torch.aten._reshape_alias %2867, %2868, %2869 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2871 = torch.aten.t %380 : !torch.tensor -> !torch.tensor | |
%2872 = torch.aten.mm %2870, %2871 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2873 = torch.aten.t %2870 : !torch.tensor -> !torch.tensor | |
%2874 = torch.aten.mm %2873, %382 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2875 = torch.aten.t %2874 : !torch.tensor -> !torch.tensor | |
%2876 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2877 = torch.aten.sum.dim_IntList %2870, %2876, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2878 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2879 = torch.aten.view %2877, %2878 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2880 = torch.aten.detach %2879 : !torch.tensor -> !torch.tensor | |
%2881 = torch.aten.detach %2880 : !torch.tensor -> !torch.tensor | |
%2882 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2883 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2884 = torch.aten._reshape_alias %2872, %2882, %2883 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2885 = torch.aten.add.Tensor %2859, %2884, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2886 = torch.aten.t %2875 : !torch.tensor -> !torch.tensor | |
%2887 = torch.aten.detach %2886 : !torch.tensor -> !torch.tensor | |
%2888 = torch.aten.detach %2887 : !torch.tensor -> !torch.tensor | |
%2889 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2890 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2891 = torch.aten._reshape_alias %2836, %2889, %2890 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2892 = torch.aten.t %374 : !torch.tensor -> !torch.tensor | |
%2893 = torch.aten.mm %2891, %2892 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2894 = torch.aten.t %2891 : !torch.tensor -> !torch.tensor | |
%2895 = torch.aten.mm %2894, %376 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2896 = torch.aten.t %2895 : !torch.tensor -> !torch.tensor | |
%2897 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2898 = torch.aten.sum.dim_IntList %2891, %2897, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2899 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2900 = torch.aten.view %2898, %2899 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2901 = torch.aten.detach %2900 : !torch.tensor -> !torch.tensor | |
%2902 = torch.aten.detach %2901 : !torch.tensor -> !torch.tensor | |
%2903 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2904 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2905 = torch.aten._reshape_alias %2893, %2903, %2904 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2906 = torch.aten.add.Tensor %2885, %2905, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2907 = torch.aten.t %2896 : !torch.tensor -> !torch.tensor | |
%2908 = torch.aten.detach %2907 : !torch.tensor -> !torch.tensor | |
%2909 = torch.aten.detach %2908 : !torch.tensor -> !torch.tensor | |
%2910 = torch.aten.sub.Tensor %372, %result1_25, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2911 = torch.aten.mul.Tensor %2910, %result2_26 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2912 = torch.aten.mul.Tensor %2906, %arg99 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2913 = torch.aten.mul.Scalar %2912, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2914 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2915 = torch.aten.sum.dim_IntList %2912, %2914, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2916 = torch.aten.mul.Tensor %2912, %2911 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2917 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2918 = torch.aten.sum.dim_IntList %2916, %2917, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2919 = torch.aten.mul.Tensor %2911, %2918 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2920 = torch.aten.sub.Tensor %2913, %2915, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2921 = torch.aten.sub.Tensor %2920, %2919, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2922 = torch.aten.div.Scalar %result2_26, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2923 = torch.aten.mul.Tensor %2922, %2921 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2924 = torch.aten.mul.Tensor %2906, %2911 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2925 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2926 = torch.aten.sum.dim_IntList %2924, %2925, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2927 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2928 = torch.aten.sum.dim_IntList %2906, %2927, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2929 = torch.aten.detach %2926 : !torch.tensor -> !torch.tensor | |
%2930 = torch.aten.detach %2929 : !torch.tensor -> !torch.tensor | |
%2931 = torch.aten.detach %2928 : !torch.tensor -> !torch.tensor | |
%2932 = torch.aten.detach %2931 : !torch.tensor -> !torch.tensor | |
%2933 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2934 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2935 = torch.aten._reshape_alias %2923, %2933, %2934 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2936 = torch.aten.t %366 : !torch.tensor -> !torch.tensor | |
%2937 = torch.aten.mm %2935, %2936 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2938 = torch.aten.t %2935 : !torch.tensor -> !torch.tensor | |
%2939 = torch.aten.mm %2938, %368 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2940 = torch.aten.t %2939 : !torch.tensor -> !torch.tensor | |
%2941 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2942 = torch.aten.sum.dim_IntList %2935, %2941, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2943 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%2944 = torch.aten.view %2942, %2943 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2945 = torch.aten.detach %2944 : !torch.tensor -> !torch.tensor | |
%2946 = torch.aten.detach %2945 : !torch.tensor -> !torch.tensor | |
%2947 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2948 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2949 = torch.aten._reshape_alias %2937, %2947, %2948 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2950 = torch.aten.t %2940 : !torch.tensor -> !torch.tensor | |
%2951 = torch.aten.detach %2950 : !torch.tensor -> !torch.tensor | |
%2952 = torch.aten.detach %2951 : !torch.tensor -> !torch.tensor | |
%2953 = torch.aten.gelu_backward %2949, %364, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%2954 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2955 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2956 = torch.aten._reshape_alias %2953, %2954, %2955 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2957 = torch.aten.t %359 : !torch.tensor -> !torch.tensor | |
%2958 = torch.aten.mm %2956, %2957 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2959 = torch.aten.t %2956 : !torch.tensor -> !torch.tensor | |
%2960 = torch.aten.mm %2959, %361 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2961 = torch.aten.t %2960 : !torch.tensor -> !torch.tensor | |
%2962 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%2963 = torch.aten.sum.dim_IntList %2956, %2962, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2964 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%2965 = torch.aten.view %2963, %2964 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%2966 = torch.aten.detach %2965 : !torch.tensor -> !torch.tensor | |
%2967 = torch.aten.detach %2966 : !torch.tensor -> !torch.tensor | |
%2968 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2969 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2970 = torch.aten._reshape_alias %2958, %2968, %2969 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%2971 = torch.aten.add.Tensor %2923, %2970, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2972 = torch.aten.t %2961 : !torch.tensor -> !torch.tensor | |
%2973 = torch.aten.detach %2972 : !torch.tensor -> !torch.tensor | |
%2974 = torch.aten.detach %2973 : !torch.tensor -> !torch.tensor | |
%2975 = torch.aten.sub.Tensor %357, %result1_22, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2976 = torch.aten.mul.Tensor %2975, %result2_23 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2977 = torch.aten.mul.Tensor %2971, %arg87 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2978 = torch.aten.mul.Scalar %2977, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2979 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2980 = torch.aten.sum.dim_IntList %2977, %2979, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2981 = torch.aten.mul.Tensor %2977, %2976 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2982 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%2983 = torch.aten.sum.dim_IntList %2981, %2982, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2984 = torch.aten.mul.Tensor %2976, %2983 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2985 = torch.aten.sub.Tensor %2978, %2980, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2986 = torch.aten.sub.Tensor %2985, %2984, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%2987 = torch.aten.div.Scalar %result2_23, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%2988 = torch.aten.mul.Tensor %2987, %2986 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2989 = torch.aten.mul.Tensor %2971, %2976 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%2990 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2991 = torch.aten.sum.dim_IntList %2989, %2990, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2992 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2993 = torch.aten.sum.dim_IntList %2971, %2992, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%2994 = torch.aten.detach %2991 : !torch.tensor -> !torch.tensor | |
%2995 = torch.aten.detach %2994 : !torch.tensor -> !torch.tensor | |
%2996 = torch.aten.detach %2993 : !torch.tensor -> !torch.tensor | |
%2997 = torch.aten.detach %2996 : !torch.tensor -> !torch.tensor | |
%2998 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2999 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3000 = torch.aten._reshape_alias %2988, %2998, %2999 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3001 = torch.aten.t %351 : !torch.tensor -> !torch.tensor | |
%3002 = torch.aten.mm %3000, %3001 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3003 = torch.aten.t %3000 : !torch.tensor -> !torch.tensor | |
%3004 = torch.aten.mm %3003, %353 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3005 = torch.aten.t %3004 : !torch.tensor -> !torch.tensor | |
%3006 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3007 = torch.aten.sum.dim_IntList %3000, %3006, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3008 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3009 = torch.aten.view %3007, %3008 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3010 = torch.aten.detach %3009 : !torch.tensor -> !torch.tensor | |
%3011 = torch.aten.detach %3010 : !torch.tensor -> !torch.tensor | |
%3012 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3013 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3014 = torch.aten._reshape_alias %3002, %3012, %3013 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3015 = torch.aten.t %3005 : !torch.tensor -> !torch.tensor | |
%3016 = torch.aten.detach %3015 : !torch.tensor -> !torch.tensor | |
%3017 = torch.aten.detach %3016 : !torch.tensor -> !torch.tensor | |
%3018 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3019 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3020 = torch.aten._reshape_alias %3014, %3018, %3019 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3021 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3022 = torch.aten.permute %3020, %3021 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3023 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3024 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3025 = torch.aten._reshape_alias %3022, %3023, %3024 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3026 = torch.aten.transpose.int %337, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3027 = torch.aten.bmm %3026, %3025 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3028 = torch.aten.transpose.int %342, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3029 = torch.aten.bmm %3025, %3028 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3030 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3031 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3032 = torch.aten._reshape_alias %3027, %3030, %3031 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3033 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3034 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3035 = torch.aten._reshape_alias %3029, %3033, %3034 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3036 = torch.aten.detach %332 : !torch.tensor -> !torch.tensor | |
%3037 = torch.aten._softmax_backward_data %3035, %3036, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3038 = torch.aten.div.Scalar %3037, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%3039 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3040 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3041 = torch.aten._reshape_alias %3038, %3039, %3040 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3042 = torch.aten.transpose.int %319, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3043 = torch.aten.bmm %3042, %3041 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3044 = torch.aten.transpose.int %324, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3045 = torch.aten.bmm %3041, %3044 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3046 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3047 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3048 = torch.aten._reshape_alias %3043, %3046, %3047 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3049 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3050 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3051 = torch.aten._reshape_alias %3045, %3049, %3050 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3052 = torch.aten.transpose.int %3048, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3053 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3054 = torch.aten.permute %3051, %3053 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3055 = torch.aten.clone %3054, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%3056 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3057 = torch.aten._unsafe_view %3055, %3056 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3058 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3059 = torch.aten.permute %3032, %3058 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3060 = torch.aten.clone %3059, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%3061 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3062 = torch.aten._unsafe_view %3060, %3061 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3063 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3064 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3065 = torch.aten._reshape_alias %3062, %3063, %3064 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3066 = torch.aten.t %300 : !torch.tensor -> !torch.tensor | |
%3067 = torch.aten.mm %3065, %3066 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3068 = torch.aten.t %3065 : !torch.tensor -> !torch.tensor | |
%3069 = torch.aten.mm %3068, %302 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3070 = torch.aten.t %3069 : !torch.tensor -> !torch.tensor | |
%3071 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3072 = torch.aten.sum.dim_IntList %3065, %3071, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3073 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3074 = torch.aten.view %3072, %3073 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3075 = torch.aten.detach %3074 : !torch.tensor -> !torch.tensor | |
%3076 = torch.aten.detach %3075 : !torch.tensor -> !torch.tensor | |
%3077 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3078 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3079 = torch.aten._reshape_alias %3067, %3077, %3078 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3080 = torch.aten.add.Tensor %2988, %3079, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3081 = torch.aten.t %3070 : !torch.tensor -> !torch.tensor | |
%3082 = torch.aten.detach %3081 : !torch.tensor -> !torch.tensor | |
%3083 = torch.aten.detach %3082 : !torch.tensor -> !torch.tensor | |
%3084 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3085 = torch.aten.permute %3052, %3084 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3086 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3087 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3088 = torch.aten._reshape_alias %3085, %3086, %3087 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3089 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3090 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3091 = torch.aten._reshape_alias %3088, %3089, %3090 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3092 = torch.aten.t %290 : !torch.tensor -> !torch.tensor | |
%3093 = torch.aten.mm %3091, %3092 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3094 = torch.aten.t %3091 : !torch.tensor -> !torch.tensor | |
%3095 = torch.aten.mm %3094, %292 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3096 = torch.aten.t %3095 : !torch.tensor -> !torch.tensor | |
%3097 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3098 = torch.aten.sum.dim_IntList %3091, %3097, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3099 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3100 = torch.aten.view %3098, %3099 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3101 = torch.aten.detach %3100 : !torch.tensor -> !torch.tensor | |
%3102 = torch.aten.detach %3101 : !torch.tensor -> !torch.tensor | |
%3103 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3104 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3105 = torch.aten._reshape_alias %3093, %3103, %3104 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3106 = torch.aten.add.Tensor %3080, %3105, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3107 = torch.aten.t %3096 : !torch.tensor -> !torch.tensor | |
%3108 = torch.aten.detach %3107 : !torch.tensor -> !torch.tensor | |
%3109 = torch.aten.detach %3108 : !torch.tensor -> !torch.tensor | |
%3110 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3111 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3112 = torch.aten._reshape_alias %3057, %3110, %3111 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3113 = torch.aten.t %284 : !torch.tensor -> !torch.tensor | |
%3114 = torch.aten.mm %3112, %3113 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3115 = torch.aten.t %3112 : !torch.tensor -> !torch.tensor | |
%3116 = torch.aten.mm %3115, %286 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3117 = torch.aten.t %3116 : !torch.tensor -> !torch.tensor | |
%3118 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3119 = torch.aten.sum.dim_IntList %3112, %3118, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3120 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3121 = torch.aten.view %3119, %3120 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3122 = torch.aten.detach %3121 : !torch.tensor -> !torch.tensor | |
%3123 = torch.aten.detach %3122 : !torch.tensor -> !torch.tensor | |
%3124 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3125 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3126 = torch.aten._reshape_alias %3114, %3124, %3125 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3127 = torch.aten.add.Tensor %3106, %3126, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3128 = torch.aten.t %3117 : !torch.tensor -> !torch.tensor | |
%3129 = torch.aten.detach %3128 : !torch.tensor -> !torch.tensor | |
%3130 = torch.aten.detach %3129 : !torch.tensor -> !torch.tensor | |
%3131 = torch.aten.sub.Tensor %282, %result1_19, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3132 = torch.aten.mul.Tensor %3131, %result2_20 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3133 = torch.aten.mul.Tensor %3127, %arg83 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3134 = torch.aten.mul.Scalar %3133, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3135 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3136 = torch.aten.sum.dim_IntList %3133, %3135, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3137 = torch.aten.mul.Tensor %3133, %3132 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3138 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3139 = torch.aten.sum.dim_IntList %3137, %3138, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3140 = torch.aten.mul.Tensor %3132, %3139 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3141 = torch.aten.sub.Tensor %3134, %3136, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3142 = torch.aten.sub.Tensor %3141, %3140, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3143 = torch.aten.div.Scalar %result2_20, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3144 = torch.aten.mul.Tensor %3143, %3142 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3145 = torch.aten.mul.Tensor %3127, %3132 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3146 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3147 = torch.aten.sum.dim_IntList %3145, %3146, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3148 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3149 = torch.aten.sum.dim_IntList %3127, %3148, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3150 = torch.aten.detach %3147 : !torch.tensor -> !torch.tensor | |
%3151 = torch.aten.detach %3150 : !torch.tensor -> !torch.tensor | |
%3152 = torch.aten.detach %3149 : !torch.tensor -> !torch.tensor | |
%3153 = torch.aten.detach %3152 : !torch.tensor -> !torch.tensor | |
%3154 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3155 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3156 = torch.aten._reshape_alias %3144, %3154, %3155 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3157 = torch.aten.t %276 : !torch.tensor -> !torch.tensor | |
%3158 = torch.aten.mm %3156, %3157 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3159 = torch.aten.t %3156 : !torch.tensor -> !torch.tensor | |
%3160 = torch.aten.mm %3159, %278 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3161 = torch.aten.t %3160 : !torch.tensor -> !torch.tensor | |
%3162 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3163 = torch.aten.sum.dim_IntList %3156, %3162, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3164 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3165 = torch.aten.view %3163, %3164 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3166 = torch.aten.detach %3165 : !torch.tensor -> !torch.tensor | |
%3167 = torch.aten.detach %3166 : !torch.tensor -> !torch.tensor | |
%3168 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3169 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3170 = torch.aten._reshape_alias %3158, %3168, %3169 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3171 = torch.aten.t %3161 : !torch.tensor -> !torch.tensor | |
%3172 = torch.aten.detach %3171 : !torch.tensor -> !torch.tensor | |
%3173 = torch.aten.detach %3172 : !torch.tensor -> !torch.tensor | |
%3174 = torch.aten.gelu_backward %3170, %274, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%3175 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3176 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3177 = torch.aten._reshape_alias %3174, %3175, %3176 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3178 = torch.aten.t %269 : !torch.tensor -> !torch.tensor | |
%3179 = torch.aten.mm %3177, %3178 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3180 = torch.aten.t %3177 : !torch.tensor -> !torch.tensor | |
%3181 = torch.aten.mm %3180, %271 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3182 = torch.aten.t %3181 : !torch.tensor -> !torch.tensor | |
%3183 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3184 = torch.aten.sum.dim_IntList %3177, %3183, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3185 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%3186 = torch.aten.view %3184, %3185 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3187 = torch.aten.detach %3186 : !torch.tensor -> !torch.tensor | |
%3188 = torch.aten.detach %3187 : !torch.tensor -> !torch.tensor | |
%3189 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3190 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3191 = torch.aten._reshape_alias %3179, %3189, %3190 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3192 = torch.aten.add.Tensor %3144, %3191, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3193 = torch.aten.t %3182 : !torch.tensor -> !torch.tensor | |
%3194 = torch.aten.detach %3193 : !torch.tensor -> !torch.tensor | |
%3195 = torch.aten.detach %3194 : !torch.tensor -> !torch.tensor | |
%3196 = torch.aten.sub.Tensor %267, %result1_16, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3197 = torch.aten.mul.Tensor %3196, %result2_17 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3198 = torch.aten.mul.Tensor %3192, %arg71 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3199 = torch.aten.mul.Scalar %3198, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3200 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3201 = torch.aten.sum.dim_IntList %3198, %3200, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3202 = torch.aten.mul.Tensor %3198, %3197 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3203 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3204 = torch.aten.sum.dim_IntList %3202, %3203, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3205 = torch.aten.mul.Tensor %3197, %3204 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3206 = torch.aten.sub.Tensor %3199, %3201, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3207 = torch.aten.sub.Tensor %3206, %3205, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3208 = torch.aten.div.Scalar %result2_17, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3209 = torch.aten.mul.Tensor %3208, %3207 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3210 = torch.aten.mul.Tensor %3192, %3197 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3211 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3212 = torch.aten.sum.dim_IntList %3210, %3211, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3213 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3214 = torch.aten.sum.dim_IntList %3192, %3213, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3215 = torch.aten.detach %3212 : !torch.tensor -> !torch.tensor | |
%3216 = torch.aten.detach %3215 : !torch.tensor -> !torch.tensor | |
%3217 = torch.aten.detach %3214 : !torch.tensor -> !torch.tensor | |
%3218 = torch.aten.detach %3217 : !torch.tensor -> !torch.tensor | |
%3219 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3220 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3221 = torch.aten._reshape_alias %3209, %3219, %3220 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3222 = torch.aten.t %261 : !torch.tensor -> !torch.tensor | |
%3223 = torch.aten.mm %3221, %3222 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3224 = torch.aten.t %3221 : !torch.tensor -> !torch.tensor | |
%3225 = torch.aten.mm %3224, %263 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3226 = torch.aten.t %3225 : !torch.tensor -> !torch.tensor | |
%3227 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3228 = torch.aten.sum.dim_IntList %3221, %3227, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3229 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3230 = torch.aten.view %3228, %3229 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3231 = torch.aten.detach %3230 : !torch.tensor -> !torch.tensor | |
%3232 = torch.aten.detach %3231 : !torch.tensor -> !torch.tensor | |
%3233 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3234 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3235 = torch.aten._reshape_alias %3223, %3233, %3234 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3236 = torch.aten.t %3226 : !torch.tensor -> !torch.tensor | |
%3237 = torch.aten.detach %3236 : !torch.tensor -> !torch.tensor | |
%3238 = torch.aten.detach %3237 : !torch.tensor -> !torch.tensor | |
%3239 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3240 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3241 = torch.aten._reshape_alias %3235, %3239, %3240 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3242 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3243 = torch.aten.permute %3241, %3242 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3244 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3245 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3246 = torch.aten._reshape_alias %3243, %3244, %3245 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3247 = torch.aten.transpose.int %247, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3248 = torch.aten.bmm %3247, %3246 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3249 = torch.aten.transpose.int %252, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3250 = torch.aten.bmm %3246, %3249 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3251 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3252 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3253 = torch.aten._reshape_alias %3248, %3251, %3252 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3254 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3255 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3256 = torch.aten._reshape_alias %3250, %3254, %3255 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3257 = torch.aten.detach %242 : !torch.tensor -> !torch.tensor | |
%3258 = torch.aten._softmax_backward_data %3256, %3257, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3259 = torch.aten.div.Scalar %3258, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%3260 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3261 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3262 = torch.aten._reshape_alias %3259, %3260, %3261 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3263 = torch.aten.transpose.int %229, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3264 = torch.aten.bmm %3263, %3262 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3265 = torch.aten.transpose.int %234, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3266 = torch.aten.bmm %3262, %3265 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3267 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3268 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3269 = torch.aten._reshape_alias %3264, %3267, %3268 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3270 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3271 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3272 = torch.aten._reshape_alias %3266, %3270, %3271 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3273 = torch.aten.transpose.int %3269, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3274 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3275 = torch.aten.permute %3272, %3274 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3276 = torch.aten.clone %3275, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%3277 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3278 = torch.aten._unsafe_view %3276, %3277 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3279 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3280 = torch.aten.permute %3253, %3279 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3281 = torch.aten.clone %3280, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%3282 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3283 = torch.aten._unsafe_view %3281, %3282 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3284 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3285 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3286 = torch.aten._reshape_alias %3283, %3284, %3285 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3287 = torch.aten.t %210 : !torch.tensor -> !torch.tensor | |
%3288 = torch.aten.mm %3286, %3287 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3289 = torch.aten.t %3286 : !torch.tensor -> !torch.tensor | |
%3290 = torch.aten.mm %3289, %212 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3291 = torch.aten.t %3290 : !torch.tensor -> !torch.tensor | |
%3292 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3293 = torch.aten.sum.dim_IntList %3286, %3292, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3294 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3295 = torch.aten.view %3293, %3294 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3296 = torch.aten.detach %3295 : !torch.tensor -> !torch.tensor | |
%3297 = torch.aten.detach %3296 : !torch.tensor -> !torch.tensor | |
%3298 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3299 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3300 = torch.aten._reshape_alias %3288, %3298, %3299 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3301 = torch.aten.add.Tensor %3209, %3300, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3302 = torch.aten.t %3291 : !torch.tensor -> !torch.tensor | |
%3303 = torch.aten.detach %3302 : !torch.tensor -> !torch.tensor | |
%3304 = torch.aten.detach %3303 : !torch.tensor -> !torch.tensor | |
%3305 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3306 = torch.aten.permute %3273, %3305 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3307 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3308 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3309 = torch.aten._reshape_alias %3306, %3307, %3308 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3310 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3311 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3312 = torch.aten._reshape_alias %3309, %3310, %3311 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3313 = torch.aten.t %200 : !torch.tensor -> !torch.tensor | |
%3314 = torch.aten.mm %3312, %3313 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3315 = torch.aten.t %3312 : !torch.tensor -> !torch.tensor | |
%3316 = torch.aten.mm %3315, %202 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3317 = torch.aten.t %3316 : !torch.tensor -> !torch.tensor | |
%3318 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3319 = torch.aten.sum.dim_IntList %3312, %3318, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3320 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3321 = torch.aten.view %3319, %3320 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3322 = torch.aten.detach %3321 : !torch.tensor -> !torch.tensor | |
%3323 = torch.aten.detach %3322 : !torch.tensor -> !torch.tensor | |
%3324 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3325 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3326 = torch.aten._reshape_alias %3314, %3324, %3325 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3327 = torch.aten.add.Tensor %3301, %3326, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3328 = torch.aten.t %3317 : !torch.tensor -> !torch.tensor | |
%3329 = torch.aten.detach %3328 : !torch.tensor -> !torch.tensor | |
%3330 = torch.aten.detach %3329 : !torch.tensor -> !torch.tensor | |
%3331 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3332 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3333 = torch.aten._reshape_alias %3278, %3331, %3332 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3334 = torch.aten.t %194 : !torch.tensor -> !torch.tensor | |
%3335 = torch.aten.mm %3333, %3334 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3336 = torch.aten.t %3333 : !torch.tensor -> !torch.tensor | |
%3337 = torch.aten.mm %3336, %196 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3338 = torch.aten.t %3337 : !torch.tensor -> !torch.tensor | |
%3339 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3340 = torch.aten.sum.dim_IntList %3333, %3339, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3341 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3342 = torch.aten.view %3340, %3341 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3343 = torch.aten.detach %3342 : !torch.tensor -> !torch.tensor | |
%3344 = torch.aten.detach %3343 : !torch.tensor -> !torch.tensor | |
%3345 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3346 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3347 = torch.aten._reshape_alias %3335, %3345, %3346 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3348 = torch.aten.add.Tensor %3327, %3347, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3349 = torch.aten.t %3338 : !torch.tensor -> !torch.tensor | |
%3350 = torch.aten.detach %3349 : !torch.tensor -> !torch.tensor | |
%3351 = torch.aten.detach %3350 : !torch.tensor -> !torch.tensor | |
%3352 = torch.aten.sub.Tensor %192, %result1_13, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3353 = torch.aten.mul.Tensor %3352, %result2_14 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3354 = torch.aten.mul.Tensor %3348, %arg35 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3355 = torch.aten.mul.Scalar %3354, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3356 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3357 = torch.aten.sum.dim_IntList %3354, %3356, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3358 = torch.aten.mul.Tensor %3354, %3353 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3359 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3360 = torch.aten.sum.dim_IntList %3358, %3359, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3361 = torch.aten.mul.Tensor %3353, %3360 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3362 = torch.aten.sub.Tensor %3355, %3357, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3363 = torch.aten.sub.Tensor %3362, %3361, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3364 = torch.aten.div.Scalar %result2_14, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3365 = torch.aten.mul.Tensor %3364, %3363 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3366 = torch.aten.mul.Tensor %3348, %3353 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3367 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3368 = torch.aten.sum.dim_IntList %3366, %3367, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3369 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3370 = torch.aten.sum.dim_IntList %3348, %3369, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3371 = torch.aten.detach %3368 : !torch.tensor -> !torch.tensor | |
%3372 = torch.aten.detach %3371 : !torch.tensor -> !torch.tensor | |
%3373 = torch.aten.detach %3370 : !torch.tensor -> !torch.tensor | |
%3374 = torch.aten.detach %3373 : !torch.tensor -> !torch.tensor | |
%3375 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3376 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3377 = torch.aten._reshape_alias %3365, %3375, %3376 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3378 = torch.aten.t %186 : !torch.tensor -> !torch.tensor | |
%3379 = torch.aten.mm %3377, %3378 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3380 = torch.aten.t %3377 : !torch.tensor -> !torch.tensor | |
%3381 = torch.aten.mm %3380, %188 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3382 = torch.aten.t %3381 : !torch.tensor -> !torch.tensor | |
%3383 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3384 = torch.aten.sum.dim_IntList %3377, %3383, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3385 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3386 = torch.aten.view %3384, %3385 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3387 = torch.aten.detach %3386 : !torch.tensor -> !torch.tensor | |
%3388 = torch.aten.detach %3387 : !torch.tensor -> !torch.tensor | |
%3389 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3390 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3391 = torch.aten._reshape_alias %3379, %3389, %3390 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3392 = torch.aten.t %3382 : !torch.tensor -> !torch.tensor | |
%3393 = torch.aten.detach %3392 : !torch.tensor -> !torch.tensor | |
%3394 = torch.aten.detach %3393 : !torch.tensor -> !torch.tensor | |
%3395 = torch.aten.gelu_backward %3391, %184, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%3396 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3397 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3398 = torch.aten._reshape_alias %3395, %3396, %3397 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3399 = torch.aten.t %179 : !torch.tensor -> !torch.tensor | |
%3400 = torch.aten.mm %3398, %3399 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3401 = torch.aten.t %3398 : !torch.tensor -> !torch.tensor | |
%3402 = torch.aten.mm %3401, %181 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3403 = torch.aten.t %3402 : !torch.tensor -> !torch.tensor | |
%3404 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3405 = torch.aten.sum.dim_IntList %3398, %3404, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3406 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%3407 = torch.aten.view %3405, %3406 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3408 = torch.aten.detach %3407 : !torch.tensor -> !torch.tensor | |
%3409 = torch.aten.detach %3408 : !torch.tensor -> !torch.tensor | |
%3410 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3411 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3412 = torch.aten._reshape_alias %3400, %3410, %3411 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3413 = torch.aten.add.Tensor %3365, %3412, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3414 = torch.aten.t %3403 : !torch.tensor -> !torch.tensor | |
%3415 = torch.aten.detach %3414 : !torch.tensor -> !torch.tensor | |
%3416 = torch.aten.detach %3415 : !torch.tensor -> !torch.tensor | |
%3417 = torch.aten.sub.Tensor %177, %result1_10, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3418 = torch.aten.mul.Tensor %3417, %result2_11 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3419 = torch.aten.mul.Tensor %3413, %arg23 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3420 = torch.aten.mul.Scalar %3419, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3421 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3422 = torch.aten.sum.dim_IntList %3419, %3421, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3423 = torch.aten.mul.Tensor %3419, %3418 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3424 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3425 = torch.aten.sum.dim_IntList %3423, %3424, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3426 = torch.aten.mul.Tensor %3418, %3425 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3427 = torch.aten.sub.Tensor %3420, %3422, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3428 = torch.aten.sub.Tensor %3427, %3426, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3429 = torch.aten.div.Scalar %result2_11, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3430 = torch.aten.mul.Tensor %3429, %3428 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3431 = torch.aten.mul.Tensor %3413, %3418 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3432 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3433 = torch.aten.sum.dim_IntList %3431, %3432, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3434 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3435 = torch.aten.sum.dim_IntList %3413, %3434, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3436 = torch.aten.detach %3433 : !torch.tensor -> !torch.tensor | |
%3437 = torch.aten.detach %3436 : !torch.tensor -> !torch.tensor | |
%3438 = torch.aten.detach %3435 : !torch.tensor -> !torch.tensor | |
%3439 = torch.aten.detach %3438 : !torch.tensor -> !torch.tensor | |
%3440 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3441 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3442 = torch.aten._reshape_alias %3430, %3440, %3441 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3443 = torch.aten.t %171 : !torch.tensor -> !torch.tensor | |
%3444 = torch.aten.mm %3442, %3443 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3445 = torch.aten.t %3442 : !torch.tensor -> !torch.tensor | |
%3446 = torch.aten.mm %3445, %173 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3447 = torch.aten.t %3446 : !torch.tensor -> !torch.tensor | |
%3448 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3449 = torch.aten.sum.dim_IntList %3442, %3448, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3450 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3451 = torch.aten.view %3449, %3450 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3452 = torch.aten.detach %3451 : !torch.tensor -> !torch.tensor | |
%3453 = torch.aten.detach %3452 : !torch.tensor -> !torch.tensor | |
%3454 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3455 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3456 = torch.aten._reshape_alias %3444, %3454, %3455 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3457 = torch.aten.t %3447 : !torch.tensor -> !torch.tensor | |
%3458 = torch.aten.detach %3457 : !torch.tensor -> !torch.tensor | |
%3459 = torch.aten.detach %3458 : !torch.tensor -> !torch.tensor | |
%3460 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3461 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3462 = torch.aten._reshape_alias %3456, %3460, %3461 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3463 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3464 = torch.aten.permute %3462, %3463 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3465 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3466 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3467 = torch.aten._reshape_alias %3464, %3465, %3466 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3468 = torch.aten.transpose.int %157, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3469 = torch.aten.bmm %3468, %3467 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3470 = torch.aten.transpose.int %162, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3471 = torch.aten.bmm %3467, %3470 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3472 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3473 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3474 = torch.aten._reshape_alias %3469, %3472, %3473 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3475 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3476 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3477 = torch.aten._reshape_alias %3471, %3475, %3476 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3478 = torch.aten.detach %152 : !torch.tensor -> !torch.tensor | |
%3479 = torch.aten._softmax_backward_data %3477, %3478, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3480 = torch.aten.div.Scalar %3479, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%3481 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3482 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3483 = torch.aten._reshape_alias %3480, %3481, %3482 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3484 = torch.aten.transpose.int %139, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3485 = torch.aten.bmm %3484, %3483 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3486 = torch.aten.transpose.int %144, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3487 = torch.aten.bmm %3483, %3486 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3488 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3489 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3490 = torch.aten._reshape_alias %3485, %3488, %3489 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3491 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3492 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3493 = torch.aten._reshape_alias %3487, %3491, %3492 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3494 = torch.aten.transpose.int %3490, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3495 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3496 = torch.aten.permute %3493, %3495 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3497 = torch.aten.clone %3496, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%3498 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3499 = torch.aten._unsafe_view %3497, %3498 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3500 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3501 = torch.aten.permute %3474, %3500 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3502 = torch.aten.clone %3501, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%3503 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3504 = torch.aten._unsafe_view %3502, %3503 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3505 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3506 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3507 = torch.aten._reshape_alias %3504, %3505, %3506 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3508 = torch.aten.t %120 : !torch.tensor -> !torch.tensor | |
%3509 = torch.aten.mm %3507, %3508 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3510 = torch.aten.t %3507 : !torch.tensor -> !torch.tensor | |
%3511 = torch.aten.mm %3510, %122 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3512 = torch.aten.t %3511 : !torch.tensor -> !torch.tensor | |
%3513 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3514 = torch.aten.sum.dim_IntList %3507, %3513, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3515 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3516 = torch.aten.view %3514, %3515 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3517 = torch.aten.detach %3516 : !torch.tensor -> !torch.tensor | |
%3518 = torch.aten.detach %3517 : !torch.tensor -> !torch.tensor | |
%3519 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3520 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3521 = torch.aten._reshape_alias %3509, %3519, %3520 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3522 = torch.aten.add.Tensor %3430, %3521, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3523 = torch.aten.t %3512 : !torch.tensor -> !torch.tensor | |
%3524 = torch.aten.detach %3523 : !torch.tensor -> !torch.tensor | |
%3525 = torch.aten.detach %3524 : !torch.tensor -> !torch.tensor | |
%3526 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3527 = torch.aten.permute %3494, %3526 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3528 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3529 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3530 = torch.aten._reshape_alias %3527, %3528, %3529 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3531 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3532 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3533 = torch.aten._reshape_alias %3530, %3531, %3532 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3534 = torch.aten.t %110 : !torch.tensor -> !torch.tensor | |
%3535 = torch.aten.mm %3533, %3534 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3536 = torch.aten.t %3533 : !torch.tensor -> !torch.tensor | |
%3537 = torch.aten.mm %3536, %112 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3538 = torch.aten.t %3537 : !torch.tensor -> !torch.tensor | |
%3539 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3540 = torch.aten.sum.dim_IntList %3533, %3539, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3541 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3542 = torch.aten.view %3540, %3541 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3543 = torch.aten.detach %3542 : !torch.tensor -> !torch.tensor | |
%3544 = torch.aten.detach %3543 : !torch.tensor -> !torch.tensor | |
%3545 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3546 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3547 = torch.aten._reshape_alias %3535, %3545, %3546 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3548 = torch.aten.add.Tensor %3522, %3547, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3549 = torch.aten.t %3538 : !torch.tensor -> !torch.tensor | |
%3550 = torch.aten.detach %3549 : !torch.tensor -> !torch.tensor | |
%3551 = torch.aten.detach %3550 : !torch.tensor -> !torch.tensor | |
%3552 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3553 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3554 = torch.aten._reshape_alias %3499, %3552, %3553 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3555 = torch.aten.t %104 : !torch.tensor -> !torch.tensor | |
%3556 = torch.aten.mm %3554, %3555 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3557 = torch.aten.t %3554 : !torch.tensor -> !torch.tensor | |
%3558 = torch.aten.mm %3557, %106 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3559 = torch.aten.t %3558 : !torch.tensor -> !torch.tensor | |
%3560 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3561 = torch.aten.sum.dim_IntList %3554, %3560, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3562 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3563 = torch.aten.view %3561, %3562 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3564 = torch.aten.detach %3563 : !torch.tensor -> !torch.tensor | |
%3565 = torch.aten.detach %3564 : !torch.tensor -> !torch.tensor | |
%3566 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3567 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3568 = torch.aten._reshape_alias %3556, %3566, %3567 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3569 = torch.aten.add.Tensor %3548, %3568, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3570 = torch.aten.t %3559 : !torch.tensor -> !torch.tensor | |
%3571 = torch.aten.detach %3570 : !torch.tensor -> !torch.tensor | |
%3572 = torch.aten.detach %3571 : !torch.tensor -> !torch.tensor | |
%3573 = torch.aten.sub.Tensor %102, %result1_7, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3574 = torch.aten.mul.Tensor %3573, %result2_8 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3575 = torch.aten.mul.Tensor %3569, %arg19 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3576 = torch.aten.mul.Scalar %3575, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3577 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3578 = torch.aten.sum.dim_IntList %3575, %3577, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3579 = torch.aten.mul.Tensor %3575, %3574 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3580 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3581 = torch.aten.sum.dim_IntList %3579, %3580, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3582 = torch.aten.mul.Tensor %3574, %3581 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3583 = torch.aten.sub.Tensor %3576, %3578, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3584 = torch.aten.sub.Tensor %3583, %3582, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3585 = torch.aten.div.Scalar %result2_8, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3586 = torch.aten.mul.Tensor %3585, %3584 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3587 = torch.aten.mul.Tensor %3569, %3574 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3588 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3589 = torch.aten.sum.dim_IntList %3587, %3588, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3590 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3591 = torch.aten.sum.dim_IntList %3569, %3590, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3592 = torch.aten.detach %3589 : !torch.tensor -> !torch.tensor | |
%3593 = torch.aten.detach %3592 : !torch.tensor -> !torch.tensor | |
%3594 = torch.aten.detach %3591 : !torch.tensor -> !torch.tensor | |
%3595 = torch.aten.detach %3594 : !torch.tensor -> !torch.tensor | |
%3596 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3597 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3598 = torch.aten._reshape_alias %3586, %3596, %3597 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3599 = torch.aten.t %96 : !torch.tensor -> !torch.tensor | |
%3600 = torch.aten.mm %3598, %3599 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3601 = torch.aten.t %3598 : !torch.tensor -> !torch.tensor | |
%3602 = torch.aten.mm %3601, %98 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3603 = torch.aten.t %3602 : !torch.tensor -> !torch.tensor | |
%3604 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3605 = torch.aten.sum.dim_IntList %3598, %3604, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3606 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3607 = torch.aten.view %3605, %3606 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3608 = torch.aten.detach %3607 : !torch.tensor -> !torch.tensor | |
%3609 = torch.aten.detach %3608 : !torch.tensor -> !torch.tensor | |
%3610 = torch.prim.ListConstruct %int1, %int128, %int1536 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3611 = torch.prim.ListConstruct %int196608, %int1536, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3612 = torch.aten._reshape_alias %3600, %3610, %3611 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3613 = torch.aten.t %3603 : !torch.tensor -> !torch.tensor | |
%3614 = torch.aten.detach %3613 : !torch.tensor -> !torch.tensor | |
%3615 = torch.aten.detach %3614 : !torch.tensor -> !torch.tensor | |
%3616 = torch.aten.gelu_backward %3612, %94, %str_2 : !torch.tensor, !torch.tensor, !torch.str -> !torch.tensor | |
%3617 = torch.prim.ListConstruct %int128, %int1536 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3618 = torch.prim.ListConstruct %int1536, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3619 = torch.aten._reshape_alias %3616, %3617, %3618 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3620 = torch.aten.t %89 : !torch.tensor -> !torch.tensor | |
%3621 = torch.aten.mm %3619, %3620 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3622 = torch.aten.t %3619 : !torch.tensor -> !torch.tensor | |
%3623 = torch.aten.mm %3622, %91 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3624 = torch.aten.t %3623 : !torch.tensor -> !torch.tensor | |
%3625 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3626 = torch.aten.sum.dim_IntList %3619, %3625, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3627 = torch.prim.ListConstruct %int1536 : (!torch.int) -> !torch.list<int> | |
%3628 = torch.aten.view %3626, %3627 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3629 = torch.aten.detach %3628 : !torch.tensor -> !torch.tensor | |
%3630 = torch.aten.detach %3629 : !torch.tensor -> !torch.tensor | |
%3631 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3632 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3633 = torch.aten._reshape_alias %3621, %3631, %3632 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3634 = torch.aten.add.Tensor %3586, %3633, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3635 = torch.aten.t %3624 : !torch.tensor -> !torch.tensor | |
%3636 = torch.aten.detach %3635 : !torch.tensor -> !torch.tensor | |
%3637 = torch.aten.detach %3636 : !torch.tensor -> !torch.tensor | |
%3638 = torch.aten.sub.Tensor %87, %result1_4, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3639 = torch.aten.mul.Tensor %3638, %result2_5 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3640 = torch.aten.mul.Tensor %3634, %arg7 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3641 = torch.aten.mul.Scalar %3640, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3642 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3643 = torch.aten.sum.dim_IntList %3640, %3642, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3644 = torch.aten.mul.Tensor %3640, %3639 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3645 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3646 = torch.aten.sum.dim_IntList %3644, %3645, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3647 = torch.aten.mul.Tensor %3639, %3646 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3648 = torch.aten.sub.Tensor %3641, %3643, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3649 = torch.aten.sub.Tensor %3648, %3647, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3650 = torch.aten.div.Scalar %result2_5, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3651 = torch.aten.mul.Tensor %3650, %3649 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3652 = torch.aten.mul.Tensor %3634, %3639 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3653 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3654 = torch.aten.sum.dim_IntList %3652, %3653, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3655 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3656 = torch.aten.sum.dim_IntList %3634, %3655, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3657 = torch.aten.detach %3654 : !torch.tensor -> !torch.tensor | |
%3658 = torch.aten.detach %3657 : !torch.tensor -> !torch.tensor | |
%3659 = torch.aten.detach %3656 : !torch.tensor -> !torch.tensor | |
%3660 = torch.aten.detach %3659 : !torch.tensor -> !torch.tensor | |
%3661 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3662 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3663 = torch.aten._reshape_alias %3651, %3661, %3662 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3664 = torch.aten.t %81 : !torch.tensor -> !torch.tensor | |
%3665 = torch.aten.mm %3663, %3664 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3666 = torch.aten.t %3663 : !torch.tensor -> !torch.tensor | |
%3667 = torch.aten.mm %3666, %83 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3668 = torch.aten.t %3667 : !torch.tensor -> !torch.tensor | |
%3669 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3670 = torch.aten.sum.dim_IntList %3663, %3669, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3671 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3672 = torch.aten.view %3670, %3671 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3673 = torch.aten.detach %3672 : !torch.tensor -> !torch.tensor | |
%3674 = torch.aten.detach %3673 : !torch.tensor -> !torch.tensor | |
%3675 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3676 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3677 = torch.aten._reshape_alias %3665, %3675, %3676 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3678 = torch.aten.t %3668 : !torch.tensor -> !torch.tensor | |
%3679 = torch.aten.detach %3678 : !torch.tensor -> !torch.tensor | |
%3680 = torch.aten.detach %3679 : !torch.tensor -> !torch.tensor | |
%3681 = torch.prim.ListConstruct %int1, %int128, %int12, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3682 = torch.prim.ListConstruct %int49152, %int384, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3683 = torch.aten._reshape_alias %3677, %3681, %3682 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3684 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3685 = torch.aten.permute %3683, %3684 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3686 = torch.prim.ListConstruct %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3687 = torch.prim.ListConstruct %int32, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3688 = torch.aten._reshape_alias %3685, %3686, %3687 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3689 = torch.aten.transpose.int %67, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3690 = torch.aten.bmm %3689, %3688 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3691 = torch.aten.transpose.int %72, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3692 = torch.aten.bmm %3688, %3691 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3693 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3694 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3695 = torch.aten._reshape_alias %3690, %3693, %3694 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3696 = torch.prim.ListConstruct %int1, %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3697 = torch.prim.ListConstruct %int196608, %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3698 = torch.aten._reshape_alias %3692, %3696, %3697 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3699 = torch.aten.detach %62 : !torch.tensor -> !torch.tensor | |
%3700 = torch.aten._softmax_backward_data %3698, %3699, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3701 = torch.aten.div.Scalar %3700, %float5.656850e00 : !torch.tensor, !torch.float -> !torch.tensor | |
%3702 = torch.prim.ListConstruct %int12, %int128, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3703 = torch.prim.ListConstruct %int16384, %int128, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3704 = torch.aten._reshape_alias %3701, %3702, %3703 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3705 = torch.aten.transpose.int %49, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3706 = torch.aten.bmm %3705, %3704 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3707 = torch.aten.transpose.int %54, %int1, %int2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3708 = torch.aten.bmm %3704, %3707 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3709 = torch.prim.ListConstruct %int1, %int12, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3710 = torch.prim.ListConstruct %int49152, %int4096, %int128, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3711 = torch.aten._reshape_alias %3706, %3709, %3710 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3712 = torch.prim.ListConstruct %int1, %int12, %int128, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3713 = torch.prim.ListConstruct %int49152, %int4096, %int32, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3714 = torch.aten._reshape_alias %3708, %3712, %3713 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3715 = torch.aten.transpose.int %3711, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%3716 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3717 = torch.aten.permute %3714, %3716 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3718 = torch.aten.clone %3717, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%3719 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3720 = torch.aten._unsafe_view %3718, %3719 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3721 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3722 = torch.aten.permute %3695, %3721 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3723 = torch.aten.clone %3722, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%3724 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3725 = torch.aten._unsafe_view %3723, %3724 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3726 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3727 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3728 = torch.aten._reshape_alias %3725, %3726, %3727 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3729 = torch.aten.t %30 : !torch.tensor -> !torch.tensor | |
%3730 = torch.aten.mm %3728, %3729 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3731 = torch.aten.t %3728 : !torch.tensor -> !torch.tensor | |
%3732 = torch.aten.mm %3731, %32 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3733 = torch.aten.t %3732 : !torch.tensor -> !torch.tensor | |
%3734 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3735 = torch.aten.sum.dim_IntList %3728, %3734, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3736 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3737 = torch.aten.view %3735, %3736 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3738 = torch.aten.detach %3737 : !torch.tensor -> !torch.tensor | |
%3739 = torch.aten.detach %3738 : !torch.tensor -> !torch.tensor | |
%3740 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3741 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3742 = torch.aten._reshape_alias %3730, %3740, %3741 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3743 = torch.aten.add.Tensor %3651, %3742, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3744 = torch.aten.t %3733 : !torch.tensor -> !torch.tensor | |
%3745 = torch.aten.detach %3744 : !torch.tensor -> !torch.tensor | |
%3746 = torch.aten.detach %3745 : !torch.tensor -> !torch.tensor | |
%3747 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3748 = torch.aten.permute %3715, %3747 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3749 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3750 = torch.prim.ListConstruct %int128, %int1, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3751 = torch.aten._reshape_alias %3748, %3749, %3750 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3752 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3753 = torch.prim.ListConstruct %int1, %int128 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3754 = torch.aten._reshape_alias %3751, %3752, %3753 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3755 = torch.aten.t %20 : !torch.tensor -> !torch.tensor | |
%3756 = torch.aten.mm %3754, %3755 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3757 = torch.aten.t %3754 : !torch.tensor -> !torch.tensor | |
%3758 = torch.aten.mm %3757, %22 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3759 = torch.aten.t %3758 : !torch.tensor -> !torch.tensor | |
%3760 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3761 = torch.aten.sum.dim_IntList %3754, %3760, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3762 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3763 = torch.aten.view %3761, %3762 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3764 = torch.aten.detach %3763 : !torch.tensor -> !torch.tensor | |
%3765 = torch.aten.detach %3764 : !torch.tensor -> !torch.tensor | |
%3766 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3767 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3768 = torch.aten._reshape_alias %3756, %3766, %3767 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3769 = torch.aten.add.Tensor %3743, %3768, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3770 = torch.aten.t %3759 : !torch.tensor -> !torch.tensor | |
%3771 = torch.aten.detach %3770 : !torch.tensor -> !torch.tensor | |
%3772 = torch.aten.detach %3771 : !torch.tensor -> !torch.tensor | |
%3773 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3774 = torch.prim.ListConstruct %int384, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3775 = torch.aten._reshape_alias %3720, %3773, %3774 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3776 = torch.aten.t %14 : !torch.tensor -> !torch.tensor | |
%3777 = torch.aten.mm %3775, %3776 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3778 = torch.aten.t %3775 : !torch.tensor -> !torch.tensor | |
%3779 = torch.aten.mm %3778, %16 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3780 = torch.aten.t %3779 : !torch.tensor -> !torch.tensor | |
%3781 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%3782 = torch.aten.sum.dim_IntList %3775, %3781, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3783 = torch.prim.ListConstruct %int384 : (!torch.int) -> !torch.list<int> | |
%3784 = torch.aten.view %3782, %3783 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3785 = torch.aten.detach %3784 : !torch.tensor -> !torch.tensor | |
%3786 = torch.aten.detach %3785 : !torch.tensor -> !torch.tensor | |
%3787 = torch.prim.ListConstruct %int1, %int128, %int384 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3788 = torch.prim.ListConstruct %int49152, %int384, %int1 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3789 = torch.aten._reshape_alias %3777, %3787, %3788 : !torch.tensor, !torch.list<int>, !torch.list<int> -> !torch.tensor | |
%3790 = torch.aten.add.Tensor %3769, %3789, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3791 = torch.aten.t %3780 : !torch.tensor -> !torch.tensor | |
%3792 = torch.aten.detach %3791 : !torch.tensor -> !torch.tensor | |
%3793 = torch.aten.detach %3792 : !torch.tensor -> !torch.tensor | |
%3794 = torch.aten.sub.Tensor %12, %result1, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3795 = torch.aten.mul.Tensor %3794, %result2 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3796 = torch.aten.mul.Tensor %3790, %arg2 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3797 = torch.aten.mul.Scalar %3796, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3798 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3799 = torch.aten.sum.dim_IntList %3796, %3798, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3800 = torch.aten.mul.Tensor %3796, %3795 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3801 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%3802 = torch.aten.sum.dim_IntList %3800, %3801, %true_0, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3803 = torch.aten.mul.Tensor %3795, %3802 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3804 = torch.aten.sub.Tensor %3797, %3799, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3805 = torch.aten.sub.Tensor %3804, %3803, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%3806 = torch.aten.div.Scalar %result2, %int384 : !torch.tensor, !torch.int -> !torch.tensor | |
%3807 = torch.aten.mul.Tensor %3806, %3805 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3808 = torch.aten.mul.Tensor %3790, %3795 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3809 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3810 = torch.aten.sum.dim_IntList %3808, %3809, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3811 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3812 = torch.aten.sum.dim_IntList %3790, %3811, %false, %none_1 : !torch.tensor, !torch.list<int>, !torch.bool, !torch.none -> !torch.tensor | |
%3813 = torch.aten.detach %3810 : !torch.tensor -> !torch.tensor | |
%3814 = torch.aten.detach %3813 : !torch.tensor -> !torch.tensor | |
%3815 = torch.aten.detach %3812 : !torch.tensor -> !torch.tensor | |
%3816 = torch.aten.detach %3815 : !torch.tensor -> !torch.tensor | |
%3817 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3818 = torch.aten.view %3807, %3817 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3819 = torch.prim.ListConstruct %int512, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3820 = torch.aten.new_zeros %3807, %3819, %int6, %int0, %cpu, %false : !torch.tensor, !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool -> !torch.tensor | |
%3821 = torch.prim.ListConstruct %int128 : (!torch.int) -> !torch.list<int> | |
%3822 = torch.aten.view %7, %3821 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3823 = torch.aten.ne.Scalar %3822, %int-1 : !torch.tensor, !torch.int -> !torch.tensor | |
%3824 = torch.aten.unsqueeze %3823, %int1 : !torch.tensor, !torch.int -> !torch.tensor | |
%3825 = torch.aten.expand_as %3824, %3818 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3826 = torch.aten.full_like %3818, %int0, %int6, %int0, %cpu, %false, %none_1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.tensor | |
%3827 = torch.aten.where.self %3825, %3818, %3826 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor | |
%3828 = torch.prim.ListConstruct %3822 : (!torch.tensor) -> !torch.list<tensor> | |
%3829 = torch.aten.index_put.hacked_twin %3820, %3828, %3827, %true_0 : !torch.tensor, !torch.list<tensor>, !torch.tensor, !torch.bool -> !torch.tensor | |
%3830 = torch.aten.detach %3829 : !torch.tensor -> !torch.tensor | |
%3831 = torch.aten.detach %3830 : !torch.tensor -> !torch.tensor | |
%3832 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3833 = torch.aten.view %3807, %3832 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3834 = torch.prim.ListConstruct %int2, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3835 = torch.aten.new_zeros %3807, %3834, %int6, %int0, %cpu, %false : !torch.tensor, !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool -> !torch.tensor | |
%3836 = torch.prim.ListConstruct %int128 : (!torch.int) -> !torch.list<int> | |
%3837 = torch.aten.view %5, %3836 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3838 = torch.aten.ne.Scalar %3837, %int-1 : !torch.tensor, !torch.int -> !torch.tensor | |
%3839 = torch.aten.unsqueeze %3838, %int1 : !torch.tensor, !torch.int -> !torch.tensor | |
%3840 = torch.aten.expand_as %3839, %3833 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3841 = torch.aten.full_like %3833, %int0, %int6, %int0, %cpu, %false, %none_1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.tensor | |
%3842 = torch.aten.where.self %3840, %3833, %3841 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor | |
%3843 = torch.prim.ListConstruct %3837 : (!torch.tensor) -> !torch.list<tensor> | |
%3844 = torch.aten.index_put.hacked_twin %3835, %3843, %3842, %true_0 : !torch.tensor, !torch.list<tensor>, !torch.tensor, !torch.bool -> !torch.tensor | |
%3845 = torch.aten.detach %3844 : !torch.tensor -> !torch.tensor | |
%3846 = torch.aten.detach %3845 : !torch.tensor -> !torch.tensor | |
%3847 = torch.prim.ListConstruct %int128, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3848 = torch.aten.view %3807, %3847 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3849 = torch.prim.ListConstruct %int30522, %int384 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3850 = torch.aten.new_zeros %3807, %3849, %int6, %int0, %cpu, %false : !torch.tensor, !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool -> !torch.tensor | |
%3851 = torch.prim.ListConstruct %int128 : (!torch.int) -> !torch.list<int> | |
%3852 = torch.aten.view %arg204, %3851 : !torch.tensor, !torch.list<int> -> !torch.tensor | |
%3853 = torch.aten.ne.Scalar %3852, %int0 : !torch.tensor, !torch.int -> !torch.tensor | |
%3854 = torch.aten.unsqueeze %3853, %int1 : !torch.tensor, !torch.int -> !torch.tensor | |
%3855 = torch.aten.expand_as %3854, %3848 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%3856 = torch.aten.full_like %3848, %int0, %int6, %int0, %cpu, %false, %none_1 : !torch.tensor, !torch.int, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.tensor | |
%3857 = torch.aten.where.self %3855, %3848, %3856 : !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tensor | |
%3858 = torch.prim.ListConstruct %3852 : (!torch.tensor) -> !torch.list<tensor> | |
%3859 = torch.aten.index_put.hacked_twin %3850, %3858, %3857, %true_0 : !torch.tensor, !torch.list<tensor>, !torch.tensor, !torch.bool -> !torch.tensor | |
%3860 = torch.aten.detach %3859 : !torch.tensor -> !torch.tensor | |
%3861 = torch.aten.detach %3860 : !torch.tensor -> !torch.tensor | |
%3862 = torch.aten.add_.Tensor %arg1, %3816, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3863 = torch.aten.add_.Tensor %arg2, %3814, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3864 = torch.aten.add_.Tensor %arg3, %3831, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3865 = torch.aten.add_.Tensor %arg4, %3846, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3866 = torch.aten.add_.Tensor %arg5, %3861, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3867 = torch.aten.add_.Tensor %arg6, %3660, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3868 = torch.aten.add_.Tensor %arg7, %3658, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3869 = torch.aten.add_.Tensor %arg8, %3674, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3870 = torch.aten.add_.Tensor %arg9, %3680, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3871 = torch.aten.add_.Tensor %arg10, %3765, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3872 = torch.aten.add_.Tensor %arg11, %3772, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3873 = torch.aten.add_.Tensor %arg12, %3786, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3874 = torch.aten.add_.Tensor %arg13, %3793, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3875 = torch.aten.add_.Tensor %arg14, %3739, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3876 = torch.aten.add_.Tensor %arg15, %3746, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3877 = torch.aten.add_.Tensor %arg16, %3630, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3878 = torch.aten.add_.Tensor %arg17, %3637, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3879 = torch.aten.add_.Tensor %arg18, %3595, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3880 = torch.aten.add_.Tensor %arg19, %3593, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3881 = torch.aten.add_.Tensor %arg20, %3609, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3882 = torch.aten.add_.Tensor %arg21, %3615, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3883 = torch.aten.add_.Tensor %arg22, %3439, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3884 = torch.aten.add_.Tensor %arg23, %3437, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3885 = torch.aten.add_.Tensor %arg24, %3453, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3886 = torch.aten.add_.Tensor %arg25, %3459, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3887 = torch.aten.add_.Tensor %arg26, %3544, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3888 = torch.aten.add_.Tensor %arg27, %3551, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3889 = torch.aten.add_.Tensor %arg28, %3565, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3890 = torch.aten.add_.Tensor %arg29, %3572, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3891 = torch.aten.add_.Tensor %arg30, %3518, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3892 = torch.aten.add_.Tensor %arg31, %3525, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3893 = torch.aten.add_.Tensor %arg32, %3409, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3894 = torch.aten.add_.Tensor %arg33, %3416, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3895 = torch.aten.add_.Tensor %arg34, %3374, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3896 = torch.aten.add_.Tensor %arg35, %3372, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3897 = torch.aten.add_.Tensor %arg36, %3388, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3898 = torch.aten.add_.Tensor %arg37, %3394, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3899 = torch.aten.add_.Tensor %arg38, %1450, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3900 = torch.aten.add_.Tensor %arg39, %1448, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3901 = torch.aten.add_.Tensor %arg40, %1464, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3902 = torch.aten.add_.Tensor %arg41, %1470, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3903 = torch.aten.add_.Tensor %arg42, %1555, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3904 = torch.aten.add_.Tensor %arg43, %1562, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3905 = torch.aten.add_.Tensor %arg44, %1576, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3906 = torch.aten.add_.Tensor %arg45, %1583, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3907 = torch.aten.add_.Tensor %arg46, %1529, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3908 = torch.aten.add_.Tensor %arg47, %1536, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3909 = torch.aten.add_.Tensor %arg48, %1420, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3910 = torch.aten.add_.Tensor %arg49, %1427, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3911 = torch.aten.add_.Tensor %arg50, %1385, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3912 = torch.aten.add_.Tensor %arg51, %1383, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3913 = torch.aten.add_.Tensor %arg52, %1399, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3914 = torch.aten.add_.Tensor %arg53, %1405, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3915 = torch.aten.add_.Tensor %arg54, %1229, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3916 = torch.aten.add_.Tensor %arg55, %1227, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3917 = torch.aten.add_.Tensor %arg56, %1243, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3918 = torch.aten.add_.Tensor %arg57, %1249, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3919 = torch.aten.add_.Tensor %arg58, %1334, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3920 = torch.aten.add_.Tensor %arg59, %1341, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3921 = torch.aten.add_.Tensor %arg60, %1355, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3922 = torch.aten.add_.Tensor %arg61, %1362, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3923 = torch.aten.add_.Tensor %arg62, %1308, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3924 = torch.aten.add_.Tensor %arg63, %1315, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3925 = torch.aten.add_.Tensor %arg64, %1199, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3926 = torch.aten.add_.Tensor %arg65, %1206, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3927 = torch.aten.add_.Tensor %arg66, %1164, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3928 = torch.aten.add_.Tensor %arg67, %1162, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3929 = torch.aten.add_.Tensor %arg68, %1178, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3930 = torch.aten.add_.Tensor %arg69, %1184, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3931 = torch.aten.add_.Tensor %arg70, %3218, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3932 = torch.aten.add_.Tensor %arg71, %3216, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3933 = torch.aten.add_.Tensor %arg72, %3232, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3934 = torch.aten.add_.Tensor %arg73, %3238, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3935 = torch.aten.add_.Tensor %arg74, %3323, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3936 = torch.aten.add_.Tensor %arg75, %3330, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3937 = torch.aten.add_.Tensor %arg76, %3344, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3938 = torch.aten.add_.Tensor %arg77, %3351, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3939 = torch.aten.add_.Tensor %arg78, %3297, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3940 = torch.aten.add_.Tensor %arg79, %3304, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3941 = torch.aten.add_.Tensor %arg80, %3188, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3942 = torch.aten.add_.Tensor %arg81, %3195, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3943 = torch.aten.add_.Tensor %arg82, %3153, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3944 = torch.aten.add_.Tensor %arg83, %3151, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3945 = torch.aten.add_.Tensor %arg84, %3167, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3946 = torch.aten.add_.Tensor %arg85, %3173, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3947 = torch.aten.add_.Tensor %arg86, %2997, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3948 = torch.aten.add_.Tensor %arg87, %2995, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3949 = torch.aten.add_.Tensor %arg88, %3011, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3950 = torch.aten.add_.Tensor %arg89, %3017, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3951 = torch.aten.add_.Tensor %arg90, %3102, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3952 = torch.aten.add_.Tensor %arg91, %3109, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3953 = torch.aten.add_.Tensor %arg92, %3123, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3954 = torch.aten.add_.Tensor %arg93, %3130, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3955 = torch.aten.add_.Tensor %arg94, %3076, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3956 = torch.aten.add_.Tensor %arg95, %3083, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3957 = torch.aten.add_.Tensor %arg96, %2967, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3958 = torch.aten.add_.Tensor %arg97, %2974, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3959 = torch.aten.add_.Tensor %arg98, %2932, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3960 = torch.aten.add_.Tensor %arg99, %2930, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3961 = torch.aten.add_.Tensor %arg100, %2946, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3962 = torch.aten.add_.Tensor %arg101, %2952, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3963 = torch.aten.add_.Tensor %arg102, %2776, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3964 = torch.aten.add_.Tensor %arg103, %2774, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3965 = torch.aten.add_.Tensor %arg104, %2790, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3966 = torch.aten.add_.Tensor %arg105, %2796, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3967 = torch.aten.add_.Tensor %arg106, %2881, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3968 = torch.aten.add_.Tensor %arg107, %2888, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3969 = torch.aten.add_.Tensor %arg108, %2902, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3970 = torch.aten.add_.Tensor %arg109, %2909, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3971 = torch.aten.add_.Tensor %arg110, %2855, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3972 = torch.aten.add_.Tensor %arg111, %2862, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3973 = torch.aten.add_.Tensor %arg112, %2746, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3974 = torch.aten.add_.Tensor %arg113, %2753, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3975 = torch.aten.add_.Tensor %arg114, %2711, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3976 = torch.aten.add_.Tensor %arg115, %2709, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3977 = torch.aten.add_.Tensor %arg116, %2725, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3978 = torch.aten.add_.Tensor %arg117, %2731, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3979 = torch.aten.add_.Tensor %arg118, %2555, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3980 = torch.aten.add_.Tensor %arg119, %2553, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3981 = torch.aten.add_.Tensor %arg120, %2569, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3982 = torch.aten.add_.Tensor %arg121, %2575, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3983 = torch.aten.add_.Tensor %arg122, %2660, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3984 = torch.aten.add_.Tensor %arg123, %2667, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3985 = torch.aten.add_.Tensor %arg124, %2681, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3986 = torch.aten.add_.Tensor %arg125, %2688, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3987 = torch.aten.add_.Tensor %arg126, %2634, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3988 = torch.aten.add_.Tensor %arg127, %2641, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3989 = torch.aten.add_.Tensor %arg128, %2525, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3990 = torch.aten.add_.Tensor %arg129, %2532, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3991 = torch.aten.add_.Tensor %arg130, %2490, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3992 = torch.aten.add_.Tensor %arg131, %2488, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3993 = torch.aten.add_.Tensor %arg132, %2504, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3994 = torch.aten.add_.Tensor %arg133, %2510, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3995 = torch.aten.add_.Tensor %arg134, %2334, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3996 = torch.aten.add_.Tensor %arg135, %2332, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3997 = torch.aten.add_.Tensor %arg136, %2348, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3998 = torch.aten.add_.Tensor %arg137, %2354, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%3999 = torch.aten.add_.Tensor %arg138, %2439, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4000 = torch.aten.add_.Tensor %arg139, %2446, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4001 = torch.aten.add_.Tensor %arg140, %2460, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4002 = torch.aten.add_.Tensor %arg141, %2467, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4003 = torch.aten.add_.Tensor %arg142, %2413, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4004 = torch.aten.add_.Tensor %arg143, %2420, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4005 = torch.aten.add_.Tensor %arg144, %2304, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4006 = torch.aten.add_.Tensor %arg145, %2311, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4007 = torch.aten.add_.Tensor %arg146, %2269, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4008 = torch.aten.add_.Tensor %arg147, %2267, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4009 = torch.aten.add_.Tensor %arg148, %2283, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4010 = torch.aten.add_.Tensor %arg149, %2289, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4011 = torch.aten.add_.Tensor %arg150, %2113, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4012 = torch.aten.add_.Tensor %arg151, %2111, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4013 = torch.aten.add_.Tensor %arg152, %2127, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4014 = torch.aten.add_.Tensor %arg153, %2133, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4015 = torch.aten.add_.Tensor %arg154, %2218, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4016 = torch.aten.add_.Tensor %arg155, %2225, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4017 = torch.aten.add_.Tensor %arg156, %2239, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4018 = torch.aten.add_.Tensor %arg157, %2246, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4019 = torch.aten.add_.Tensor %arg158, %2192, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4020 = torch.aten.add_.Tensor %arg159, %2199, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4021 = torch.aten.add_.Tensor %arg160, %2083, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4022 = torch.aten.add_.Tensor %arg161, %2090, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4023 = torch.aten.add_.Tensor %arg162, %2048, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4024 = torch.aten.add_.Tensor %arg163, %2046, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4025 = torch.aten.add_.Tensor %arg164, %2062, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4026 = torch.aten.add_.Tensor %arg165, %2068, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4027 = torch.aten.add_.Tensor %arg166, %1892, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4028 = torch.aten.add_.Tensor %arg167, %1890, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4029 = torch.aten.add_.Tensor %arg168, %1906, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4030 = torch.aten.add_.Tensor %arg169, %1912, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4031 = torch.aten.add_.Tensor %arg170, %1997, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4032 = torch.aten.add_.Tensor %arg171, %2004, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4033 = torch.aten.add_.Tensor %arg172, %2018, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4034 = torch.aten.add_.Tensor %arg173, %2025, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4035 = torch.aten.add_.Tensor %arg174, %1971, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4036 = torch.aten.add_.Tensor %arg175, %1978, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4037 = torch.aten.add_.Tensor %arg176, %1862, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4038 = torch.aten.add_.Tensor %arg177, %1869, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4039 = torch.aten.add_.Tensor %arg178, %1827, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4040 = torch.aten.add_.Tensor %arg179, %1825, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4041 = torch.aten.add_.Tensor %arg180, %1841, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4042 = torch.aten.add_.Tensor %arg181, %1847, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4043 = torch.aten.add_.Tensor %arg182, %1671, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4044 = torch.aten.add_.Tensor %arg183, %1669, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4045 = torch.aten.add_.Tensor %arg184, %1685, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4046 = torch.aten.add_.Tensor %arg185, %1691, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4047 = torch.aten.add_.Tensor %arg186, %1776, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4048 = torch.aten.add_.Tensor %arg187, %1783, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4049 = torch.aten.add_.Tensor %arg188, %1797, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4050 = torch.aten.add_.Tensor %arg189, %1804, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4051 = torch.aten.add_.Tensor %arg190, %1750, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4052 = torch.aten.add_.Tensor %arg191, %1757, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4053 = torch.aten.add_.Tensor %arg192, %1641, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4054 = torch.aten.add_.Tensor %arg193, %1648, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4055 = torch.aten.add_.Tensor %arg194, %1606, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4056 = torch.aten.add_.Tensor %arg195, %1604, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4057 = torch.aten.add_.Tensor %arg196, %1620, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4058 = torch.aten.add_.Tensor %arg197, %1626, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4059 = torch.aten.add_.Tensor %arg198, %1132, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4060 = torch.aten.add_.Tensor %arg199, %1135, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4061 = torch.aten.add_.Tensor %arg200, %1116, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4062 = torch.aten.add_.Tensor %arg201, %1119, %float-1.000000e-02 : !torch.tensor, !torch.tensor, !torch.float -> !torch.tensor | |
%4063 = torch.prim.TupleConstruct %3862, %3863, %3864, %3865, %3866, %3867, %3868, %3869, %3870, %3871, %3872, %3873, %3874, %3875, %3876, %3877, %3878, %3879, %3880, %3881, %3882, %3883, %3884, %3885, %3886, %3887, %3888, %3889, %3890, %3891, %3892, %3893, %3894, %3895, %3896, %3897, %3898, %3899, %3900, %3901, %3902, %3903, %3904, %3905, %3906, %3907, %3908, %3909, %3910, %3911, %3912, %3913, %3914, %3915, %3916, %3917, %3918, %3919, %3920, %3921, %3922, %3923, %3924, %3925, %3926, %3927, %3928, %3929, %3930, %3931, %3932, %3933, %3934, %3935, %3936, %3937, %3938, %3939, %3940, %3941, %3942, %3943, %3944, %3945, %3946, %3947, %3948, %3949, %3950, %3951, %3952, %3953, %3954, %3955, %3956, %3957, %3958, %3959, %3960, %3961, %3962, %3963, %3964, %3965, %3966, %3967, %3968, %3969, %3970, %3971, %3972, %3973, %3974, %3975, %3976, %3977, %3978, %3979, %3980, %3981, %3982, %3983, %3984, %3985, %3986, %3987, %3988, %3989, %3990, %3991, %3992, %3993, %3994, %3995, %3996, %3997, %3998, %3999, %4000, %4001, %4002, %4003, %4004, %4005, %4006, %4007, %4008, %4009, %4010, %4011, %4012, %4013, %4014, %4015, %4016, %4017, %4018, %4019, %4020, %4021, %4022, %4023, %4024, %4025, %4026, %4027, %4028, %4029, %4030, %4031, %4032, %4033, %4034, %4035, %4036, %4037, %4038, %4039, %4040, %4041, %4042, %4043, %4044, %4045, %4046, %4047, %4048, %4049, %4050, %4051, %4052, %4053, %4054, %4055, %4056, %4057, %4058, %4059, %4060, %4061, %4062, %arg202, %arg203 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor -> !torch.tuple<tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, 
tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor> | |
return %4063 : !torch.tuple<tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor, tensor> | |
} | |
// Class type declaration for the TorchScript module
// `__torch__.torch.fx.graph_module.forward`: four private attributes and two
// methods, each method bound by symbol to a function defined in this module.
torch.class_type @__torch__.torch.fx.graph_module.forward { | |
// Tensor attribute. The Python source held in the `_code` string reads it as
// `self._tensor_constant0` and adds it to each `div_*` result just before the
// corresponding `_softmax` call.
torch.attr private "_tensor_constant0" : !torch.tensor | |
// Boolean attribute; not read by any code visible in this part of the file.
torch.attr private "training" : !torch.bool | |
// Optional boolean attribute; not read by any code visible in this part of the file.
torch.attr private "_is_full_backward_hook" : !torch.optional<bool> | |
// String attribute returned by `__code_getter` through `torch.prim.GetAttr`.
torch.attr private "_code" : !torch.str | |
// Private method bound to the function that returns the `_code` attribute.
torch.method private "__code_getter", @__torch__.torch.fx.graph_module.forward.__code_getter | |
// The only non-private method; bound to the `forward.forward` function whose
// body ends immediately above this declaration.
torch.method "forward", @__torch__.torch.fx.graph_module.forward.forward | |
} | |
// Module-level constant values. The operations that consume them lie beyond the
// `%str` constant that follows and are not visible here.
// NOTE(review): presumably these are the initial values for the class
// attributes (`_tensor_constant0`, `training`, `_is_full_backward_hook`) — confirm
// against the slot initializers further down.
// Splat f32 tensor literal of shape [1,1,1,128]; every element is -0.0.
%0 = torch.tensor.literal(dense<-0.000000e+00> : tensor<1x1x1x128xf32>) : !torch.tensor<[1,1,1,128],f32> | |
// Boolean constant `true`.
%true = torch.constant.bool true | |
// `None` constant.
%none = torch.constant.none | |
%str = torch.constant.str "\0A\0A\0Adef forward(self, params_1, params_2, params_3, params_4, params_5, params_6, params_7, params_8, params_9, params_10, params_11, params_12, params_13, params_14, params_15, params_16, params_17, params_18, params_19, params_20, params_21, params_22, params_23, params_24, params_25, params_26, params_27, params_28, params_29, params_30, params_31, params_32, params_33, params_34, params_35, params_36, params_37, params_38, params_39, params_40, params_41, params_42, params_43, params_44, params_45, params_46, params_47, params_48, params_49, params_50, params_51, params_52, params_53, params_54, params_55, params_56, params_57, params_58, params_59, params_60, params_61, params_62, params_63, params_64, params_65, params_66, params_67, params_68, params_69, params_70, params_71, params_72, params_73, params_74, params_75, params_76, params_77, params_78, params_79, params_80, params_81, params_82, params_83, params_84, params_85, params_86, params_87, params_88, params_89, params_90, params_91, params_92, params_93, params_94, params_95, params_96, params_97, params_98, params_99, params_100, params_101, params_102, params_103, params_104, params_105, params_106, params_107, params_108, params_109, params_110, params_111, params_112, params_113, params_114, params_115, params_116, params_117, params_118, params_119, params_120, params_121, params_122, params_123, params_124, params_125, params_126, params_127, params_128, params_129, params_130, params_131, params_132, params_133, params_134, params_135, params_136, params_137, params_138, params_139, params_140, params_141, params_142, params_143, params_144, params_145, params_146, params_147, params_148, params_149, params_150, params_151, params_152, params_153, params_154, params_155, params_156, params_157, params_158, params_159, params_160, params_161, params_162, params_163, params_164, params_165, params_166, params_167, params_168, params_169, params_170, params_171, 
params_172, params_173, params_174, params_175, params_176, params_177, params_178, params_179, params_180, params_181, params_182, params_183, params_184, params_185, params_186, params_187, params_188, params_189, params_190, params_191, params_192, params_193, params_194, params_195, params_196, params_197, params_198, params_199, params_200, params_201, buffers_1, buffers_2, args_1):\0A slice_1 = torch.ops.aten.slice(buffers_2, 0, 0, 9223372036854775807)\0A slice_2 = torch.ops.aten.slice(slice_1, 1, 0, 128); slice_1 = None\0A expand = torch.ops.aten.expand(slice_2, [1, 128]); slice_2 = None\0A slice_3 = torch.ops.aten.slice(buffers_1, 0, 0, 9223372036854775807)\0A slice_4 = torch.ops.aten.slice(slice_3, 1, 0, 128); slice_3 = None\0A embedding = torch.ops.aten.embedding(params_5, args_1, 0)\0A embedding_1 = torch.ops.aten.embedding(params_4, expand)\0A add = torch.ops.aten.add(embedding, embedding_1); embedding = embedding_1 = None\0A embedding_2 = torch.ops.aten.embedding(params_3, slice_4)\0A add_ = torch.ops.aten.add_(add, embedding_2); add = embedding_2 = None\0A native_layer_norm = torch.ops.aten.native_layer_norm(add_, [384], params_2, params_1, 1e-12)\0A getitem = native_layer_norm[0]\0A getitem_1 = native_layer_norm[1]\0A getitem_2 = native_layer_norm[2]; native_layer_norm = None\0A t = torch.ops.aten.t(params_13)\0A view = torch.ops.aten.view(getitem, [128, 384])\0A addmm = torch.ops.aten.addmm(params_12, view, t)\0A view_1 = torch.ops.aten.view(addmm, [1, 128, 384]); addmm = None\0A t_1 = torch.ops.aten.t(params_11)\0A view_2 = torch.ops.aten.view(getitem, [128, 384])\0A addmm_1 = torch.ops.aten.addmm(params_10, view_2, t_1)\0A view_3 = torch.ops.aten.view(addmm_1, [1, 128, 384]); addmm_1 = None\0A view_4 = torch.ops.aten.view(view_3, [1, 128, 12, 32]); view_3 = None\0A permute = torch.ops.aten.permute(view_4, [0, 2, 1, 3]); view_4 = None\0A t_2 = torch.ops.aten.t(params_15)\0A view_5 = torch.ops.aten.view(getitem, [128, 384])\0A addmm_2 = 
torch.ops.aten.addmm(params_14, view_5, t_2)\0A view_6 = torch.ops.aten.view(addmm_2, [1, 128, 384]); addmm_2 = None\0A view_7 = torch.ops.aten.view(view_6, [1, 128, 12, 32]); view_6 = None\0A permute_1 = torch.ops.aten.permute(view_7, [0, 2, 1, 3]); view_7 = None\0A view_8 = torch.ops.aten.view(view_1, [1, 128, 12, 32]); view_1 = None\0A permute_2 = torch.ops.aten.permute(view_8, [0, 2, 1, 3]); view_8 = None\0A transpose = torch.ops.aten.transpose(permute, -1, -2); permute = None\0A expand_1 = torch.ops.aten.expand(permute_2, [1, 12, 128, 32]); permute_2 = None\0A _reshape_alias = torch.ops.aten._reshape_alias(expand_1, [12, 128, 32], [32, 384, 1]); expand_1 = None\0A expand_2 = torch.ops.aten.expand(transpose, [1, 12, 32, 128]); transpose = None\0A _reshape_alias_1 = torch.ops.aten._reshape_alias(expand_2, [12, 32, 128], [32, 1, 384]); expand_2 = None\0A bmm = torch.ops.aten.bmm(_reshape_alias, _reshape_alias_1)\0A _unsafe_view = torch.ops.aten._unsafe_view(bmm, [1, 12, 128, 128]); bmm = None\0A div = torch.ops.aten.div(_unsafe_view, 5.656854249492381); _unsafe_view = None\0A _tensor_constant0 = self._tensor_constant0\0A add_1 = torch.ops.aten.add(div, _tensor_constant0); div = _tensor_constant0 = None\0A _softmax = torch.ops.aten._softmax(add_1, -1, False); add_1 = None\0A detach = torch.ops.aten.detach(_softmax)\0A expand_3 = torch.ops.aten.expand(_softmax, [1, 12, 128, 128]); _softmax = None\0A _reshape_alias_2 = torch.ops.aten._reshape_alias(expand_3, [12, 128, 128], [16384, 128, 1]); expand_3 = None\0A expand_4 = torch.ops.aten.expand(permute_1, [1, 12, 128, 32]); permute_1 = None\0A _reshape_alias_3 = torch.ops.aten._reshape_alias(expand_4, [12, 128, 32], [32, 384, 1]); expand_4 = None\0A bmm_1 = torch.ops.aten.bmm(_reshape_alias_2, _reshape_alias_3)\0A _unsafe_view_1 = torch.ops.aten._unsafe_view(bmm_1, [1, 12, 128, 32]); bmm_1 = None\0A permute_3 = torch.ops.aten.permute(_unsafe_view_1, [0, 2, 1, 3]); _unsafe_view_1 = None\0A clone = 
torch.ops.aten.clone(permute_3, memory_format = 0); permute_3 = None\0A view_9 = torch.ops.aten.view(clone, [1, 128, 384]); clone = None\0A t_3 = torch.ops.aten.t(params_9)\0A view_10 = torch.ops.aten.view(view_9, [128, 384]); view_9 = None\0A addmm_3 = torch.ops.aten.addmm(params_8, view_10, t_3)\0A view_11 = torch.ops.aten.view(addmm_3, [1, 128, 384]); addmm_3 = None\0A add_2 = torch.ops.aten.add(view_11, getitem); view_11 = getitem = None\0A native_layer_norm_1 = torch.ops.aten.native_layer_norm(add_2, [384], params_7, params_6, 1e-12)\0A getitem_3 = native_layer_norm_1[0]\0A getitem_4 = native_layer_norm_1[1]\0A getitem_5 = native_layer_norm_1[2]; native_layer_norm_1 = None\0A t_4 = torch.ops.aten.t(params_17)\0A view_12 = torch.ops.aten.view(getitem_3, [128, 384])\0A addmm_4 = torch.ops.aten.addmm(params_16, view_12, t_4)\0A view_13 = torch.ops.aten.view(addmm_4, [1, 128, 1536]); addmm_4 = None\0A gelu = torch.ops.aten.gelu(view_13)\0A t_5 = torch.ops.aten.t(params_21)\0A view_14 = torch.ops.aten.view(gelu, [128, 1536]); gelu = None\0A addmm_5 = torch.ops.aten.addmm(params_20, view_14, t_5)\0A view_15 = torch.ops.aten.view(addmm_5, [1, 128, 384]); addmm_5 = None\0A add_3 = torch.ops.aten.add(view_15, getitem_3); view_15 = getitem_3 = None\0A native_layer_norm_2 = torch.ops.aten.native_layer_norm(add_3, [384], params_19, params_18, 1e-12)\0A getitem_6 = native_layer_norm_2[0]\0A getitem_7 = native_layer_norm_2[1]\0A getitem_8 = native_layer_norm_2[2]; native_layer_norm_2 = None\0A t_6 = torch.ops.aten.t(params_29)\0A view_16 = torch.ops.aten.view(getitem_6, [128, 384])\0A addmm_6 = torch.ops.aten.addmm(params_28, view_16, t_6)\0A view_17 = torch.ops.aten.view(addmm_6, [1, 128, 384]); addmm_6 = None\0A t_7 = torch.ops.aten.t(params_27)\0A view_18 = torch.ops.aten.view(getitem_6, [128, 384])\0A addmm_7 = torch.ops.aten.addmm(params_26, view_18, t_7)\0A view_19 = torch.ops.aten.view(addmm_7, [1, 128, 384]); addmm_7 = None\0A view_20 = torch.ops.aten.view(view_19, 
[1, 128, 12, 32]); view_19 = None\0A permute_4 = torch.ops.aten.permute(view_20, [0, 2, 1, 3]); view_20 = None\0A t_8 = torch.ops.aten.t(params_31)\0A view_21 = torch.ops.aten.view(getitem_6, [128, 384])\0A addmm_8 = torch.ops.aten.addmm(params_30, view_21, t_8)\0A view_22 = torch.ops.aten.view(addmm_8, [1, 128, 384]); addmm_8 = None\0A view_23 = torch.ops.aten.view(view_22, [1, 128, 12, 32]); view_22 = None\0A permute_5 = torch.ops.aten.permute(view_23, [0, 2, 1, 3]); view_23 = None\0A view_24 = torch.ops.aten.view(view_17, [1, 128, 12, 32]); view_17 = None\0A permute_6 = torch.ops.aten.permute(view_24, [0, 2, 1, 3]); view_24 = None\0A transpose_1 = torch.ops.aten.transpose(permute_4, -1, -2); permute_4 = None\0A expand_5 = torch.ops.aten.expand(permute_6, [1, 12, 128, 32]); permute_6 = None\0A _reshape_alias_4 = torch.ops.aten._reshape_alias(expand_5, [12, 128, 32], [32, 384, 1]); expand_5 = None\0A expand_6 = torch.ops.aten.expand(transpose_1, [1, 12, 32, 128]); transpose_1 = None\0A _reshape_alias_5 = torch.ops.aten._reshape_alias(expand_6, [12, 32, 128], [32, 1, 384]); expand_6 = None\0A bmm_2 = torch.ops.aten.bmm(_reshape_alias_4, _reshape_alias_5)\0A _unsafe_view_2 = torch.ops.aten._unsafe_view(bmm_2, [1, 12, 128, 128]); bmm_2 = None\0A div_1 = torch.ops.aten.div(_unsafe_view_2, 5.656854249492381); _unsafe_view_2 = None\0A _tensor_constant0_1 = self._tensor_constant0\0A add_4 = torch.ops.aten.add(div_1, _tensor_constant0_1); div_1 = _tensor_constant0_1 = None\0A _softmax_1 = torch.ops.aten._softmax(add_4, -1, False); add_4 = None\0A detach_1 = torch.ops.aten.detach(_softmax_1)\0A expand_7 = torch.ops.aten.expand(_softmax_1, [1, 12, 128, 128]); _softmax_1 = None\0A _reshape_alias_6 = torch.ops.aten._reshape_alias(expand_7, [12, 128, 128], [16384, 128, 1]); expand_7 = None\0A expand_8 = torch.ops.aten.expand(permute_5, [1, 12, 128, 32]); permute_5 = None\0A _reshape_alias_7 = torch.ops.aten._reshape_alias(expand_8, [12, 128, 32], [32, 384, 1]); expand_8 = 
None\0A bmm_3 = torch.ops.aten.bmm(_reshape_alias_6, _reshape_alias_7)\0A _unsafe_view_3 = torch.ops.aten._unsafe_view(bmm_3, [1, 12, 128, 32]); bmm_3 = None\0A permute_7 = torch.ops.aten.permute(_unsafe_view_3, [0, 2, 1, 3]); _unsafe_view_3 = None\0A clone_1 = torch.ops.aten.clone(permute_7, memory_format = 0); permute_7 = None\0A view_25 = torch.ops.aten.view(clone_1, [1, 128, 384]); clone_1 = None\0A t_9 = torch.ops.aten.t(params_25)\0A view_26 = torch.ops.aten.view(view_25, [128, 384]); view_25 = None\0A addmm_9 = torch.ops.aten.addmm(params_24, view_26, t_9)\0A view_27 = torch.ops.aten.view(addmm_9, [1, 128, 384]); addmm_9 = None\0A add_5 = torch.ops.aten.add(view_27, getitem_6); view_27 = getitem_6 = None\0A native_layer_norm_3 = torch.ops.aten.native_layer_norm(add_5, [384], params_23, params_22, 1e-12)\0A getitem_9 = native_layer_norm_3[0]\0A getitem_10 = native_layer_norm_3[1]\0A getitem_11 = native_layer_norm_3[2]; native_layer_norm_3 = None\0A t_10 = torch.ops.aten.t(params_33)\0A view_28 = torch.ops.aten.view(getitem_9, [128, 384])\0A addmm_10 = torch.ops.aten.addmm(params_32, view_28, t_10)\0A view_29 = torch.ops.aten.view(addmm_10, [1, 128, 1536]); addmm_10 = None\0A gelu_1 = torch.ops.aten.gelu(view_29)\0A t_11 = torch.ops.aten.t(params_37)\0A view_30 = torch.ops.aten.view(gelu_1, [128, 1536]); gelu_1 = None\0A addmm_11 = torch.ops.aten.addmm(params_36, view_30, t_11)\0A view_31 = torch.ops.aten.view(addmm_11, [1, 128, 384]); addmm_11 = None\0A add_6 = torch.ops.aten.add(view_31, getitem_9); view_31 = getitem_9 = None\0A native_layer_norm_4 = torch.ops.aten.native_layer_norm(add_6, [384], params_35, params_34, 1e-12)\0A getitem_12 = native_layer_norm_4[0]\0A getitem_13 = native_layer_norm_4[1]\0A getitem_14 = native_layer_norm_4[2]; native_layer_norm_4 = None\0A t_12 = torch.ops.aten.t(params_77)\0A view_32 = torch.ops.aten.view(getitem_12, [128, 384])\0A addmm_12 = torch.ops.aten.addmm(params_76, view_32, t_12)\0A view_33 = 
torch.ops.aten.view(addmm_12, [1, 128, 384]); addmm_12 = None\0A t_13 = torch.ops.aten.t(params_75)\0A view_34 = torch.ops.aten.view(getitem_12, [128, 384])\0A addmm_13 = torch.ops.aten.addmm(params_74, view_34, t_13)\0A view_35 = torch.ops.aten.view(addmm_13, [1, 128, 384]); addmm_13 = None\0A view_36 = torch.ops.aten.view(view_35, [1, 128, 12, 32]); view_35 = None\0A permute_8 = torch.ops.aten.permute(view_36, [0, 2, 1, 3]); view_36 = None\0A t_14 = torch.ops.aten.t(params_79)\0A view_37 = torch.ops.aten.view(getitem_12, [128, 384])\0A addmm_14 = torch.ops.aten.addmm(params_78, view_37, t_14)\0A view_38 = torch.ops.aten.view(addmm_14, [1, 128, 384]); addmm_14 = None\0A view_39 = torch.ops.aten.view(view_38, [1, 128, 12, 32]); view_38 = None\0A permute_9 = torch.ops.aten.permute(view_39, [0, 2, 1, 3]); view_39 = None\0A view_40 = torch.ops.aten.view(view_33, [1, 128, 12, 32]); view_33 = None\0A permute_10 = torch.ops.aten.permute(view_40, [0, 2, 1, 3]); view_40 = None\0A transpose_2 = torch.ops.aten.transpose(permute_8, -1, -2); permute_8 = None\0A expand_9 = torch.ops.aten.expand(permute_10, [1, 12, 128, 32]); permute_10 = None\0A _reshape_alias_8 = torch.ops.aten._reshape_alias(expand_9, [12, 128, 32], [32, 384, 1]); expand_9 = None\0A expand_10 = torch.ops.aten.expand(transpose_2, [1, 12, 32, 128]); transpose_2 = None\0A _reshape_alias_9 = torch.ops.aten._reshape_alias(expand_10, [12, 32, 128], [32, 1, 384]); expand_10 = None\0A bmm_4 = torch.ops.aten.bmm(_reshape_alias_8, _reshape_alias_9)\0A _unsafe_view_4 = torch.ops.aten._unsafe_view(bmm_4, [1, 12, 128, 128]); bmm_4 = None\0A div_2 = torch.ops.aten.div(_unsafe_view_4, 5.656854249492381); _unsafe_view_4 = None\0A _tensor_constant0_2 = self._tensor_constant0\0A add_7 = torch.ops.aten.add(div_2, _tensor_constant0_2); div_2 = _tensor_constant0_2 = None\0A _softmax_2 = torch.ops.aten._softmax(add_7, -1, False); add_7 = None\0A detach_2 = torch.ops.aten.detach(_softmax_2)\0A expand_11 = 
torch.ops.aten.expand(_softmax_2, [1, 12, 128, 128]); _softmax_2 = None\0A _reshape_alias_10 = torch.ops.aten._reshape_alias(expand_11, [12, 128, 128], [16384, 128, 1]); expand_11 = None\0A expand_12 = torch.ops.aten.expand(permute_9, [1, 12, 128, 32]); permute_9 = None\0A _reshape_alias_11 = torch.ops.aten._reshape_alias(expand_12, [12, 128, 32], [32, 384, 1]); expand_12 = None\0A bmm_5 = torch.ops.aten.bmm(_reshape_alias_10, _reshape_alias_11)\0A _unsafe_view_5 = torch.ops.aten._unsafe_view(bmm_5, [1, 12, 128, 32]); bmm_5 = None\0A permute_11 = torch.ops.aten.permute(_unsafe_view_5, [0, 2, 1, 3]); _unsafe_view_5 = None\0A clone_2 = torch.ops.aten.clone(permute_11, memory_format = 0); permute_11 = None\0A view_41 = torch.ops.aten.view(clone_2, [1, 128, 384]); clone_2 = None\0A t_15 = torch.ops.aten.t(params_73)\0A view_42 = torch.ops.aten.view(view_41, [128, 384]); view_41 = None\0A addmm_15 = torch.ops.aten.addmm(params_72, view_42, t_15)\0A view_43 = torch.ops.aten.view(addmm_15, [1, 128, 384]); addmm_15 = None\0A add_8 = torch.ops.aten.add(view_43, getitem_12); view_43 = getitem_12 = None\0A native_layer_norm_5 = torch.ops.aten.native_layer_norm(add_8, [384], params_71, params_70, 1e-12)\0A getitem_15 = native_layer_norm_5[0]\0A getitem_16 = native_layer_norm_5[1]\0A getitem_17 = native_layer_norm_5[2]; native_layer_norm_5 = None\0A t_16 = torch.ops.aten.t(params_81)\0A view_44 = torch.ops.aten.view(getitem_15, [128, 384])\0A addmm_16 = torch.ops.aten.addmm(params_80, view_44, t_16)\0A view_45 = torch.ops.aten.view(addmm_16, [1, 128, 1536]); addmm_16 = None\0A gelu_2 = torch.ops.aten.gelu(view_45)\0A t_17 = torch.ops.aten.t(params_85)\0A view_46 = torch.ops.aten.view(gelu_2, [128, 1536]); gelu_2 = None\0A addmm_17 = torch.ops.aten.addmm(params_84, view_46, t_17)\0A view_47 = torch.ops.aten.view(addmm_17, [1, 128, 384]); addmm_17 = None\0A add_9 = torch.ops.aten.add(view_47, getitem_15); view_47 = getitem_15 = None\0A native_layer_norm_6 = 
torch.ops.aten.native_layer_norm(add_9, [384], params_83, params_82, 1e-12)\0A getitem_18 = native_layer_norm_6[0]\0A getitem_19 = native_layer_norm_6[1]\0A getitem_20 = native_layer_norm_6[2]; native_layer_norm_6 = None\0A t_18 = torch.ops.aten.t(params_93)\0A view_48 = torch.ops.aten.view(getitem_18, [128, 384])\0A addmm_18 = torch.ops.aten.addmm(params_92, view_48, t_18)\0A view_49 = torch.ops.aten.view(addmm_18, [1, 128, 384]); addmm_18 = None\0A t_19 = torch.ops.aten.t(params_91)\0A view_50 = torch.ops.aten.view(getitem_18, [128, 384])\0A addmm_19 = torch.ops.aten.addmm(params_90, view_50, t_19)\0A view_51 = torch.ops.aten.view(addmm_19, [1, 128, 384]); addmm_19 = None\0A view_52 = torch.ops.aten.view(view_51, [1, 128, 12, 32]); view_51 = None\0A permute_12 = torch.ops.aten.permute(view_52, [0, 2, 1, 3]); view_52 = None\0A t_20 = torch.ops.aten.t(params_95)\0A view_53 = torch.ops.aten.view(getitem_18, [128, 384])\0A addmm_20 = torch.ops.aten.addmm(params_94, view_53, t_20)\0A view_54 = torch.ops.aten.view(addmm_20, [1, 128, 384]); addmm_20 = None\0A view_55 = torch.ops.aten.view(view_54, [1, 128, 12, 32]); view_54 = None\0A permute_13 = torch.ops.aten.permute(view_55, [0, 2, 1, 3]); view_55 = None\0A view_56 = torch.ops.aten.view(view_49, [1, 128, 12, 32]); view_49 = None\0A permute_14 = torch.ops.aten.permute(view_56, [0, 2, 1, 3]); view_56 = None\0A transpose_3 = torch.ops.aten.transpose(permute_12, -1, -2); permute_12 = None\0A expand_13 = torch.ops.aten.expand(permute_14, [1, 12, 128, 32]); permute_14 = None\0A _reshape_alias_12 = torch.ops.aten._reshape_alias(expand_13, [12, 128, 32], [32, 384, 1]); expand_13 = None\0A expand_14 = torch.ops.aten.expand(transpose_3, [1, 12, 32, 128]); transpose_3 = None\0A _reshape_alias_13 = torch.ops.aten._reshape_alias(expand_14, [12, 32, 128], [32, 1, 384]); expand_14 = None\0A bmm_6 = torch.ops.aten.bmm(_reshape_alias_12, _reshape_alias_13)\0A _unsafe_view_6 = torch.ops.aten._unsafe_view(bmm_6, [1, 12, 128, 128]); 
bmm_6 = None\0A div_3 = torch.ops.aten.div(_unsafe_view_6, 5.656854249492381); _unsafe_view_6 = None\0A _tensor_constant0_3 = self._tensor_constant0\0A add_10 = torch.ops.aten.add(div_3, _tensor_constant0_3); div_3 = _tensor_constant0_3 = None\0A _softmax_3 = torch.ops.aten._softmax(add_10, -1, False); add_10 = None\0A detach_3 = torch.ops.aten.detach(_softmax_3)\0A expand_15 = torch.ops.aten.expand(_softmax_3, [1, 12, 128, 128]); _softmax_3 = None\0A _reshape_alias_14 = torch.ops.aten._reshape_alias(expand_15, [12, 128, 128], [16384, 128, 1]); expand_15 = None\0A expand_16 = torch.ops.aten.expand(permute_13, [1, 12, 128, 32]); permute_13 = None\0A _reshape_alias_15 = torch.ops.aten._reshape_alias(expand_16, [12, 128, 32], [32, 384, 1]); expand_16 = None\0A bmm_7 = torch.ops.aten.bmm(_reshape_alias_14, _reshape_alias_15)\0A _unsafe_view_7 = torch.ops.aten._unsafe_view(bmm_7, [1, 12, 128, 32]); bmm_7 = None\0A permute_15 = torch.ops.aten.permute(_unsafe_view_7, [0, 2, 1, 3]); _unsafe_view_7 = None\0A clone_3 = torch.ops.aten.clone(permute_15, memory_format = 0); permute_15 = None\0A view_57 = torch.ops.aten.view(clone_3, [1, 128, 384]); clone_3 = None\0A t_21 = torch.ops.aten.t(params_89)\0A view_58 = torch.ops.aten.view(view_57, [128, 384]); view_57 = None\0A addmm_21 = torch.ops.aten.addmm(params_88, view_58, t_21)\0A view_59 = torch.ops.aten.view(addmm_21, [1, 128, 384]); addmm_21 = None\0A add_11 = torch.ops.aten.add(view_59, getitem_18); view_59 = getitem_18 = None\0A native_layer_norm_7 = torch.ops.aten.native_layer_norm(add_11, [384], params_87, params_86, 1e-12)\0A getitem_21 = native_layer_norm_7[0]\0A getitem_22 = native_layer_norm_7[1]\0A getitem_23 = native_layer_norm_7[2]; native_layer_norm_7 = None\0A t_22 = torch.ops.aten.t(params_97)\0A view_60 = torch.ops.aten.view(getitem_21, [128, 384])\0A addmm_22 = torch.ops.aten.addmm(params_96, view_60, t_22)\0A view_61 = torch.ops.aten.view(addmm_22, [1, 128, 1536]); addmm_22 = None\0A gelu_3 = 
torch.ops.aten.gelu(view_61)\0A t_23 = torch.ops.aten.t(params_101)\0A view_62 = torch.ops.aten.view(gelu_3, [128, 1536]); gelu_3 = None\0A addmm_23 = torch.ops.aten.addmm(params_100, view_62, t_23)\0A view_63 = torch.ops.aten.view(addmm_23, [1, 128, 384]); addmm_23 = None\0A add_12 = torch.ops.aten.add(view_63, getitem_21); view_63 = getitem_21 = None\0A native_layer_norm_8 = torch.ops.aten.native_layer_norm(add_12, [384], params_99, params_98, 1e-12)\0A getitem_24 = native_layer_norm_8[0]\0A getitem_25 = native_layer_norm_8[1]\0A getitem_26 = native_layer_norm_8[2]; native_layer_norm_8 = None\0A t_24 = torch.ops.aten.t(params_109)\0A view_64 = torch.ops.aten.view(getitem_24, [128, 384])\0A addmm_24 = torch.ops.aten.addmm(params_108, view_64, t_24)\0A view_65 = torch.ops.aten.view(addmm_24, [1, 128, 384]); addmm_24 = None\0A t_25 = torch.ops.aten.t(params_107)\0A view_66 = torch.ops.aten.view(getitem_24, [128, 384])\0A addmm_25 = torch.ops.aten.addmm(params_106, view_66, t_25)\0A view_67 = torch.ops.aten.view(addmm_25, [1, 128, 384]); addmm_25 = None\0A view_68 = torch.ops.aten.view(view_67, [1, 128, 12, 32]); view_67 = None\0A permute_16 = torch.ops.aten.permute(view_68, [0, 2, 1, 3]); view_68 = None\0A t_26 = torch.ops.aten.t(params_111)\0A view_69 = torch.ops.aten.view(getitem_24, [128, 384])\0A addmm_26 = torch.ops.aten.addmm(params_110, view_69, t_26)\0A view_70 = torch.ops.aten.view(addmm_26, [1, 128, 384]); addmm_26 = None\0A view_71 = torch.ops.aten.view(view_70, [1, 128, 12, 32]); view_70 = None\0A permute_17 = torch.ops.aten.permute(view_71, [0, 2, 1, 3]); view_71 = None\0A view_72 = torch.ops.aten.view(view_65, [1, 128, 12, 32]); view_65 = None\0A permute_18 = torch.ops.aten.permute(view_72, [0, 2, 1, 3]); view_72 = None\0A transpose_4 = torch.ops.aten.transpose(permute_16, -1, -2); permute_16 = None\0A expand_17 = torch.ops.aten.expand(permute_18, [1, 12, 128, 32]); permute_18 = None\0A _reshape_alias_16 = torch.ops.aten._reshape_alias(expand_17, [12, 
128, 32], [32, 384, 1]); expand_17 = None\0A expand_18 = torch.ops.aten.expand(transpose_4, [1, 12, 32, 128]); transpose_4 = None\0A _reshape_alias_17 = torch.ops.aten._reshape_alias(expand_18, [12, 32, 128], [32, 1, 384]); expand_18 = None\0A bmm_8 = torch.ops.aten.bmm(_reshape_alias_16, _reshape_alias_17)\0A _unsafe_view_8 = torch.ops.aten._unsafe_view(bmm_8, [1, 12, 128, 128]); bmm_8 = None\0A div_4 = torch.ops.aten.div(_unsafe_view_8, 5.656854249492381); _unsafe_view_8 = None\0A _tensor_constant0_4 = self._tensor_constant0\0A add_13 = torch.ops.aten.add(div_4, _tensor_constant0_4); div_4 = _tensor_constant0_4 = None\0A _softmax_4 = torch.ops.aten._softmax(add_13, -1, False); add_13 = None\0A detach_4 = torch.ops.aten.detach(_softmax_4)\0A expand_19 = torch.ops.aten.expand(_softmax_4, [1, 12, 128, 128]); _softmax_4 = None\0A _reshape_alias_18 = torch.ops.aten._reshape_alias(expand_19, [12, 128, 128], [16384, 128, 1]); expand_19 = None\0A expand_20 = torch.ops.aten.expand(permute_17, [1, 12, 128, 32]); permute_17 = None\0A _reshape_alias_19 = torch.ops.aten._reshape_alias(expand_20, [12, 128, 32], [32, 384, 1]); expand_20 = None\0A bmm_9 = torch.ops.aten.bmm(_reshape_alias_18, _reshape_alias_19)\0A _unsafe_view_9 = torch.ops.aten._unsafe_view(bmm_9, [1, 12, 128, 32]); bmm_9 = None\0A permute_19 = torch.ops.aten.permute(_unsafe_view_9, [0, 2, 1, 3]); _unsafe_view_9 = None\0A clone_4 = torch.ops.aten.clone(permute_19, memory_format = 0); permute_19 = None\0A view_73 = torch.ops.aten.view(clone_4, [1, 128, 384]); clone_4 = None\0A t_27 = torch.ops.aten.t(params_105)\0A view_74 = torch.ops.aten.view(view_73, [128, 384]); view_73 = None\0A addmm_27 = torch.ops.aten.addmm(params_104, view_74, t_27)\0A view_75 = torch.ops.aten.view(addmm_27, [1, 128, 384]); addmm_27 = None\0A add_14 = torch.ops.aten.add(view_75, getitem_24); view_75 = getitem_24 = None\0A native_layer_norm_9 = torch.ops.aten.native_layer_norm(add_14, [384], params_103, params_102, 1e-12)\0A getitem_27 = 
native_layer_norm_9[0]\0A getitem_28 = native_layer_norm_9[1]\0A getitem_29 = native_layer_norm_9[2]; native_layer_norm_9 = None\0A t_28 = torch.ops.aten.t(params_113)\0A view_76 = torch.ops.aten.view(getitem_27, [128, 384])\0A addmm_28 = torch.ops.aten.addmm(params_112, view_76, t_28)\0A view_77 = torch.ops.aten.view(addmm_28, [1, 128, 1536]); addmm_28 = None\0A gelu_4 = torch.ops.aten.gelu(view_77)\0A t_29 = torch.ops.aten.t(params_117)\0A view_78 = torch.ops.aten.view(gelu_4, [128, 1536]); gelu_4 = None\0A addmm_29 = torch.ops.aten.addmm(params_116, view_78, t_29)\0A view_79 = torch.ops.aten.view(addmm_29, [1, 128, 384]); addmm_29 = None\0A add_15 = torch.ops.aten.add(view_79, getitem_27); view_79 = getitem_27 = None\0A native_layer_norm_10 = torch.ops.aten.native_layer_norm(add_15, [384], params_115, params_114, 1e-12)\0A getitem_30 = native_layer_norm_10[0]\0A getitem_31 = native_layer_norm_10[1]\0A getitem_32 = native_layer_norm_10[2]; native_layer_norm_10 = None\0A t_30 = torch.ops.aten.t(params_125)\0A view_80 = torch.ops.aten.view(getitem_30, [128, 384])\0A addmm_30 = torch.ops.aten.addmm(params_124, view_80, t_30)\0A view_81 = torch.ops.aten.view(addmm_30, [1, 128, 384]); addmm_30 = None\0A t_31 = torch.ops.aten.t(params_123)\0A view_82 = torch.ops.aten.view(getitem_30, [128, 384])\0A addmm_31 = torch.ops.aten.addmm(params_122, view_82, t_31)\0A view_83 = torch.ops.aten.view(addmm_31, [1, 128, 384]); addmm_31 = None\0A view_84 = torch.ops.aten.view(view_83, [1, 128, 12, 32]); view_83 = None\0A permute_20 = torch.ops.aten.permute(view_84, [0, 2, 1, 3]); view_84 = None\0A t_32 = torch.ops.aten.t(params_127)\0A view_85 = torch.ops.aten.view(getitem_30, [128, 384])\0A addmm_32 = torch.ops.aten.addmm(params_126, view_85, t_32)\0A view_86 = torch.ops.aten.view(addmm_32, [1, 128, 384]); addmm_32 = None\0A view_87 = torch.ops.aten.view(view_86, [1, 128, 12, 32]); view_86 = None\0A permute_21 = torch.ops.aten.permute(view_87, [0, 2, 1, 3]); view_87 = None\0A 
view_88 = torch.ops.aten.view(view_81, [1, 128, 12, 32]); view_81 = None\0A permute_22 = torch.ops.aten.permute(view_88, [0, 2, 1, 3]); view_88 = None\0A transpose_5 = torch.ops.aten.transpose(permute_20, -1, -2); permute_20 = None\0A expand_21 = torch.ops.aten.expand(permute_22, [1, 12, 128, 32]); permute_22 = None\0A _reshape_alias_20 = torch.ops.aten._reshape_alias(expand_21, [12, 128, 32], [32, 384, 1]); expand_21 = None\0A expand_22 = torch.ops.aten.expand(transpose_5, [1, 12, 32, 128]); transpose_5 = None\0A _reshape_alias_21 = torch.ops.aten._reshape_alias(expand_22, [12, 32, 128], [32, 1, 384]); expand_22 = None\0A bmm_10 = torch.ops.aten.bmm(_reshape_alias_20, _reshape_alias_21)\0A _unsafe_view_10 = torch.ops.aten._unsafe_view(bmm_10, [1, 12, 128, 128]); bmm_10 = None\0A div_5 = torch.ops.aten.div(_unsafe_view_10, 5.656854249492381); _unsafe_view_10 = None\0A _tensor_constant0_5 = self._tensor_constant0\0A add_16 = torch.ops.aten.add(div_5, _tensor_constant0_5); div_5 = _tensor_constant0_5 = None\0A _softmax_5 = torch.ops.aten._softmax(add_16, -1, False); add_16 = None\0A detach_5 = torch.ops.aten.detach(_softmax_5)\0A expand_23 = torch.ops.aten.expand(_softmax_5, [1, 12, 128, 128]); _softmax_5 = None\0A _reshape_alias_22 = torch.ops.aten._reshape_alias(expand_23, [12, 128, 128], [16384, 128, 1]); expand_23 = None\0A expand_24 = torch.ops.aten.expand(permute_21, [1, 12, 128, 32]); permute_21 = None\0A _reshape_alias_23 = torch.ops.aten._reshape_alias(expand_24, [12, 128, 32], [32, 384, 1]); expand_24 = None\0A bmm_11 = torch.ops.aten.bmm(_reshape_alias_22, _reshape_alias_23)\0A _unsafe_view_11 = torch.ops.aten._unsafe_view(bmm_11, [1, 12, 128, 32]); bmm_11 = None\0A permute_23 = torch.ops.aten.permute(_unsafe_view_11, [0, 2, 1, 3]); _unsafe_view_11 = None\0A clone_5 = torch.ops.aten.clone(permute_23, memory_format = 0); permute_23 = None\0A view_89 = torch.ops.aten.view(clone_5, [1, 128, 384]); clone_5 = None\0A t_33 = torch.ops.aten.t(params_121)\0A 
view_90 = torch.ops.aten.view(view_89, [128, 384]); view_89 = None\0A addmm_33 = torch.ops.aten.addmm(params_120, view_90, t_33)\0A view_91 = torch.ops.aten.view(addmm_33, [1, 128, 384]); addmm_33 = None\0A add_17 = torch.ops.aten.add(view_91, getitem_30); view_91 = getitem_30 = None\0A native_layer_norm_11 = torch.ops.aten.native_layer_norm(add_17, [384], params_119, params_118, 1e-12)\0A getitem_33 = native_layer_norm_11[0]\0A getitem_34 = native_layer_norm_11[1]\0A getitem_35 = native_layer_norm_11[2]; native_layer_norm_11 = None\0A t_34 = torch.ops.aten.t(params_129)\0A view_92 = torch.ops.aten.view(getitem_33, [128, 384])\0A addmm_34 = torch.ops.aten.addmm(params_128, view_92, t_34)\0A view_93 = torch.ops.aten.view(addmm_34, [1, 128, 1536]); addmm_34 = None\0A gelu_5 = torch.ops.aten.gelu(view_93)\0A t_35 = torch.ops.aten.t(params_133)\0A view_94 = torch.ops.aten.view(gelu_5, [128, 1536]); gelu_5 = None\0A addmm_35 = torch.ops.aten.addmm(params_132, view_94, t_35)\0A view_95 = torch.ops.aten.view(addmm_35, [1, 128, 384]); addmm_35 = None\0A add_18 = torch.ops.aten.add(view_95, getitem_33); view_95 = getitem_33 = None\0A native_layer_norm_12 = torch.ops.aten.native_layer_norm(add_18, [384], params_131, params_130, 1e-12)\0A getitem_36 = native_layer_norm_12[0]\0A getitem_37 = native_layer_norm_12[1]\0A getitem_38 = native_layer_norm_12[2]; native_layer_norm_12 = None\0A t_36 = torch.ops.aten.t(params_141)\0A view_96 = torch.ops.aten.view(getitem_36, [128, 384])\0A addmm_36 = torch.ops.aten.addmm(params_140, view_96, t_36)\0A view_97 = torch.ops.aten.view(addmm_36, [1, 128, 384]); addmm_36 = None\0A t_37 = torch.ops.aten.t(params_139)\0A view_98 = torch.ops.aten.view(getitem_36, [128, 384])\0A addmm_37 = torch.ops.aten.addmm(params_138, view_98, t_37)\0A view_99 = torch.ops.aten.view(addmm_37, [1, 128, 384]); addmm_37 = None\0A view_100 = torch.ops.aten.view(view_99, [1, 128, 12, 32]); view_99 = None\0A permute_24 = torch.ops.aten.permute(view_100, [0, 2, 1, 
3]); view_100 = None\0A t_38 = torch.ops.aten.t(params_143)\0A view_101 = torch.ops.aten.view(getitem_36, [128, 384])\0A addmm_38 = torch.ops.aten.addmm(params_142, view_101, t_38)\0A view_102 = torch.ops.aten.view(addmm_38, [1, 128, 384]); addmm_38 = None\0A view_103 = torch.ops.aten.view(view_102, [1, 128, 12, 32]); view_102 = None\0A permute_25 = torch.ops.aten.permute(view_103, [0, 2, 1, 3]); view_103 = None\0A view_104 = torch.ops.aten.view(view_97, [1, 128, 12, 32]); view_97 = None\0A permute_26 = torch.ops.aten.permute(view_104, [0, 2, 1, 3]); view_104 = None\0A transpose_6 = torch.ops.aten.transpose(permute_24, -1, -2); permute_24 = None\0A expand_25 = torch.ops.aten.expand(permute_26, [1, 12, 128, 32]); permute_26 = None\0A _reshape_alias_24 = torch.ops.aten._reshape_alias(expand_25, [12, 128, 32], [32, 384, 1]); expand_25 = None\0A expand_26 = torch.ops.aten.expand(transpose_6, [1, 12, 32, 128]); transpose_6 = None\0A _reshape_alias_25 = torch.ops.aten._reshape_alias(expand_26, [12, 32, 128], [32, 1, 384]); expand_26 = None\0A bmm_12 = torch.ops.aten.bmm(_reshape_alias_24, _reshape_alias_25)\0A _unsafe_view_12 = torch.ops.aten._unsafe_view(bmm_12, [1, 12, 128, 128]); bmm_12 = None\0A div_6 = torch.ops.aten.div(_unsafe_view_12, 5.656854249492381); _unsafe_view_12 = None\0A _tensor_constant0_6 = self._tensor_constant0\0A add_19 = torch.ops.aten.add(div_6, _tensor_constant0_6); div_6 = _tensor_constant0_6 = None\0A _softmax_6 = torch.ops.aten._softmax(add_19, -1, False); add_19 = None\0A detach_6 = torch.ops.aten.detach(_softmax_6)\0A expand_27 = torch.ops.aten.expand(_softmax_6, [1, 12, 128, 128]); _softmax_6 = None\0A _reshape_alias_26 = torch.ops.aten._reshape_alias(expand_27, [12, 128, 128], [16384, 128, 1]); expand_27 = None\0A expand_28 = torch.ops.aten.expand(permute_25, [1, 12, 128, 32]); permute_25 = None\0A _reshape_alias_27 = torch.ops.aten._reshape_alias(expand_28, [12, 128, 32], [32, 384, 1]); expand_28 = None\0A bmm_13 = 
torch.ops.aten.bmm(_reshape_alias_26, _reshape_alias_27)\0A _unsafe_view_13 = torch.ops.aten._unsafe_view(bmm_13, [1, 12, 128, 32]); bmm_13 = None\0A permute_27 = torch.ops.aten.permute(_unsafe_view_13, [0, 2, 1, 3]); _unsafe_view_13 = None\0A clone_6 = torch.ops.aten.clone(permute_27, memory_format = 0); permute_27 = None\0A view_105 = torch.ops.aten.view(clone_6, [1, 128, 384]); clone_6 = None\0A t_39 = torch.ops.aten.t(params_137)\0A view_106 = torch.ops.aten.view(view_105, [128, 384]); view_105 = None\0A addmm_39 = torch.ops.aten.addmm(params_136, view_106, t_39)\0A view_107 = torch.ops.aten.view(addmm_39, [1, 128, 384]); addmm_39 = None\0A add_20 = torch.ops.aten.add(view_107, getitem_36); view_107 = getitem_36 = None\0A native_layer_norm_13 = torch.ops.aten.native_layer_norm(add_20, [384], params_135, params_134, 1e-12)\0A getitem_39 = native_layer_norm_13[0]\0A getitem_40 = native_layer_norm_13[1]\0A getitem_41 = native_layer_norm_13[2]; native_layer_norm_13 = None\0A t_40 = torch.ops.aten.t(params_145)\0A view_108 = torch.ops.aten.view(getitem_39, [128, 384])\0A addmm_40 = torch.ops.aten.addmm(params_144, view_108, t_40)\0A view_109 = torch.ops.aten.view(addmm_40, [1, 128, 1536]); addmm_40 = None\0A gelu_6 = torch.ops.aten.gelu(view_109)\0A t_41 = torch.ops.aten.t(params_149)\0A view_110 = torch.ops.aten.view(gelu_6, [128, 1536]); gelu_6 = None\0A addmm_41 = torch.ops.aten.addmm(params_148, view_110, t_41)\0A view_111 = torch.ops.aten.view(addmm_41, [1, 128, 384]); addmm_41 = None\0A add_21 = torch.ops.aten.add(view_111, getitem_39); view_111 = getitem_39 = None\0A native_layer_norm_14 = torch.ops.aten.native_layer_norm(add_21, [384], params_147, params_146, 1e-12)\0A getitem_42 = native_layer_norm_14[0]\0A getitem_43 = native_layer_norm_14[1]\0A getitem_44 = native_layer_norm_14[2]; native_layer_norm_14 = None\0A t_42 = torch.ops.aten.t(params_157)\0A view_112 = torch.ops.aten.view(getitem_42, [128, 384])\0A addmm_42 = torch.ops.aten.addmm(params_156, 
view_112, t_42)\0A view_113 = torch.ops.aten.view(addmm_42, [1, 128, 384]); addmm_42 = None\0A t_43 = torch.ops.aten.t(params_155)\0A view_114 = torch.ops.aten.view(getitem_42, [128, 384])\0A addmm_43 = torch.ops.aten.addmm(params_154, view_114, t_43)\0A view_115 = torch.ops.aten.view(addmm_43, [1, 128, 384]); addmm_43 = None\0A view_116 = torch.ops.aten.view(view_115, [1, 128, 12, 32]); view_115 = None\0A permute_28 = torch.ops.aten.permute(view_116, [0, 2, 1, 3]); view_116 = None\0A t_44 = torch.ops.aten.t(params_159)\0A view_117 = torch.ops.aten.view(getitem_42, [128, 384])\0A addmm_44 = torch.ops.aten.addmm(params_158, view_117, t_44)\0A view_118 = torch.ops.aten.view(addmm_44, [1, 128, 384]); addmm_44 = None\0A view_119 = torch.ops.aten.view(view_118, [1, 128, 12, 32]); view_118 = None\0A permute_29 = torch.ops.aten.permute(view_119, [0, 2, 1, 3]); view_119 = None\0A view_120 = torch.ops.aten.view(view_113, [1, 128, 12, 32]); view_113 = None\0A permute_30 = torch.ops.aten.permute(view_120, [0, 2, 1, 3]); view_120 = None\0A transpose_7 = torch.ops.aten.transpose(permute_28, -1, -2); permute_28 = None\0A expand_29 = torch.ops.aten.expand(permute_30, [1, 12, 128, 32]); permute_30 = None\0A _reshape_alias_28 = torch.ops.aten._reshape_alias(expand_29, [12, 128, 32], [32, 384, 1]); expand_29 = None\0A expand_30 = torch.ops.aten.expand(transpose_7, [1, 12, 32, 128]); transpose_7 = None\0A _reshape_alias_29 = torch.ops.aten._reshape_alias(expand_30, [12, 32, 128], [32, 1, 384]); expand_30 = None\0A bmm_14 = torch.ops.aten.bmm(_reshape_alias_28, _reshape_alias_29)\0A _unsafe_view_14 = torch.ops.aten._unsafe_view(bmm_14, [1, 12, 128, 128]); bmm_14 = None\0A div_7 = torch.ops.aten.div(_unsafe_view_14, 5.656854249492381); _unsafe_view_14 = None\0A _tensor_constant0_7 = self._tensor_constant0\0A add_22 = torch.ops.aten.add(div_7, _tensor_constant0_7); div_7 = _tensor_constant0_7 = None\0A _softmax_7 = torch.ops.aten._softmax(add_22, -1, False); add_22 = None\0A detach_7 = 
torch.ops.aten.detach(_softmax_7)\0A expand_31 = torch.ops.aten.expand(_softmax_7, [1, 12, 128, 128]); _softmax_7 = None\0A _reshape_alias_30 = torch.ops.aten._reshape_alias(expand_31, [12, 128, 128], [16384, 128, 1]); expand_31 = None\0A expand_32 = torch.ops.aten.expand(permute_29, [1, 12, 128, 32]); permute_29 = None\0A _reshape_alias_31 = torch.ops.aten._reshape_alias(expand_32, [12, 128, 32], [32, 384, 1]); expand_32 = None\0A bmm_15 = torch.ops.aten.bmm(_reshape_alias_30, _reshape_alias_31)\0A _unsafe_view_15 = torch.ops.aten._unsafe_view(bmm_15, [1, 12, 128, 32]); bmm_15 = None\0A permute_31 = torch.ops.aten.permute(_unsafe_view_15, [0, 2, 1, 3]); _unsafe_view_15 = None\0A clone_7 = torch.ops.aten.clone(permute_31, memory_format = 0); permute_31 = None\0A view_121 = torch.ops.aten.view(clone_7, [1, 128, 384]); clone_7 = None\0A t_45 = torch.ops.aten.t(params_153)\0A view_122 = torch.ops.aten.view(view_121, [128, 384]); view_121 = None\0A addmm_45 = torch.ops.aten.addmm(params_152, view_122, t_45)\0A view_123 = torch.ops.aten.view(addmm_45, [1, 128, 384]); addmm_45 = None\0A add_23 = torch.ops.aten.add(view_123, getitem_42); view_123 = getitem_42 = None\0A native_layer_norm_15 = torch.ops.aten.native_layer_norm(add_23, [384], params_151, params_150, 1e-12)\0A getitem_45 = native_layer_norm_15[0]\0A getitem_46 = native_layer_norm_15[1]\0A getitem_47 = native_layer_norm_15[2]; native_layer_norm_15 = None\0A t_46 = torch.ops.aten.t(params_161)\0A view_124 = torch.ops.aten.view(getitem_45, [128, 384])\0A addmm_46 = torch.ops.aten.addmm(params_160, view_124, t_46)\0A view_125 = torch.ops.aten.view(addmm_46, [1, 128, 1536]); addmm_46 = None\0A gelu_7 = torch.ops.aten.gelu(view_125)\0A t_47 = torch.ops.aten.t(params_165)\0A view_126 = torch.ops.aten.view(gelu_7, [128, 1536]); gelu_7 = None\0A addmm_47 = torch.ops.aten.addmm(params_164, view_126, t_47)\0A view_127 = torch.ops.aten.view(addmm_47, [1, 128, 384]); addmm_47 = None\0A add_24 = torch.ops.aten.add(view_127, 
getitem_45); view_127 = getitem_45 = None\0A native_layer_norm_16 = torch.ops.aten.native_layer_norm(add_24, [384], params_163, params_162, 1e-12)\0A getitem_48 = native_layer_norm_16[0]\0A getitem_49 = native_layer_norm_16[1]\0A getitem_50 = native_layer_norm_16[2]; native_layer_norm_16 = None\0A t_48 = torch.ops.aten.t(params_173)\0A view_128 = torch.ops.aten.view(getitem_48, [128, 384])\0A addmm_48 = torch.ops.aten.addmm(params_172, view_128, t_48)\0A view_129 = torch.ops.aten.view(addmm_48, [1, 128, 384]); addmm_48 = None\0A t_49 = torch.ops.aten.t(params_171)\0A view_130 = torch.ops.aten.view(getitem_48, [128, 384])\0A addmm_49 = torch.ops.aten.addmm(params_170, view_130, t_49)\0A view_131 = torch.ops.aten.view(addmm_49, [1, 128, 384]); addmm_49 = None\0A view_132 = torch.ops.aten.view(view_131, [1, 128, 12, 32]); view_131 = None\0A permute_32 = torch.ops.aten.permute(view_132, [0, 2, 1, 3]); view_132 = None\0A t_50 = torch.ops.aten.t(params_175)\0A view_133 = torch.ops.aten.view(getitem_48, [128, 384])\0A addmm_50 = torch.ops.aten.addmm(params_174, view_133, t_50)\0A view_134 = torch.ops.aten.view(addmm_50, [1, 128, 384]); addmm_50 = None\0A view_135 = torch.ops.aten.view(view_134, [1, 128, 12, 32]); view_134 = None\0A permute_33 = torch.ops.aten.permute(view_135, [0, 2, 1, 3]); view_135 = None\0A view_136 = torch.ops.aten.view(view_129, [1, 128, 12, 32]); view_129 = None\0A permute_34 = torch.ops.aten.permute(view_136, [0, 2, 1, 3]); view_136 = None\0A transpose_8 = torch.ops.aten.transpose(permute_32, -1, -2); permute_32 = None\0A expand_33 = torch.ops.aten.expand(permute_34, [1, 12, 128, 32]); permute_34 = None\0A _reshape_alias_32 = torch.ops.aten._reshape_alias(expand_33, [12, 128, 32], [32, 384, 1]); expand_33 = None\0A expand_34 = torch.ops.aten.expand(transpose_8, [1, 12, 32, 128]); transpose_8 = None\0A _reshape_alias_33 = torch.ops.aten._reshape_alias(expand_34, [12, 32, 128], [32, 1, 384]); expand_34 = None\0A bmm_16 = 
torch.ops.aten.bmm(_reshape_alias_32, _reshape_alias_33)\0A _unsafe_view_16 = torch.ops.aten._unsafe_view(bmm_16, [1, 12, 128, 128]); bmm_16 = None\0A div_8 = torch.ops.aten.div(_unsafe_view_16, 5.656854249492381); _unsafe_view_16 = None\0A _tensor_constant0_8 = self._tensor_constant0\0A add_25 = torch.ops.aten.add(div_8, _tensor_constant0_8); div_8 = _tensor_constant0_8 = None\0A _softmax_8 = torch.ops.aten._softmax(add_25, -1, False); add_25 = None\0A detach_8 = torch.ops.aten.detach(_softmax_8)\0A expand_35 = torch.ops.aten.expand(_softmax_8, [1, 12, 128, 128]); _softmax_8 = None\0A _reshape_alias_34 = torch.ops.aten._reshape_alias(expand_35, [12, 128, 128], [16384, 128, 1]); expand_35 = None\0A expand_36 = torch.ops.aten.expand(permute_33, [1, 12, 128, 32]); permute_33 = None\0A _reshape_alias_35 = torch.ops.aten._reshape_alias(expand_36, [12, 128, 32], [32, 384, 1]); expand_36 = None\0A bmm_17 = torch.ops.aten.bmm(_reshape_alias_34, _reshape_alias_35)\0A _unsafe_view_17 = torch.ops.aten._unsafe_view(bmm_17, [1, 12, 128, 32]); bmm_17 = None\0A permute_35 = torch.ops.aten.permute(_unsafe_view_17, [0, 2, 1, 3]); _unsafe_view_17 = None\0A clone_8 = torch.ops.aten.clone(permute_35, memory_format = 0); permute_35 = None\0A view_137 = torch.ops.aten.view(clone_8, [1, 128, 384]); clone_8 = None\0A t_51 = torch.ops.aten.t(params_169)\0A view_138 = torch.ops.aten.view(view_137, [128, 384]); view_137 = None\0A addmm_51 = torch.ops.aten.addmm(params_168, view_138, t_51)\0A view_139 = torch.ops.aten.view(addmm_51, [1, 128, 384]); addmm_51 = None\0A add_26 = torch.ops.aten.add(view_139, getitem_48); view_139 = getitem_48 = None\0A native_layer_norm_17 = torch.ops.aten.native_layer_norm(add_26, [384], params_167, params_166, 1e-12)\0A getitem_51 = native_layer_norm_17[0]\0A getitem_52 = native_layer_norm_17[1]\0A getitem_53 = native_layer_norm_17[2]; native_layer_norm_17 = None\0A t_52 = torch.ops.aten.t(params_177)\0A view_140 = torch.ops.aten.view(getitem_51, [128, 
384])\0A addmm_52 = torch.ops.aten.addmm(params_176, view_140, t_52)\0A view_141 = torch.ops.aten.view(addmm_52, [1, 128, 1536]); addmm_52 = None\0A gelu_8 = torch.ops.aten.gelu(view_141)\0A t_53 = torch.ops.aten.t(params_181)\0A view_142 = torch.ops.aten.view(gelu_8, [128, 1536]); gelu_8 = None\0A addmm_53 = torch.ops.aten.addmm(params_180, view_142, t_53)\0A view_143 = torch.ops.aten.view(addmm_53, [1, 128, 384]); addmm_53 = None\0A add_27 = torch.ops.aten.add(view_143, getitem_51); view_143 = getitem_51 = None\0A native_layer_norm_18 = torch.ops.aten.native_layer_norm(add_27, [384], params_179, params_178, 1e-12)\0A getitem_54 = native_layer_norm_18[0]\0A getitem_55 = native_layer_norm_18[1]\0A getitem_56 = native_layer_norm_18[2]; native_layer_norm_18 = None\0A t_54 = torch.ops.aten.t(params_189)\0A view_144 = torch.ops.aten.view(getitem_54, [128, 384])\0A addmm_54 = torch.ops.aten.addmm(params_188, view_144, t_54)\0A view_145 = torch.ops.aten.view(addmm_54, [1, 128, 384]); addmm_54 = None\0A t_55 = torch.ops.aten.t(params_187)\0A view_146 = torch.ops.aten.view(getitem_54, [128, 384])\0A addmm_55 = torch.ops.aten.addmm(params_186, view_146, t_55)\0A view_147 = torch.ops.aten.view(addmm_55, [1, 128, 384]); addmm_55 = None\0A view_148 = torch.ops.aten.view(view_147, [1, 128, 12, 32]); view_147 = None\0A permute_36 = torch.ops.aten.permute(view_148, [0, 2, 1, 3]); view_148 = None\0A t_56 = torch.ops.aten.t(params_191)\0A view_149 = torch.ops.aten.view(getitem_54, [128, 384])\0A addmm_56 = torch.ops.aten.addmm(params_190, view_149, t_56)\0A view_150 = torch.ops.aten.view(addmm_56, [1, 128, 384]); addmm_56 = None\0A view_151 = torch.ops.aten.view(view_150, [1, 128, 12, 32]); view_150 = None\0A permute_37 = torch.ops.aten.permute(view_151, [0, 2, 1, 3]); view_151 = None\0A view_152 = torch.ops.aten.view(view_145, [1, 128, 12, 32]); view_145 = None\0A permute_38 = torch.ops.aten.permute(view_152, [0, 2, 1, 3]); view_152 = None\0A transpose_9 = 
torch.ops.aten.transpose(permute_36, -1, -2); permute_36 = None\0A expand_37 = torch.ops.aten.expand(permute_38, [1, 12, 128, 32]); permute_38 = None\0A _reshape_alias_36 = torch.ops.aten._reshape_alias(expand_37, [12, 128, 32], [32, 384, 1]); expand_37 = None\0A expand_38 = torch.ops.aten.expand(transpose_9, [1, 12, 32, 128]); transpose_9 = None\0A _reshape_alias_37 = torch.ops.aten._reshape_alias(expand_38, [12, 32, 128], [32, 1, 384]); expand_38 = None\0A bmm_18 = torch.ops.aten.bmm(_reshape_alias_36, _reshape_alias_37)\0A _unsafe_view_18 = torch.ops.aten._unsafe_view(bmm_18, [1, 12, 128, 128]); bmm_18 = None\0A div_9 = torch.ops.aten.div(_unsafe_view_18, 5.656854249492381); _unsafe_view_18 = None\0A _tensor_constant0_9 = self._tensor_constant0\0A add_28 = torch.ops.aten.add(div_9, _tensor_constant0_9); div_9 = _tensor_constant0_9 = None\0A _softmax_9 = torch.ops.aten._softmax(add_28, -1, False); add_28 = None\0A detach_9 = torch.ops.aten.detach(_softmax_9)\0A expand_39 = torch.ops.aten.expand(_softmax_9, [1, 12, 128, 128]); _softmax_9 = None\0A _reshape_alias_38 = torch.ops.aten._reshape_alias(expand_39, [12, 128, 128], [16384, 128, 1]); expand_39 = None\0A expand_40 = torch.ops.aten.expand(permute_37, [1, 12, 128, 32]); permute_37 = None\0A _reshape_alias_39 = torch.ops.aten._reshape_alias(expand_40, [12, 128, 32], [32, 384, 1]); expand_40 = None\0A bmm_19 = torch.ops.aten.bmm(_reshape_alias_38, _reshape_alias_39)\0A _unsafe_view_19 = torch.ops.aten._unsafe_view(bmm_19, [1, 12, 128, 32]); bmm_19 = None\0A permute_39 = torch.ops.aten.permute(_unsafe_view_19, [0, 2, 1, 3]); _unsafe_view_19 = None\0A clone_9 = torch.ops.aten.clone(permute_39, memory_format = 0); permute_39 = None\0A view_153 = torch.ops.aten.view(clone_9, [1, 128, 384]); clone_9 = None\0A t_57 = torch.ops.aten.t(params_185)\0A view_154 = torch.ops.aten.view(view_153, [128, 384]); view_153 = None\0A addmm_57 = torch.ops.aten.addmm(params_184, view_154, t_57)\0A view_155 = 
torch.ops.aten.view(addmm_57, [1, 128, 384]); addmm_57 = None\0A add_29 = torch.ops.aten.add(view_155, getitem_54); view_155 = getitem_54 = None\0A native_layer_norm_19 = torch.ops.aten.native_layer_norm(add_29, [384], params_183, params_182, 1e-12)\0A getitem_57 = native_layer_norm_19[0]\0A getitem_58 = native_layer_norm_19[1]\0A getitem_59 = native_layer_norm_19[2]; native_layer_norm_19 = None\0A t_58 = torch.ops.aten.t(params_193)\0A view_156 = torch.ops.aten.view(getitem_57, [128, 384])\0A addmm_58 = torch.ops.aten.addmm(params_192, view_156, t_58)\0A view_157 = torch.ops.aten.view(addmm_58, [1, 128, 1536]); addmm_58 = None\0A gelu_9 = torch.ops.aten.gelu(view_157)\0A t_59 = torch.ops.aten.t(params_197)\0A view_158 = torch.ops.aten.view(gelu_9, [128, 1536]); gelu_9 = None\0A addmm_59 = torch.ops.aten.addmm(params_196, view_158, t_59)\0A view_159 = torch.ops.aten.view(addmm_59, [1, 128, 384]); addmm_59 = None\0A add_30 = torch.ops.aten.add(view_159, getitem_57); view_159 = getitem_57 = None\0A native_layer_norm_20 = torch.ops.aten.native_layer_norm(add_30, [384], params_195, params_194, 1e-12)\0A getitem_60 = native_layer_norm_20[0]\0A getitem_61 = native_layer_norm_20[1]\0A getitem_62 = native_layer_norm_20[2]; native_layer_norm_20 = None\0A t_60 = torch.ops.aten.t(params_45)\0A view_160 = torch.ops.aten.view(getitem_60, [128, 384])\0A addmm_60 = torch.ops.aten.addmm(params_44, view_160, t_60)\0A view_161 = torch.ops.aten.view(addmm_60, [1, 128, 384]); addmm_60 = None\0A t_61 = torch.ops.aten.t(params_43)\0A view_162 = torch.ops.aten.view(getitem_60, [128, 384])\0A addmm_61 = torch.ops.aten.addmm(params_42, view_162, t_61)\0A view_163 = torch.ops.aten.view(addmm_61, [1, 128, 384]); addmm_61 = None\0A view_164 = torch.ops.aten.view(view_163, [1, 128, 12, 32]); view_163 = None\0A permute_40 = torch.ops.aten.permute(view_164, [0, 2, 1, 3]); view_164 = None\0A t_62 = torch.ops.aten.t(params_47)\0A view_165 = torch.ops.aten.view(getitem_60, [128, 384])\0A addmm_62 = 
torch.ops.aten.addmm(params_46, view_165, t_62)\0A view_166 = torch.ops.aten.view(addmm_62, [1, 128, 384]); addmm_62 = None\0A view_167 = torch.ops.aten.view(view_166, [1, 128, 12, 32]); view_166 = None\0A permute_41 = torch.ops.aten.permute(view_167, [0, 2, 1, 3]); view_167 = None\0A view_168 = torch.ops.aten.view(view_161, [1, 128, 12, 32]); view_161 = None\0A permute_42 = torch.ops.aten.permute(view_168, [0, 2, 1, 3]); view_168 = None\0A transpose_10 = torch.ops.aten.transpose(permute_40, -1, -2); permute_40 = None\0A expand_41 = torch.ops.aten.expand(permute_42, [1, 12, 128, 32]); permute_42 = None\0A _reshape_alias_40 = torch.ops.aten._reshape_alias(expand_41, [12, 128, 32], [32, 384, 1]); expand_41 = None\0A expand_42 = torch.ops.aten.expand(transpose_10, [1, 12, 32, 128]); transpose_10 = None\0A _reshape_alias_41 = torch.ops.aten._reshape_alias(expand_42, [12, 32, 128], [32, 1, 384]); expand_42 = None\0A bmm_20 = torch.ops.aten.bmm(_reshape_alias_40, _reshape_alias_41)\0A _unsafe_view_20 = torch.ops.aten._unsafe_view(bmm_20, [1, 12, 128, 128]); bmm_20 = None\0A div_10 = torch.ops.aten.div(_unsafe_view_20, 5.656854249492381); _unsafe_view_20 = None\0A _tensor_constant0_10 = self._tensor_constant0\0A add_31 = torch.ops.aten.add(div_10, _tensor_constant0_10); div_10 = _tensor_constant0_10 = None\0A _softmax_10 = torch.ops.aten._softmax(add_31, -1, False); add_31 = None\0A detach_10 = torch.ops.aten.detach(_softmax_10)\0A expand_43 = torch.ops.aten.expand(_softmax_10, [1, 12, 128, 128]); _softmax_10 = None\0A _reshape_alias_42 = torch.ops.aten._reshape_alias(expand_43, [12, 128, 128], [16384, 128, 1]); expand_43 = None\0A expand_44 = torch.ops.aten.expand(permute_41, [1, 12, 128, 32]); permute_41 = None\0A _reshape_alias_43 = torch.ops.aten._reshape_alias(expand_44, [12, 128, 32], [32, 384, 1]); expand_44 = None\0A bmm_21 = torch.ops.aten.bmm(_reshape_alias_42, _reshape_alias_43)\0A _unsafe_view_21 = torch.ops.aten._unsafe_view(bmm_21, [1, 12, 128, 32]); bmm_21 
= None\0A permute_43 = torch.ops.aten.permute(_unsafe_view_21, [0, 2, 1, 3]); _unsafe_view_21 = None\0A clone_10 = torch.ops.aten.clone(permute_43, memory_format = 0); permute_43 = None\0A view_169 = torch.ops.aten.view(clone_10, [1, 128, 384]); clone_10 = None\0A t_63 = torch.ops.aten.t(params_41)\0A view_170 = torch.ops.aten.view(view_169, [128, 384]); view_169 = None\0A addmm_63 = torch.ops.aten.addmm(params_40, view_170, t_63)\0A view_171 = torch.ops.aten.view(addmm_63, [1, 128, 384]); addmm_63 = None\0A add_32 = torch.ops.aten.add(view_171, getitem_60); view_171 = getitem_60 = None\0A native_layer_norm_21 = torch.ops.aten.native_layer_norm(add_32, [384], params_39, params_38, 1e-12)\0A getitem_63 = native_layer_norm_21[0]\0A getitem_64 = native_layer_norm_21[1]\0A getitem_65 = native_layer_norm_21[2]; native_layer_norm_21 = None\0A t_64 = torch.ops.aten.t(params_49)\0A view_172 = torch.ops.aten.view(getitem_63, [128, 384])\0A addmm_64 = torch.ops.aten.addmm(params_48, view_172, t_64)\0A view_173 = torch.ops.aten.view(addmm_64, [1, 128, 1536]); addmm_64 = None\0A gelu_10 = torch.ops.aten.gelu(view_173)\0A t_65 = torch.ops.aten.t(params_53)\0A view_174 = torch.ops.aten.view(gelu_10, [128, 1536]); gelu_10 = None\0A addmm_65 = torch.ops.aten.addmm(params_52, view_174, t_65)\0A view_175 = torch.ops.aten.view(addmm_65, [1, 128, 384]); addmm_65 = None\0A add_33 = torch.ops.aten.add(view_175, getitem_63); view_175 = getitem_63 = None\0A native_layer_norm_22 = torch.ops.aten.native_layer_norm(add_33, [384], params_51, params_50, 1e-12)\0A getitem_66 = native_layer_norm_22[0]\0A getitem_67 = native_layer_norm_22[1]\0A getitem_68 = native_layer_norm_22[2]; native_layer_norm_22 = None\0A t_66 = torch.ops.aten.t(params_61)\0A view_176 = torch.ops.aten.view(getitem_66, [128, 384])\0A addmm_66 = torch.ops.aten.addmm(params_60, view_176, t_66)\0A view_177 = torch.ops.aten.view(addmm_66, [1, 128, 384]); addmm_66 = None\0A t_67 = torch.ops.aten.t(params_59)\0A view_178 = 
torch.ops.aten.view(getitem_66, [128, 384])\0A addmm_67 = torch.ops.aten.addmm(params_58, view_178, t_67)\0A view_179 = torch.ops.aten.view(addmm_67, [1, 128, 384]); addmm_67 = None\0A view_180 = torch.ops.aten.view(view_179, [1, 128, 12, 32]); view_179 = None\0A permute_44 = torch.ops.aten.permute(view_180, [0, 2, 1, 3]); view_180 = None\0A t_68 = torch.ops.aten.t(params_63)\0A view_181 = torch.ops.aten.view(getitem_66, [128, 384])\0A addmm_68 = torch.ops.aten.addmm(params_62, view_181, t_68)\0A view_182 = torch.ops.aten.view(addmm_68, [1, 128, 384]); addmm_68 = None\0A view_183 = torch.ops.aten.view(view_182, [1, 128, 12, 32]); view_182 = None\0A permute_45 = torch.ops.aten.permute(view_183, [0, 2, 1, 3]); view_183 = None\0A view_184 = torch.ops.aten.view(view_177, [1, 128, 12, 32]); view_177 = None\0A permute_46 = torch.ops.aten.permute(view_184, [0, 2, 1, 3]); view_184 = None\0A transpose_11 = torch.ops.aten.transpose(permute_44, -1, -2); permute_44 = None\0A expand_45 = torch.ops.aten.expand(permute_46, [1, 12, 128, 32]); permute_46 = None\0A _reshape_alias_44 = torch.ops.aten._reshape_alias(expand_45, [12, 128, 32], [32, 384, 1]); expand_45 = None\0A expand_46 = torch.ops.aten.expand(transpose_11, [1, 12, 32, 128]); transpose_11 = None\0A _reshape_alias_45 = torch.ops.aten._reshape_alias(expand_46, [12, 32, 128], [32, 1, 384]); expand_46 = None\0A bmm_22 = torch.ops.aten.bmm(_reshape_alias_44, _reshape_alias_45)\0A _unsafe_view_22 = torch.ops.aten._unsafe_view(bmm_22, [1, 12, 128, 128]); bmm_22 = None\0A div_11 = torch.ops.aten.div(_unsafe_view_22, 5.656854249492381); _unsafe_view_22 = None\0A _tensor_constant0_11 = self._tensor_constant0\0A add_34 = torch.ops.aten.add(div_11, _tensor_constant0_11); div_11 = _tensor_constant0_11 = None\0A _softmax_11 = torch.ops.aten._softmax(add_34, -1, False); add_34 = None\0A detach_11 = torch.ops.aten.detach(_softmax_11)\0A expand_47 = torch.ops.aten.expand(_softmax_11, [1, 12, 128, 128]); _softmax_11 = None\0A 
_reshape_alias_46 = torch.ops.aten._reshape_alias(expand_47, [12, 128, 128], [16384, 128, 1]); expand_47 = None\0A expand_48 = torch.ops.aten.expand(permute_45, [1, 12, 128, 32]); permute_45 = None\0A _reshape_alias_47 = torch.ops.aten._reshape_alias(expand_48, [12, 128, 32], [32, 384, 1]); expand_48 = None\0A bmm_23 = torch.ops.aten.bmm(_reshape_alias_46, _reshape_alias_47)\0A _unsafe_view_23 = torch.ops.aten._unsafe_view(bmm_23, [1, 12, 128, 32]); bmm_23 = None\0A permute_47 = torch.ops.aten.permute(_unsafe_view_23, [0, 2, 1, 3]); _unsafe_view_23 = None\0A clone_11 = torch.ops.aten.clone(permute_47, memory_format = 0); permute_47 = None\0A view_185 = torch.ops.aten.view(clone_11, [1, 128, 384]); clone_11 = None\0A t_69 = torch.ops.aten.t(params_57)\0A view_186 = torch.ops.aten.view(view_185, [128, 384]); view_185 = None\0A addmm_69 = torch.ops.aten.addmm(params_56, view_186, t_69)\0A view_187 = torch.ops.aten.view(addmm_69, [1, 128, 384]); addmm_69 = None\0A add_35 = torch.ops.aten.add(view_187, getitem_66); view_187 = getitem_66 = None\0A native_layer_norm_23 = torch.ops.aten.native_layer_norm(add_35, [384], params_55, params_54, 1e-12)\0A getitem_69 = native_layer_norm_23[0]\0A getitem_70 = native_layer_norm_23[1]\0A getitem_71 = native_layer_norm_23[2]; native_layer_norm_23 = None\0A t_70 = torch.ops.aten.t(params_65)\0A view_188 = torch.ops.aten.view(getitem_69, [128, 384])\0A addmm_70 = torch.ops.aten.addmm(params_64, view_188, t_70)\0A view_189 = torch.ops.aten.view(addmm_70, [1, 128, 1536]); addmm_70 = None\0A gelu_11 = torch.ops.aten.gelu(view_189)\0A t_71 = torch.ops.aten.t(params_69)\0A view_190 = torch.ops.aten.view(gelu_11, [128, 1536]); gelu_11 = None\0A addmm_71 = torch.ops.aten.addmm(params_68, view_190, t_71)\0A view_191 = torch.ops.aten.view(addmm_71, [1, 128, 384]); addmm_71 = None\0A add_36 = torch.ops.aten.add(view_191, getitem_69); view_191 = getitem_69 = None\0A native_layer_norm_24 = torch.ops.aten.native_layer_norm(add_36, [384], 
params_67, params_66, 1e-12)\0A getitem_72 = native_layer_norm_24[0]\0A getitem_73 = native_layer_norm_24[1]\0A getitem_74 = native_layer_norm_24[2]; native_layer_norm_24 = None\0A slice_5 = torch.ops.aten.slice(getitem_72, 0, 0, 9223372036854775807); getitem_72 = None\0A select = torch.ops.aten.select(slice_5, 1, 0); slice_5 = None\0A t_72 = torch.ops.aten.t(params_199)\0A addmm_72 = torch.ops.aten.addmm(params_198, select, t_72)\0A tanh = torch.ops.aten.tanh(addmm_72); addmm_72 = None\0A detach_12 = torch.ops.aten.detach(tanh)\0A t_73 = torch.ops.aten.t(params_201)\0A addmm_73 = torch.ops.aten.addmm(params_200, tanh, t_73)\0A sum_1 = torch.ops.aten.sum(addmm_73); addmm_73 = None\0A ones_like = torch.ops.aten.ones_like(sum_1, device = device(type='cpu'), dtype = 6, layout = 0, memory_format = 1, pin_memory = False); sum_1 = None\0A expand_49 = torch.ops.aten.expand(ones_like, [1, 2]); ones_like = None\0A t_74 = torch.ops.aten.t(t_73); t_73 = None\0A mm = torch.ops.aten.mm(expand_49, t_74); t_74 = None\0A t_75 = torch.ops.aten.t(expand_49)\0A mm_1 = torch.ops.aten.mm(t_75, tanh); t_75 = tanh = None\0A t_76 = torch.ops.aten.t(mm_1); mm_1 = None\0A sum_2 = torch.ops.aten.sum(expand_49, [0], True); expand_49 = None\0A view_192 = torch.ops.aten.view(sum_2, [2]); sum_2 = None\0A detach_13 = torch.ops.aten.detach(view_192); view_192 = None\0A detach_14 = torch.ops.aten.detach(detach_13); detach_13 = None\0A t_77 = torch.ops.aten.t(t_76); t_76 = None\0A detach_15 = torch.ops.aten.detach(t_77); t_77 = None\0A detach_16 = torch.ops.aten.detach(detach_15); detach_15 = None\0A detach_17 = torch.ops.aten.detach(detach_12); detach_12 = None\0A tanh_backward = torch.ops.aten.tanh_backward(mm, detach_17); mm = detach_17 = None\0A t_78 = torch.ops.aten.t(t_72); t_72 = None\0A mm_2 = torch.ops.aten.mm(tanh_backward, t_78); t_78 = None\0A t_79 = torch.ops.aten.t(tanh_backward)\0A mm_3 = torch.ops.aten.mm(t_79, select); t_79 = select = None\0A t_80 = torch.ops.aten.t(mm_3); mm_3 = 
None\0A sum_3 = torch.ops.aten.sum(tanh_backward, [0], True); tanh_backward = None\0A view_193 = torch.ops.aten.view(sum_3, [384]); sum_3 = None\0A detach_18 = torch.ops.aten.detach(view_193); view_193 = None\0A detach_19 = torch.ops.aten.detach(detach_18); detach_18 = None\0A t_81 = torch.ops.aten.t(t_80); t_80 = None\0A detach_20 = torch.ops.aten.detach(t_81); t_81 = None\0A detach_21 = torch.ops.aten.detach(detach_20); detach_20 = None\0A new_zeros = torch.ops.aten.new_zeros(mm_2, [1, 128, 384], device = device(type='cpu'), dtype = 6, layout = 0, pin_memory = False)\0A select_scatter = torch.ops.aten.select_scatter(new_zeros, mm_2, 1, 0); new_zeros = mm_2 = None\0A new_zeros_1 = torch.ops.aten.new_zeros(select_scatter, [1, 128, 384], device = device(type='cpu'), dtype = 6, layout = 0, pin_memory = False)\0A slice_scatter = torch.ops.aten.slice_scatter(new_zeros_1, select_scatter, 0, 0, 9223372036854775807); new_zeros_1 = select_scatter = None\0A to = torch.ops.aten.to(slice_scatter, 6)\0A to_1 = torch.ops.aten.to(add_36, 6)\0A to_2 = torch.ops.aten.to(getitem_73, 6)\0A to_3 = torch.ops.aten.to(getitem_74, 6)\0A to_4 = torch.ops.aten.to(params_67, 6)\0A to_5 = torch.ops.aten.to(params_66, 6)\0A sub = torch.ops.aten.sub(add_36, getitem_73); add_36 = getitem_73 = None\0A mul = torch.ops.aten.mul(sub, getitem_74); sub = None\0A mul_1 = torch.ops.aten.mul(slice_scatter, params_67)\0A mul_2 = torch.ops.aten.mul(mul_1, 384)\0A sum_4 = torch.ops.aten.sum(mul_1, [2], True)\0A mul_3 = torch.ops.aten.mul(mul_1, mul); mul_1 = None\0A sum_5 = torch.ops.aten.sum(mul_3, [2], True); mul_3 = None\0A mul_4 = torch.ops.aten.mul(mul, sum_5); sum_5 = None\0A sub_1 = torch.ops.aten.sub(mul_2, sum_4); mul_2 = sum_4 = None\0A sub_2 = torch.ops.aten.sub(sub_1, mul_4); sub_1 = mul_4 = None\0A div_12 = torch.ops.aten.div(getitem_74, 384); getitem_74 = None\0A mul_5 = torch.ops.aten.mul(div_12, sub_2); div_12 = sub_2 = None\0A mul_6 = torch.ops.aten.mul(slice_scatter, mul); mul = None\0A 
sum_6 = torch.ops.aten.sum(mul_6, [0, 1]); mul_6 = None\0A sum_7 = torch.ops.aten.sum(slice_scatter, [0, 1]); slice_scatter = None\0A to_6 = torch.ops.aten.to(mul_5, 6)\0A to_7 = torch.ops.aten.to(sum_6, 6)\0A to_8 = torch.ops.aten.to(sum_7, 6)\0A detach_22 = torch.ops.aten.detach(sum_6); sum_6 = None\0A detach_23 = torch.ops.aten.detach(detach_22); detach_22 = None\0A detach_24 = torch.ops.aten.detach(sum_7); sum_7 = None\0A detach_25 = torch.ops.aten.detach(detach_24); detach_24 = None\0A _reshape_alias_48 = torch.ops.aten._reshape_alias(mul_5, [128, 384], [384, 1])\0A t_82 = torch.ops.aten.t(t_71); t_71 = None\0A mm_4 = torch.ops.aten.mm(_reshape_alias_48, t_82); t_82 = None\0A t_83 = torch.ops.aten.t(_reshape_alias_48)\0A mm_5 = torch.ops.aten.mm(t_83, view_190); t_83 = view_190 = None\0A t_84 = torch.ops.aten.t(mm_5); mm_5 = None\0A sum_8 = torch.ops.aten.sum(_reshape_alias_48, [0], True); _reshape_alias_48 = None\0A view_194 = torch.ops.aten.view(sum_8, [384]); sum_8 = None\0A detach_26 = torch.ops.aten.detach(view_194); view_194 = None\0A detach_27 = torch.ops.aten.detach(detach_26); detach_26 = None\0A _reshape_alias_49 = torch.ops.aten._reshape_alias(mm_4, [1, 128, 1536], [196608, 1536, 1]); mm_4 = None\0A t_85 = torch.ops.aten.t(t_84); t_84 = None\0A detach_28 = torch.ops.aten.detach(t_85); t_85 = None\0A detach_29 = torch.ops.aten.detach(detach_28); detach_28 = None\0A gelu_backward = torch.ops.aten.gelu_backward(_reshape_alias_49, view_189); _reshape_alias_49 = view_189 = None\0A _reshape_alias_50 = torch.ops.aten._reshape_alias(gelu_backward, [128, 1536], [1536, 1]); gelu_backward = None\0A t_86 = torch.ops.aten.t(t_70); t_70 = None\0A mm_6 = torch.ops.aten.mm(_reshape_alias_50, t_86); t_86 = None\0A t_87 = torch.ops.aten.t(_reshape_alias_50)\0A mm_7 = torch.ops.aten.mm(t_87, view_188); t_87 = view_188 = None\0A t_88 = torch.ops.aten.t(mm_7); mm_7 = None\0A sum_9 = torch.ops.aten.sum(_reshape_alias_50, [0], True); _reshape_alias_50 = None\0A view_195 = 
torch.ops.aten.view(sum_9, [1536]); sum_9 = None\0A detach_30 = torch.ops.aten.detach(view_195); view_195 = None\0A detach_31 = torch.ops.aten.detach(detach_30); detach_30 = None\0A _reshape_alias_51 = torch.ops.aten._reshape_alias(mm_6, [1, 128, 384], [49152, 384, 1]); mm_6 = None\0A add_37 = torch.ops.aten.add(mul_5, _reshape_alias_51); mul_5 = _reshape_alias_51 = None\0A t_89 = torch.ops.aten.t(t_88); t_88 = None\0A detach_32 = torch.ops.aten.detach(t_89); t_89 = None\0A detach_33 = torch.ops.aten.detach(detach_32); detach_32 = None\0A to_9 = torch.ops.aten.to(add_37, 6)\0A to_10 = torch.ops.aten.to(add_35, 6)\0A to_11 = torch.ops.aten.to(getitem_70, 6)\0A to_12 = torch.ops.aten.to(getitem_71, 6)\0A to_13 = torch.ops.aten.to(params_55, 6)\0A to_14 = torch.ops.aten.to(params_54, 6)\0A sub_3 = torch.ops.aten.sub(add_35, getitem_70); add_35 = getitem_70 = None\0A mul_7 = torch.ops.aten.mul(sub_3, getitem_71); sub_3 = None\0A mul_8 = torch.ops.aten.mul(add_37, params_55)\0A mul_9 = torch.ops.aten.mul(mul_8, 384)\0A sum_10 = torch.ops.aten.sum(mul_8, [2], True)\0A mul_10 = torch.ops.aten.mul(mul_8, mul_7); mul_8 = None\0A sum_11 = torch.ops.aten.sum(mul_10, [2], True); mul_10 = None\0A mul_11 = torch.ops.aten.mul(mul_7, sum_11); sum_11 = None\0A sub_4 = torch.ops.aten.sub(mul_9, sum_10); mul_9 = sum_10 = None\0A sub_5 = torch.ops.aten.sub(sub_4, mul_11); sub_4 = mul_11 = None\0A div_13 = torch.ops.aten.div(getitem_71, 384); getitem_71 = None\0A mul_12 = torch.ops.aten.mul(div_13, sub_5); div_13 = sub_5 = None\0A mul_13 = torch.ops.aten.mul(add_37, mul_7); mul_7 = None\0A sum_12 = torch.ops.aten.sum(mul_13, [0, 1]); mul_13 = None\0A sum_13 = torch.ops.aten.sum(add_37, [0, 1]); add_37 = None\0A to_15 = torch.ops.aten.to(mul_12, 6)\0A to_16 = torch.ops.aten.to(sum_12, 6)\0A to_17 = torch.ops.aten.to(sum_13, 6)\0A detach_34 = torch.ops.aten.detach(sum_12); sum_12 = None\0A detach_35 = torch.ops.aten.detach(detach_34); detach_34 = None\0A detach_36 = 
torch.ops.aten.detach(sum_13); sum_13 = None\0A detach_37 = torch.ops.aten.detach(detach_36); detach_36 = None\0A _reshape_alias_52 = torch.ops.aten._reshape_alias(mul_12, [128, 384], [384, 1])\0A t_90 = torch.ops.aten.t(t_69); t_69 = None\0A mm_8 = torch.ops.aten.mm(_reshape_alias_52, t_90); t_90 = None\0A t_91 = torch.ops.aten.t(_reshape_alias_52)\0A mm_9 = torch.ops.aten.mm(t_91, view_186); t_91 = view_186 = None\0A t_92 = torch.ops.aten.t(mm_9); mm_9 = None\0A sum_14 = torch.ops.aten.sum(_reshape_alias_52, [0], True); _reshape_alias_52 = None\0A view_196 = torch.ops.aten.view(sum_14, [384]); sum_14 = None\0A detach_38 = torch.ops.aten.detach(view_196); view_196 = None\0A detach_39 = torch.ops.aten.detach(detach_38); detach_38 = None\0A _reshape_alias_53 = torch.ops.aten._reshape_alias(mm_8, [1, 128, 384], [49152, 384, 1]); mm_8 = None\0A t_93 = torch.ops.aten.t(t_92); t_92 = None\0A detach_40 = torch.ops.aten.detach(t_93); t_93 = None\0A detach_41 = torch.ops.aten.detach(detach_40); detach_40 = None\0A _reshape_alias_54 = torch.ops.aten._reshape_alias(_reshape_alias_53, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_53 = None\0A permute_48 = torch.ops.aten.permute(_reshape_alias_54, [0, 2, 1, 3]); _reshape_alias_54 = None\0A _reshape_alias_55 = torch.ops.aten._reshape_alias(permute_48, [12, 128, 32], [32, 384, 1]); permute_48 = None\0A transpose_12 = torch.ops.aten.transpose(_reshape_alias_46, 1, 2); _reshape_alias_46 = None\0A bmm_24 = torch.ops.aten.bmm(transpose_12, _reshape_alias_55); transpose_12 = None\0A transpose_13 = torch.ops.aten.transpose(_reshape_alias_47, 1, 2); _reshape_alias_47 = None\0A bmm_25 = torch.ops.aten.bmm(_reshape_alias_55, transpose_13); _reshape_alias_55 = transpose_13 = None\0A _reshape_alias_56 = torch.ops.aten._reshape_alias(bmm_24, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_24 = None\0A _reshape_alias_57 = torch.ops.aten._reshape_alias(bmm_25, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_25 = None\0A detach_42 = 
torch.ops.aten.detach(detach_11); detach_11 = None\0A _softmax_backward_data = torch.ops.aten._softmax_backward_data(_reshape_alias_57, detach_42, -1, 6); _reshape_alias_57 = detach_42 = None\0A div_14 = torch.ops.aten.div(_softmax_backward_data, 5.656854249492381); _softmax_backward_data = None\0A _reshape_alias_58 = torch.ops.aten._reshape_alias(div_14, [12, 128, 128], [16384, 128, 1]); div_14 = None\0A transpose_14 = torch.ops.aten.transpose(_reshape_alias_44, 1, 2); _reshape_alias_44 = None\0A bmm_26 = torch.ops.aten.bmm(transpose_14, _reshape_alias_58); transpose_14 = None\0A transpose_15 = torch.ops.aten.transpose(_reshape_alias_45, 1, 2); _reshape_alias_45 = None\0A bmm_27 = torch.ops.aten.bmm(_reshape_alias_58, transpose_15); _reshape_alias_58 = transpose_15 = None\0A _reshape_alias_59 = torch.ops.aten._reshape_alias(bmm_26, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_26 = None\0A _reshape_alias_60 = torch.ops.aten._reshape_alias(bmm_27, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_27 = None\0A transpose_16 = torch.ops.aten.transpose(_reshape_alias_59, -1, -2); _reshape_alias_59 = None\0A permute_49 = torch.ops.aten.permute(_reshape_alias_60, [0, 2, 1, 3]); _reshape_alias_60 = None\0A clone_12 = torch.ops.aten.clone(permute_49, memory_format = 0); permute_49 = None\0A _unsafe_view_24 = torch.ops.aten._unsafe_view(clone_12, [1, 128, 384]); clone_12 = None\0A permute_50 = torch.ops.aten.permute(_reshape_alias_56, [0, 2, 1, 3]); _reshape_alias_56 = None\0A clone_13 = torch.ops.aten.clone(permute_50, memory_format = 0); permute_50 = None\0A _unsafe_view_25 = torch.ops.aten._unsafe_view(clone_13, [1, 128, 384]); clone_13 = None\0A _reshape_alias_61 = torch.ops.aten._reshape_alias(_unsafe_view_25, [128, 384], [384, 1]); _unsafe_view_25 = None\0A t_94 = torch.ops.aten.t(t_68); t_68 = None\0A mm_10 = torch.ops.aten.mm(_reshape_alias_61, t_94); t_94 = None\0A t_95 = torch.ops.aten.t(_reshape_alias_61)\0A mm_11 = torch.ops.aten.mm(t_95, view_181); t_95 = view_181 = 
None\0A t_96 = torch.ops.aten.t(mm_11); mm_11 = None\0A sum_15 = torch.ops.aten.sum(_reshape_alias_61, [0], True); _reshape_alias_61 = None\0A view_197 = torch.ops.aten.view(sum_15, [384]); sum_15 = None\0A detach_43 = torch.ops.aten.detach(view_197); view_197 = None\0A detach_44 = torch.ops.aten.detach(detach_43); detach_43 = None\0A _reshape_alias_62 = torch.ops.aten._reshape_alias(mm_10, [1, 128, 384], [49152, 384, 1]); mm_10 = None\0A add_38 = torch.ops.aten.add(mul_12, _reshape_alias_62); mul_12 = _reshape_alias_62 = None\0A t_97 = torch.ops.aten.t(t_96); t_96 = None\0A detach_45 = torch.ops.aten.detach(t_97); t_97 = None\0A detach_46 = torch.ops.aten.detach(detach_45); detach_45 = None\0A permute_51 = torch.ops.aten.permute(transpose_16, [0, 2, 1, 3]); transpose_16 = None\0A _reshape_alias_63 = torch.ops.aten._reshape_alias(permute_51, [1, 128, 384], [128, 1, 128]); permute_51 = None\0A _reshape_alias_64 = torch.ops.aten._reshape_alias(_reshape_alias_63, [128, 384], [1, 128]); _reshape_alias_63 = None\0A t_98 = torch.ops.aten.t(t_67); t_67 = None\0A mm_12 = torch.ops.aten.mm(_reshape_alias_64, t_98); t_98 = None\0A t_99 = torch.ops.aten.t(_reshape_alias_64)\0A mm_13 = torch.ops.aten.mm(t_99, view_178); t_99 = view_178 = None\0A t_100 = torch.ops.aten.t(mm_13); mm_13 = None\0A sum_16 = torch.ops.aten.sum(_reshape_alias_64, [0], True); _reshape_alias_64 = None\0A view_198 = torch.ops.aten.view(sum_16, [384]); sum_16 = None\0A detach_47 = torch.ops.aten.detach(view_198); view_198 = None\0A detach_48 = torch.ops.aten.detach(detach_47); detach_47 = None\0A _reshape_alias_65 = torch.ops.aten._reshape_alias(mm_12, [1, 128, 384], [49152, 384, 1]); mm_12 = None\0A add_39 = torch.ops.aten.add(add_38, _reshape_alias_65); add_38 = _reshape_alias_65 = None\0A t_101 = torch.ops.aten.t(t_100); t_100 = None\0A detach_49 = torch.ops.aten.detach(t_101); t_101 = None\0A detach_50 = torch.ops.aten.detach(detach_49); detach_49 = None\0A _reshape_alias_66 = 
torch.ops.aten._reshape_alias(_unsafe_view_24, [128, 384], [384, 1]); _unsafe_view_24 = None\0A t_102 = torch.ops.aten.t(t_66); t_66 = None\0A mm_14 = torch.ops.aten.mm(_reshape_alias_66, t_102); t_102 = None\0A t_103 = torch.ops.aten.t(_reshape_alias_66)\0A mm_15 = torch.ops.aten.mm(t_103, view_176); t_103 = view_176 = None\0A t_104 = torch.ops.aten.t(mm_15); mm_15 = None\0A sum_17 = torch.ops.aten.sum(_reshape_alias_66, [0], True); _reshape_alias_66 = None\0A view_199 = torch.ops.aten.view(sum_17, [384]); sum_17 = None\0A detach_51 = torch.ops.aten.detach(view_199); view_199 = None\0A detach_52 = torch.ops.aten.detach(detach_51); detach_51 = None\0A _reshape_alias_67 = torch.ops.aten._reshape_alias(mm_14, [1, 128, 384], [49152, 384, 1]); mm_14 = None\0A add_40 = torch.ops.aten.add(add_39, _reshape_alias_67); add_39 = _reshape_alias_67 = None\0A t_105 = torch.ops.aten.t(t_104); t_104 = None\0A detach_53 = torch.ops.aten.detach(t_105); t_105 = None\0A detach_54 = torch.ops.aten.detach(detach_53); detach_53 = None\0A to_18 = torch.ops.aten.to(add_40, 6)\0A to_19 = torch.ops.aten.to(add_33, 6)\0A to_20 = torch.ops.aten.to(getitem_67, 6)\0A to_21 = torch.ops.aten.to(getitem_68, 6)\0A to_22 = torch.ops.aten.to(params_51, 6)\0A to_23 = torch.ops.aten.to(params_50, 6)\0A sub_6 = torch.ops.aten.sub(add_33, getitem_67); add_33 = getitem_67 = None\0A mul_14 = torch.ops.aten.mul(sub_6, getitem_68); sub_6 = None\0A mul_15 = torch.ops.aten.mul(add_40, params_51)\0A mul_16 = torch.ops.aten.mul(mul_15, 384)\0A sum_18 = torch.ops.aten.sum(mul_15, [2], True)\0A mul_17 = torch.ops.aten.mul(mul_15, mul_14); mul_15 = None\0A sum_19 = torch.ops.aten.sum(mul_17, [2], True); mul_17 = None\0A mul_18 = torch.ops.aten.mul(mul_14, sum_19); sum_19 = None\0A sub_7 = torch.ops.aten.sub(mul_16, sum_18); mul_16 = sum_18 = None\0A sub_8 = torch.ops.aten.sub(sub_7, mul_18); sub_7 = mul_18 = None\0A div_15 = torch.ops.aten.div(getitem_68, 384); getitem_68 = None\0A mul_19 = 
torch.ops.aten.mul(div_15, sub_8); div_15 = sub_8 = None\0A mul_20 = torch.ops.aten.mul(add_40, mul_14); mul_14 = None\0A sum_20 = torch.ops.aten.sum(mul_20, [0, 1]); mul_20 = None\0A sum_21 = torch.ops.aten.sum(add_40, [0, 1]); add_40 = None\0A to_24 = torch.ops.aten.to(mul_19, 6)\0A to_25 = torch.ops.aten.to(sum_20, 6)\0A to_26 = torch.ops.aten.to(sum_21, 6)\0A detach_55 = torch.ops.aten.detach(sum_20); sum_20 = None\0A detach_56 = torch.ops.aten.detach(detach_55); detach_55 = None\0A detach_57 = torch.ops.aten.detach(sum_21); sum_21 = None\0A detach_58 = torch.ops.aten.detach(detach_57); detach_57 = None\0A _reshape_alias_68 = torch.ops.aten._reshape_alias(mul_19, [128, 384], [384, 1])\0A t_106 = torch.ops.aten.t(t_65); t_65 = None\0A mm_16 = torch.ops.aten.mm(_reshape_alias_68, t_106); t_106 = None\0A t_107 = torch.ops.aten.t(_reshape_alias_68)\0A mm_17 = torch.ops.aten.mm(t_107, view_174); t_107 = view_174 = None\0A t_108 = torch.ops.aten.t(mm_17); mm_17 = None\0A sum_22 = torch.ops.aten.sum(_reshape_alias_68, [0], True); _reshape_alias_68 = None\0A view_200 = torch.ops.aten.view(sum_22, [384]); sum_22 = None\0A detach_59 = torch.ops.aten.detach(view_200); view_200 = None\0A detach_60 = torch.ops.aten.detach(detach_59); detach_59 = None\0A _reshape_alias_69 = torch.ops.aten._reshape_alias(mm_16, [1, 128, 1536], [196608, 1536, 1]); mm_16 = None\0A t_109 = torch.ops.aten.t(t_108); t_108 = None\0A detach_61 = torch.ops.aten.detach(t_109); t_109 = None\0A detach_62 = torch.ops.aten.detach(detach_61); detach_61 = None\0A gelu_backward_1 = torch.ops.aten.gelu_backward(_reshape_alias_69, view_173); _reshape_alias_69 = view_173 = None\0A _reshape_alias_70 = torch.ops.aten._reshape_alias(gelu_backward_1, [128, 1536], [1536, 1]); gelu_backward_1 = None\0A t_110 = torch.ops.aten.t(t_64); t_64 = None\0A mm_18 = torch.ops.aten.mm(_reshape_alias_70, t_110); t_110 = None\0A t_111 = torch.ops.aten.t(_reshape_alias_70)\0A mm_19 = torch.ops.aten.mm(t_111, view_172); t_111 = 
view_172 = None\0A t_112 = torch.ops.aten.t(mm_19); mm_19 = None\0A sum_23 = torch.ops.aten.sum(_reshape_alias_70, [0], True); _reshape_alias_70 = None\0A view_201 = torch.ops.aten.view(sum_23, [1536]); sum_23 = None\0A detach_63 = torch.ops.aten.detach(view_201); view_201 = None\0A detach_64 = torch.ops.aten.detach(detach_63); detach_63 = None\0A _reshape_alias_71 = torch.ops.aten._reshape_alias(mm_18, [1, 128, 384], [49152, 384, 1]); mm_18 = None\0A add_41 = torch.ops.aten.add(mul_19, _reshape_alias_71); mul_19 = _reshape_alias_71 = None\0A t_113 = torch.ops.aten.t(t_112); t_112 = None\0A detach_65 = torch.ops.aten.detach(t_113); t_113 = None\0A detach_66 = torch.ops.aten.detach(detach_65); detach_65 = None\0A to_27 = torch.ops.aten.to(add_41, 6)\0A to_28 = torch.ops.aten.to(add_32, 6)\0A to_29 = torch.ops.aten.to(getitem_64, 6)\0A to_30 = torch.ops.aten.to(getitem_65, 6)\0A to_31 = torch.ops.aten.to(params_39, 6)\0A to_32 = torch.ops.aten.to(params_38, 6)\0A sub_9 = torch.ops.aten.sub(add_32, getitem_64); add_32 = getitem_64 = None\0A mul_21 = torch.ops.aten.mul(sub_9, getitem_65); sub_9 = None\0A mul_22 = torch.ops.aten.mul(add_41, params_39)\0A mul_23 = torch.ops.aten.mul(mul_22, 384)\0A sum_24 = torch.ops.aten.sum(mul_22, [2], True)\0A mul_24 = torch.ops.aten.mul(mul_22, mul_21); mul_22 = None\0A sum_25 = torch.ops.aten.sum(mul_24, [2], True); mul_24 = None\0A mul_25 = torch.ops.aten.mul(mul_21, sum_25); sum_25 = None\0A sub_10 = torch.ops.aten.sub(mul_23, sum_24); mul_23 = sum_24 = None\0A sub_11 = torch.ops.aten.sub(sub_10, mul_25); sub_10 = mul_25 = None\0A div_16 = torch.ops.aten.div(getitem_65, 384); getitem_65 = None\0A mul_26 = torch.ops.aten.mul(div_16, sub_11); div_16 = sub_11 = None\0A mul_27 = torch.ops.aten.mul(add_41, mul_21); mul_21 = None\0A sum_26 = torch.ops.aten.sum(mul_27, [0, 1]); mul_27 = None\0A sum_27 = torch.ops.aten.sum(add_41, [0, 1]); add_41 = None\0A to_33 = torch.ops.aten.to(mul_26, 6)\0A to_34 = torch.ops.aten.to(sum_26, 6)\0A 
to_35 = torch.ops.aten.to(sum_27, 6)\0A detach_67 = torch.ops.aten.detach(sum_26); sum_26 = None\0A detach_68 = torch.ops.aten.detach(detach_67); detach_67 = None\0A detach_69 = torch.ops.aten.detach(sum_27); sum_27 = None\0A detach_70 = torch.ops.aten.detach(detach_69); detach_69 = None\0A _reshape_alias_72 = torch.ops.aten._reshape_alias(mul_26, [128, 384], [384, 1])\0A t_114 = torch.ops.aten.t(t_63); t_63 = None\0A mm_20 = torch.ops.aten.mm(_reshape_alias_72, t_114); t_114 = None\0A t_115 = torch.ops.aten.t(_reshape_alias_72)\0A mm_21 = torch.ops.aten.mm(t_115, view_170); t_115 = view_170 = None\0A t_116 = torch.ops.aten.t(mm_21); mm_21 = None\0A sum_28 = torch.ops.aten.sum(_reshape_alias_72, [0], True); _reshape_alias_72 = None\0A view_202 = torch.ops.aten.view(sum_28, [384]); sum_28 = None\0A detach_71 = torch.ops.aten.detach(view_202); view_202 = None\0A detach_72 = torch.ops.aten.detach(detach_71); detach_71 = None\0A _reshape_alias_73 = torch.ops.aten._reshape_alias(mm_20, [1, 128, 384], [49152, 384, 1]); mm_20 = None\0A t_117 = torch.ops.aten.t(t_116); t_116 = None\0A detach_73 = torch.ops.aten.detach(t_117); t_117 = None\0A detach_74 = torch.ops.aten.detach(detach_73); detach_73 = None\0A _reshape_alias_74 = torch.ops.aten._reshape_alias(_reshape_alias_73, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_73 = None\0A permute_52 = torch.ops.aten.permute(_reshape_alias_74, [0, 2, 1, 3]); _reshape_alias_74 = None\0A _reshape_alias_75 = torch.ops.aten._reshape_alias(permute_52, [12, 128, 32], [32, 384, 1]); permute_52 = None\0A transpose_17 = torch.ops.aten.transpose(_reshape_alias_42, 1, 2); _reshape_alias_42 = None\0A bmm_28 = torch.ops.aten.bmm(transpose_17, _reshape_alias_75); transpose_17 = None\0A transpose_18 = torch.ops.aten.transpose(_reshape_alias_43, 1, 2); _reshape_alias_43 = None\0A bmm_29 = torch.ops.aten.bmm(_reshape_alias_75, transpose_18); _reshape_alias_75 = transpose_18 = None\0A _reshape_alias_76 = 
torch.ops.aten._reshape_alias(bmm_28, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_28 = None\0A _reshape_alias_77 = torch.ops.aten._reshape_alias(bmm_29, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_29 = None\0A detach_75 = torch.ops.aten.detach(detach_10); detach_10 = None\0A _softmax_backward_data_1 = torch.ops.aten._softmax_backward_data(_reshape_alias_77, detach_75, -1, 6); _reshape_alias_77 = detach_75 = None\0A div_17 = torch.ops.aten.div(_softmax_backward_data_1, 5.656854249492381); _softmax_backward_data_1 = None\0A _reshape_alias_78 = torch.ops.aten._reshape_alias(div_17, [12, 128, 128], [16384, 128, 1]); div_17 = None\0A transpose_19 = torch.ops.aten.transpose(_reshape_alias_40, 1, 2); _reshape_alias_40 = None\0A bmm_30 = torch.ops.aten.bmm(transpose_19, _reshape_alias_78); transpose_19 = None\0A transpose_20 = torch.ops.aten.transpose(_reshape_alias_41, 1, 2); _reshape_alias_41 = None\0A bmm_31 = torch.ops.aten.bmm(_reshape_alias_78, transpose_20); _reshape_alias_78 = transpose_20 = None\0A _reshape_alias_79 = torch.ops.aten._reshape_alias(bmm_30, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_30 = None\0A _reshape_alias_80 = torch.ops.aten._reshape_alias(bmm_31, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_31 = None\0A transpose_21 = torch.ops.aten.transpose(_reshape_alias_79, -1, -2); _reshape_alias_79 = None\0A permute_53 = torch.ops.aten.permute(_reshape_alias_80, [0, 2, 1, 3]); _reshape_alias_80 = None\0A clone_14 = torch.ops.aten.clone(permute_53, memory_format = 0); permute_53 = None\0A _unsafe_view_26 = torch.ops.aten._unsafe_view(clone_14, [1, 128, 384]); clone_14 = None\0A permute_54 = torch.ops.aten.permute(_reshape_alias_76, [0, 2, 1, 3]); _reshape_alias_76 = None\0A clone_15 = torch.ops.aten.clone(permute_54, memory_format = 0); permute_54 = None\0A _unsafe_view_27 = torch.ops.aten._unsafe_view(clone_15, [1, 128, 384]); clone_15 = None\0A _reshape_alias_81 = torch.ops.aten._reshape_alias(_unsafe_view_27, [128, 384], [384, 1]); 
_unsafe_view_27 = None\0A t_118 = torch.ops.aten.t(t_62); t_62 = None\0A mm_22 = torch.ops.aten.mm(_reshape_alias_81, t_118); t_118 = None\0A t_119 = torch.ops.aten.t(_reshape_alias_81)\0A mm_23 = torch.ops.aten.mm(t_119, view_165); t_119 = view_165 = None\0A t_120 = torch.ops.aten.t(mm_23); mm_23 = None\0A sum_29 = torch.ops.aten.sum(_reshape_alias_81, [0], True); _reshape_alias_81 = None\0A view_203 = torch.ops.aten.view(sum_29, [384]); sum_29 = None\0A detach_76 = torch.ops.aten.detach(view_203); view_203 = None\0A detach_77 = torch.ops.aten.detach(detach_76); detach_76 = None\0A _reshape_alias_82 = torch.ops.aten._reshape_alias(mm_22, [1, 128, 384], [49152, 384, 1]); mm_22 = None\0A add_42 = torch.ops.aten.add(mul_26, _reshape_alias_82); mul_26 = _reshape_alias_82 = None\0A t_121 = torch.ops.aten.t(t_120); t_120 = None\0A detach_78 = torch.ops.aten.detach(t_121); t_121 = None\0A detach_79 = torch.ops.aten.detach(detach_78); detach_78 = None\0A permute_55 = torch.ops.aten.permute(transpose_21, [0, 2, 1, 3]); transpose_21 = None\0A _reshape_alias_83 = torch.ops.aten._reshape_alias(permute_55, [1, 128, 384], [128, 1, 128]); permute_55 = None\0A _reshape_alias_84 = torch.ops.aten._reshape_alias(_reshape_alias_83, [128, 384], [1, 128]); _reshape_alias_83 = None\0A t_122 = torch.ops.aten.t(t_61); t_61 = None\0A mm_24 = torch.ops.aten.mm(_reshape_alias_84, t_122); t_122 = None\0A t_123 = torch.ops.aten.t(_reshape_alias_84)\0A mm_25 = torch.ops.aten.mm(t_123, view_162); t_123 = view_162 = None\0A t_124 = torch.ops.aten.t(mm_25); mm_25 = None\0A sum_30 = torch.ops.aten.sum(_reshape_alias_84, [0], True); _reshape_alias_84 = None\0A view_204 = torch.ops.aten.view(sum_30, [384]); sum_30 = None\0A detach_80 = torch.ops.aten.detach(view_204); view_204 = None\0A detach_81 = torch.ops.aten.detach(detach_80); detach_80 = None\0A _reshape_alias_85 = torch.ops.aten._reshape_alias(mm_24, [1, 128, 384], [49152, 384, 1]); mm_24 = None\0A add_43 = torch.ops.aten.add(add_42, 
_reshape_alias_85); add_42 = _reshape_alias_85 = None\0A t_125 = torch.ops.aten.t(t_124); t_124 = None\0A detach_82 = torch.ops.aten.detach(t_125); t_125 = None\0A detach_83 = torch.ops.aten.detach(detach_82); detach_82 = None\0A _reshape_alias_86 = torch.ops.aten._reshape_alias(_unsafe_view_26, [128, 384], [384, 1]); _unsafe_view_26 = None\0A t_126 = torch.ops.aten.t(t_60); t_60 = None\0A mm_26 = torch.ops.aten.mm(_reshape_alias_86, t_126); t_126 = None\0A t_127 = torch.ops.aten.t(_reshape_alias_86)\0A mm_27 = torch.ops.aten.mm(t_127, view_160); t_127 = view_160 = None\0A t_128 = torch.ops.aten.t(mm_27); mm_27 = None\0A sum_31 = torch.ops.aten.sum(_reshape_alias_86, [0], True); _reshape_alias_86 = None\0A view_205 = torch.ops.aten.view(sum_31, [384]); sum_31 = None\0A detach_84 = torch.ops.aten.detach(view_205); view_205 = None\0A detach_85 = torch.ops.aten.detach(detach_84); detach_84 = None\0A _reshape_alias_87 = torch.ops.aten._reshape_alias(mm_26, [1, 128, 384], [49152, 384, 1]); mm_26 = None\0A add_44 = torch.ops.aten.add(add_43, _reshape_alias_87); add_43 = _reshape_alias_87 = None\0A t_129 = torch.ops.aten.t(t_128); t_128 = None\0A detach_86 = torch.ops.aten.detach(t_129); t_129 = None\0A detach_87 = torch.ops.aten.detach(detach_86); detach_86 = None\0A to_36 = torch.ops.aten.to(add_44, 6)\0A to_37 = torch.ops.aten.to(add_30, 6)\0A to_38 = torch.ops.aten.to(getitem_61, 6)\0A to_39 = torch.ops.aten.to(getitem_62, 6)\0A to_40 = torch.ops.aten.to(params_195, 6)\0A to_41 = torch.ops.aten.to(params_194, 6)\0A sub_12 = torch.ops.aten.sub(add_30, getitem_61); add_30 = getitem_61 = None\0A mul_28 = torch.ops.aten.mul(sub_12, getitem_62); sub_12 = None\0A mul_29 = torch.ops.aten.mul(add_44, params_195)\0A mul_30 = torch.ops.aten.mul(mul_29, 384)\0A sum_32 = torch.ops.aten.sum(mul_29, [2], True)\0A mul_31 = torch.ops.aten.mul(mul_29, mul_28); mul_29 = None\0A sum_33 = torch.ops.aten.sum(mul_31, [2], True); mul_31 = None\0A mul_32 = torch.ops.aten.mul(mul_28, sum_33); 
sum_33 = None\0A sub_13 = torch.ops.aten.sub(mul_30, sum_32); mul_30 = sum_32 = None\0A sub_14 = torch.ops.aten.sub(sub_13, mul_32); sub_13 = mul_32 = None\0A div_18 = torch.ops.aten.div(getitem_62, 384); getitem_62 = None\0A mul_33 = torch.ops.aten.mul(div_18, sub_14); div_18 = sub_14 = None\0A mul_34 = torch.ops.aten.mul(add_44, mul_28); mul_28 = None\0A sum_34 = torch.ops.aten.sum(mul_34, [0, 1]); mul_34 = None\0A sum_35 = torch.ops.aten.sum(add_44, [0, 1]); add_44 = None\0A to_42 = torch.ops.aten.to(mul_33, 6)\0A to_43 = torch.ops.aten.to(sum_34, 6)\0A to_44 = torch.ops.aten.to(sum_35, 6)\0A detach_88 = torch.ops.aten.detach(sum_34); sum_34 = None\0A detach_89 = torch.ops.aten.detach(detach_88); detach_88 = None\0A detach_90 = torch.ops.aten.detach(sum_35); sum_35 = None\0A detach_91 = torch.ops.aten.detach(detach_90); detach_90 = None\0A _reshape_alias_88 = torch.ops.aten._reshape_alias(mul_33, [128, 384], [384, 1])\0A t_130 = torch.ops.aten.t(t_59); t_59 = None\0A mm_28 = torch.ops.aten.mm(_reshape_alias_88, t_130); t_130 = None\0A t_131 = torch.ops.aten.t(_reshape_alias_88)\0A mm_29 = torch.ops.aten.mm(t_131, view_158); t_131 = view_158 = None\0A t_132 = torch.ops.aten.t(mm_29); mm_29 = None\0A sum_36 = torch.ops.aten.sum(_reshape_alias_88, [0], True); _reshape_alias_88 = None\0A view_206 = torch.ops.aten.view(sum_36, [384]); sum_36 = None\0A detach_92 = torch.ops.aten.detach(view_206); view_206 = None\0A detach_93 = torch.ops.aten.detach(detach_92); detach_92 = None\0A _reshape_alias_89 = torch.ops.aten._reshape_alias(mm_28, [1, 128, 1536], [196608, 1536, 1]); mm_28 = None\0A t_133 = torch.ops.aten.t(t_132); t_132 = None\0A detach_94 = torch.ops.aten.detach(t_133); t_133 = None\0A detach_95 = torch.ops.aten.detach(detach_94); detach_94 = None\0A gelu_backward_2 = torch.ops.aten.gelu_backward(_reshape_alias_89, view_157); _reshape_alias_89 = view_157 = None\0A _reshape_alias_90 = torch.ops.aten._reshape_alias(gelu_backward_2, [128, 1536], [1536, 1]); 
gelu_backward_2 = None\0A t_134 = torch.ops.aten.t(t_58); t_58 = None\0A mm_30 = torch.ops.aten.mm(_reshape_alias_90, t_134); t_134 = None\0A t_135 = torch.ops.aten.t(_reshape_alias_90)\0A mm_31 = torch.ops.aten.mm(t_135, view_156); t_135 = view_156 = None\0A t_136 = torch.ops.aten.t(mm_31); mm_31 = None\0A sum_37 = torch.ops.aten.sum(_reshape_alias_90, [0], True); _reshape_alias_90 = None\0A view_207 = torch.ops.aten.view(sum_37, [1536]); sum_37 = None\0A detach_96 = torch.ops.aten.detach(view_207); view_207 = None\0A detach_97 = torch.ops.aten.detach(detach_96); detach_96 = None\0A _reshape_alias_91 = torch.ops.aten._reshape_alias(mm_30, [1, 128, 384], [49152, 384, 1]); mm_30 = None\0A add_45 = torch.ops.aten.add(mul_33, _reshape_alias_91); mul_33 = _reshape_alias_91 = None\0A t_137 = torch.ops.aten.t(t_136); t_136 = None\0A detach_98 = torch.ops.aten.detach(t_137); t_137 = None\0A detach_99 = torch.ops.aten.detach(detach_98); detach_98 = None\0A to_45 = torch.ops.aten.to(add_45, 6)\0A to_46 = torch.ops.aten.to(add_29, 6)\0A to_47 = torch.ops.aten.to(getitem_58, 6)\0A to_48 = torch.ops.aten.to(getitem_59, 6)\0A to_49 = torch.ops.aten.to(params_183, 6)\0A to_50 = torch.ops.aten.to(params_182, 6)\0A sub_15 = torch.ops.aten.sub(add_29, getitem_58); add_29 = getitem_58 = None\0A mul_35 = torch.ops.aten.mul(sub_15, getitem_59); sub_15 = None\0A mul_36 = torch.ops.aten.mul(add_45, params_183)\0A mul_37 = torch.ops.aten.mul(mul_36, 384)\0A sum_38 = torch.ops.aten.sum(mul_36, [2], True)\0A mul_38 = torch.ops.aten.mul(mul_36, mul_35); mul_36 = None\0A sum_39 = torch.ops.aten.sum(mul_38, [2], True); mul_38 = None\0A mul_39 = torch.ops.aten.mul(mul_35, sum_39); sum_39 = None\0A sub_16 = torch.ops.aten.sub(mul_37, sum_38); mul_37 = sum_38 = None\0A sub_17 = torch.ops.aten.sub(sub_16, mul_39); sub_16 = mul_39 = None\0A div_19 = torch.ops.aten.div(getitem_59, 384); getitem_59 = None\0A mul_40 = torch.ops.aten.mul(div_19, sub_17); div_19 = sub_17 = None\0A mul_41 = 
torch.ops.aten.mul(add_45, mul_35); mul_35 = None\0A sum_40 = torch.ops.aten.sum(mul_41, [0, 1]); mul_41 = None\0A sum_41 = torch.ops.aten.sum(add_45, [0, 1]); add_45 = None\0A to_51 = torch.ops.aten.to(mul_40, 6)\0A to_52 = torch.ops.aten.to(sum_40, 6)\0A to_53 = torch.ops.aten.to(sum_41, 6)\0A detach_100 = torch.ops.aten.detach(sum_40); sum_40 = None\0A detach_101 = torch.ops.aten.detach(detach_100); detach_100 = None\0A detach_102 = torch.ops.aten.detach(sum_41); sum_41 = None\0A detach_103 = torch.ops.aten.detach(detach_102); detach_102 = None\0A _reshape_alias_92 = torch.ops.aten._reshape_alias(mul_40, [128, 384], [384, 1])\0A t_138 = torch.ops.aten.t(t_57); t_57 = None\0A mm_32 = torch.ops.aten.mm(_reshape_alias_92, t_138); t_138 = None\0A t_139 = torch.ops.aten.t(_reshape_alias_92)\0A mm_33 = torch.ops.aten.mm(t_139, view_154); t_139 = view_154 = None\0A t_140 = torch.ops.aten.t(mm_33); mm_33 = None\0A sum_42 = torch.ops.aten.sum(_reshape_alias_92, [0], True); _reshape_alias_92 = None\0A view_208 = torch.ops.aten.view(sum_42, [384]); sum_42 = None\0A detach_104 = torch.ops.aten.detach(view_208); view_208 = None\0A detach_105 = torch.ops.aten.detach(detach_104); detach_104 = None\0A _reshape_alias_93 = torch.ops.aten._reshape_alias(mm_32, [1, 128, 384], [49152, 384, 1]); mm_32 = None\0A t_141 = torch.ops.aten.t(t_140); t_140 = None\0A detach_106 = torch.ops.aten.detach(t_141); t_141 = None\0A detach_107 = torch.ops.aten.detach(detach_106); detach_106 = None\0A _reshape_alias_94 = torch.ops.aten._reshape_alias(_reshape_alias_93, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_93 = None\0A permute_56 = torch.ops.aten.permute(_reshape_alias_94, [0, 2, 1, 3]); _reshape_alias_94 = None\0A _reshape_alias_95 = torch.ops.aten._reshape_alias(permute_56, [12, 128, 32], [32, 384, 1]); permute_56 = None\0A transpose_22 = torch.ops.aten.transpose(_reshape_alias_38, 1, 2); _reshape_alias_38 = None\0A bmm_32 = torch.ops.aten.bmm(transpose_22, _reshape_alias_95); 
transpose_22 = None\0A transpose_23 = torch.ops.aten.transpose(_reshape_alias_39, 1, 2); _reshape_alias_39 = None\0A bmm_33 = torch.ops.aten.bmm(_reshape_alias_95, transpose_23); _reshape_alias_95 = transpose_23 = None\0A _reshape_alias_96 = torch.ops.aten._reshape_alias(bmm_32, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_32 = None\0A _reshape_alias_97 = torch.ops.aten._reshape_alias(bmm_33, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_33 = None\0A detach_108 = torch.ops.aten.detach(detach_9); detach_9 = None\0A _softmax_backward_data_2 = torch.ops.aten._softmax_backward_data(_reshape_alias_97, detach_108, -1, 6); _reshape_alias_97 = detach_108 = None\0A div_20 = torch.ops.aten.div(_softmax_backward_data_2, 5.656854249492381); _softmax_backward_data_2 = None\0A _reshape_alias_98 = torch.ops.aten._reshape_alias(div_20, [12, 128, 128], [16384, 128, 1]); div_20 = None\0A transpose_24 = torch.ops.aten.transpose(_reshape_alias_36, 1, 2); _reshape_alias_36 = None\0A bmm_34 = torch.ops.aten.bmm(transpose_24, _reshape_alias_98); transpose_24 = None\0A transpose_25 = torch.ops.aten.transpose(_reshape_alias_37, 1, 2); _reshape_alias_37 = None\0A bmm_35 = torch.ops.aten.bmm(_reshape_alias_98, transpose_25); _reshape_alias_98 = transpose_25 = None\0A _reshape_alias_99 = torch.ops.aten._reshape_alias(bmm_34, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_34 = None\0A _reshape_alias_100 = torch.ops.aten._reshape_alias(bmm_35, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_35 = None\0A transpose_26 = torch.ops.aten.transpose(_reshape_alias_99, -1, -2); _reshape_alias_99 = None\0A permute_57 = torch.ops.aten.permute(_reshape_alias_100, [0, 2, 1, 3]); _reshape_alias_100 = None\0A clone_16 = torch.ops.aten.clone(permute_57, memory_format = 0); permute_57 = None\0A _unsafe_view_28 = torch.ops.aten._unsafe_view(clone_16, [1, 128, 384]); clone_16 = None\0A permute_58 = torch.ops.aten.permute(_reshape_alias_96, [0, 2, 1, 3]); _reshape_alias_96 = None\0A clone_17 = 
torch.ops.aten.clone(permute_58, memory_format = 0); permute_58 = None\0A _unsafe_view_29 = torch.ops.aten._unsafe_view(clone_17, [1, 128, 384]); clone_17 = None\0A _reshape_alias_101 = torch.ops.aten._reshape_alias(_unsafe_view_29, [128, 384], [384, 1]); _unsafe_view_29 = None\0A t_142 = torch.ops.aten.t(t_56); t_56 = None\0A mm_34 = torch.ops.aten.mm(_reshape_alias_101, t_142); t_142 = None\0A t_143 = torch.ops.aten.t(_reshape_alias_101)\0A mm_35 = torch.ops.aten.mm(t_143, view_149); t_143 = view_149 = None\0A t_144 = torch.ops.aten.t(mm_35); mm_35 = None\0A sum_43 = torch.ops.aten.sum(_reshape_alias_101, [0], True); _reshape_alias_101 = None\0A view_209 = torch.ops.aten.view(sum_43, [384]); sum_43 = None\0A detach_109 = torch.ops.aten.detach(view_209); view_209 = None\0A detach_110 = torch.ops.aten.detach(detach_109); detach_109 = None\0A _reshape_alias_102 = torch.ops.aten._reshape_alias(mm_34, [1, 128, 384], [49152, 384, 1]); mm_34 = None\0A add_46 = torch.ops.aten.add(mul_40, _reshape_alias_102); mul_40 = _reshape_alias_102 = None\0A t_145 = torch.ops.aten.t(t_144); t_144 = None\0A detach_111 = torch.ops.aten.detach(t_145); t_145 = None\0A detach_112 = torch.ops.aten.detach(detach_111); detach_111 = None\0A permute_59 = torch.ops.aten.permute(transpose_26, [0, 2, 1, 3]); transpose_26 = None\0A _reshape_alias_103 = torch.ops.aten._reshape_alias(permute_59, [1, 128, 384], [128, 1, 128]); permute_59 = None\0A _reshape_alias_104 = torch.ops.aten._reshape_alias(_reshape_alias_103, [128, 384], [1, 128]); _reshape_alias_103 = None\0A t_146 = torch.ops.aten.t(t_55); t_55 = None\0A mm_36 = torch.ops.aten.mm(_reshape_alias_104, t_146); t_146 = None\0A t_147 = torch.ops.aten.t(_reshape_alias_104)\0A mm_37 = torch.ops.aten.mm(t_147, view_146); t_147 = view_146 = None\0A t_148 = torch.ops.aten.t(mm_37); mm_37 = None\0A sum_44 = torch.ops.aten.sum(_reshape_alias_104, [0], True); _reshape_alias_104 = None\0A view_210 = torch.ops.aten.view(sum_44, [384]); sum_44 = None\0A 
detach_113 = torch.ops.aten.detach(view_210); view_210 = None\0A detach_114 = torch.ops.aten.detach(detach_113); detach_113 = None\0A _reshape_alias_105 = torch.ops.aten._reshape_alias(mm_36, [1, 128, 384], [49152, 384, 1]); mm_36 = None\0A add_47 = torch.ops.aten.add(add_46, _reshape_alias_105); add_46 = _reshape_alias_105 = None\0A t_149 = torch.ops.aten.t(t_148); t_148 = None\0A detach_115 = torch.ops.aten.detach(t_149); t_149 = None\0A detach_116 = torch.ops.aten.detach(detach_115); detach_115 = None\0A _reshape_alias_106 = torch.ops.aten._reshape_alias(_unsafe_view_28, [128, 384], [384, 1]); _unsafe_view_28 = None\0A t_150 = torch.ops.aten.t(t_54); t_54 = None\0A mm_38 = torch.ops.aten.mm(_reshape_alias_106, t_150); t_150 = None\0A t_151 = torch.ops.aten.t(_reshape_alias_106)\0A mm_39 = torch.ops.aten.mm(t_151, view_144); t_151 = view_144 = None\0A t_152 = torch.ops.aten.t(mm_39); mm_39 = None\0A sum_45 = torch.ops.aten.sum(_reshape_alias_106, [0], True); _reshape_alias_106 = None\0A view_211 = torch.ops.aten.view(sum_45, [384]); sum_45 = None\0A detach_117 = torch.ops.aten.detach(view_211); view_211 = None\0A detach_118 = torch.ops.aten.detach(detach_117); detach_117 = None\0A _reshape_alias_107 = torch.ops.aten._reshape_alias(mm_38, [1, 128, 384], [49152, 384, 1]); mm_38 = None\0A add_48 = torch.ops.aten.add(add_47, _reshape_alias_107); add_47 = _reshape_alias_107 = None\0A t_153 = torch.ops.aten.t(t_152); t_152 = None\0A detach_119 = torch.ops.aten.detach(t_153); t_153 = None\0A detach_120 = torch.ops.aten.detach(detach_119); detach_119 = None\0A to_54 = torch.ops.aten.to(add_48, 6)\0A to_55 = torch.ops.aten.to(add_27, 6)\0A to_56 = torch.ops.aten.to(getitem_55, 6)\0A to_57 = torch.ops.aten.to(getitem_56, 6)\0A to_58 = torch.ops.aten.to(params_179, 6)\0A to_59 = torch.ops.aten.to(params_178, 6)\0A sub_18 = torch.ops.aten.sub(add_27, getitem_55); add_27 = getitem_55 = None\0A mul_42 = torch.ops.aten.mul(sub_18, getitem_56); sub_18 = None\0A mul_43 = 
torch.ops.aten.mul(add_48, params_179)\0A mul_44 = torch.ops.aten.mul(mul_43, 384)\0A sum_46 = torch.ops.aten.sum(mul_43, [2], True)\0A mul_45 = torch.ops.aten.mul(mul_43, mul_42); mul_43 = None\0A sum_47 = torch.ops.aten.sum(mul_45, [2], True); mul_45 = None\0A mul_46 = torch.ops.aten.mul(mul_42, sum_47); sum_47 = None\0A sub_19 = torch.ops.aten.sub(mul_44, sum_46); mul_44 = sum_46 = None\0A sub_20 = torch.ops.aten.sub(sub_19, mul_46); sub_19 = mul_46 = None\0A div_21 = torch.ops.aten.div(getitem_56, 384); getitem_56 = None\0A mul_47 = torch.ops.aten.mul(div_21, sub_20); div_21 = sub_20 = None\0A mul_48 = torch.ops.aten.mul(add_48, mul_42); mul_42 = None\0A sum_48 = torch.ops.aten.sum(mul_48, [0, 1]); mul_48 = None\0A sum_49 = torch.ops.aten.sum(add_48, [0, 1]); add_48 = None\0A to_60 = torch.ops.aten.to(mul_47, 6)\0A to_61 = torch.ops.aten.to(sum_48, 6)\0A to_62 = torch.ops.aten.to(sum_49, 6)\0A detach_121 = torch.ops.aten.detach(sum_48); sum_48 = None\0A detach_122 = torch.ops.aten.detach(detach_121); detach_121 = None\0A detach_123 = torch.ops.aten.detach(sum_49); sum_49 = None\0A detach_124 = torch.ops.aten.detach(detach_123); detach_123 = None\0A _reshape_alias_108 = torch.ops.aten._reshape_alias(mul_47, [128, 384], [384, 1])\0A t_154 = torch.ops.aten.t(t_53); t_53 = None\0A mm_40 = torch.ops.aten.mm(_reshape_alias_108, t_154); t_154 = None\0A t_155 = torch.ops.aten.t(_reshape_alias_108)\0A mm_41 = torch.ops.aten.mm(t_155, view_142); t_155 = view_142 = None\0A t_156 = torch.ops.aten.t(mm_41); mm_41 = None\0A sum_50 = torch.ops.aten.sum(_reshape_alias_108, [0], True); _reshape_alias_108 = None\0A view_212 = torch.ops.aten.view(sum_50, [384]); sum_50 = None\0A detach_125 = torch.ops.aten.detach(view_212); view_212 = None\0A detach_126 = torch.ops.aten.detach(detach_125); detach_125 = None\0A _reshape_alias_109 = torch.ops.aten._reshape_alias(mm_40, [1, 128, 1536], [196608, 1536, 1]); mm_40 = None\0A t_157 = torch.ops.aten.t(t_156); t_156 = None\0A detach_127 = 
torch.ops.aten.detach(t_157); t_157 = None\0A detach_128 = torch.ops.aten.detach(detach_127); detach_127 = None\0A gelu_backward_3 = torch.ops.aten.gelu_backward(_reshape_alias_109, view_141); _reshape_alias_109 = view_141 = None\0A _reshape_alias_110 = torch.ops.aten._reshape_alias(gelu_backward_3, [128, 1536], [1536, 1]); gelu_backward_3 = None\0A t_158 = torch.ops.aten.t(t_52); t_52 = None\0A mm_42 = torch.ops.aten.mm(_reshape_alias_110, t_158); t_158 = None\0A t_159 = torch.ops.aten.t(_reshape_alias_110)\0A mm_43 = torch.ops.aten.mm(t_159, view_140); t_159 = view_140 = None\0A t_160 = torch.ops.aten.t(mm_43); mm_43 = None\0A sum_51 = torch.ops.aten.sum(_reshape_alias_110, [0], True); _reshape_alias_110 = None\0A view_213 = torch.ops.aten.view(sum_51, [1536]); sum_51 = None\0A detach_129 = torch.ops.aten.detach(view_213); view_213 = None\0A detach_130 = torch.ops.aten.detach(detach_129); detach_129 = None\0A _reshape_alias_111 = torch.ops.aten._reshape_alias(mm_42, [1, 128, 384], [49152, 384, 1]); mm_42 = None\0A add_49 = torch.ops.aten.add(mul_47, _reshape_alias_111); mul_47 = _reshape_alias_111 = None\0A t_161 = torch.ops.aten.t(t_160); t_160 = None\0A detach_131 = torch.ops.aten.detach(t_161); t_161 = None\0A detach_132 = torch.ops.aten.detach(detach_131); detach_131 = None\0A to_63 = torch.ops.aten.to(add_49, 6)\0A to_64 = torch.ops.aten.to(add_26, 6)\0A to_65 = torch.ops.aten.to(getitem_52, 6)\0A to_66 = torch.ops.aten.to(getitem_53, 6)\0A to_67 = torch.ops.aten.to(params_167, 6)\0A to_68 = torch.ops.aten.to(params_166, 6)\0A sub_21 = torch.ops.aten.sub(add_26, getitem_52); add_26 = getitem_52 = None\0A mul_49 = torch.ops.aten.mul(sub_21, getitem_53); sub_21 = None\0A mul_50 = torch.ops.aten.mul(add_49, params_167)\0A mul_51 = torch.ops.aten.mul(mul_50, 384)\0A sum_52 = torch.ops.aten.sum(mul_50, [2], True)\0A mul_52 = torch.ops.aten.mul(mul_50, mul_49); mul_50 = None\0A sum_53 = torch.ops.aten.sum(mul_52, [2], True); mul_52 = None\0A mul_53 = 
torch.ops.aten.mul(mul_49, sum_53); sum_53 = None\0A sub_22 = torch.ops.aten.sub(mul_51, sum_52); mul_51 = sum_52 = None\0A sub_23 = torch.ops.aten.sub(sub_22, mul_53); sub_22 = mul_53 = None\0A div_22 = torch.ops.aten.div(getitem_53, 384); getitem_53 = None\0A mul_54 = torch.ops.aten.mul(div_22, sub_23); div_22 = sub_23 = None\0A mul_55 = torch.ops.aten.mul(add_49, mul_49); mul_49 = None\0A sum_54 = torch.ops.aten.sum(mul_55, [0, 1]); mul_55 = None\0A sum_55 = torch.ops.aten.sum(add_49, [0, 1]); add_49 = None\0A to_69 = torch.ops.aten.to(mul_54, 6)\0A to_70 = torch.ops.aten.to(sum_54, 6)\0A to_71 = torch.ops.aten.to(sum_55, 6)\0A detach_133 = torch.ops.aten.detach(sum_54); sum_54 = None\0A detach_134 = torch.ops.aten.detach(detach_133); detach_133 = None\0A detach_135 = torch.ops.aten.detach(sum_55); sum_55 = None\0A detach_136 = torch.ops.aten.detach(detach_135); detach_135 = None\0A _reshape_alias_112 = torch.ops.aten._reshape_alias(mul_54, [128, 384], [384, 1])\0A t_162 = torch.ops.aten.t(t_51); t_51 = None\0A mm_44 = torch.ops.aten.mm(_reshape_alias_112, t_162); t_162 = None\0A t_163 = torch.ops.aten.t(_reshape_alias_112)\0A mm_45 = torch.ops.aten.mm(t_163, view_138); t_163 = view_138 = None\0A t_164 = torch.ops.aten.t(mm_45); mm_45 = None\0A sum_56 = torch.ops.aten.sum(_reshape_alias_112, [0], True); _reshape_alias_112 = None\0A view_214 = torch.ops.aten.view(sum_56, [384]); sum_56 = None\0A detach_137 = torch.ops.aten.detach(view_214); view_214 = None\0A detach_138 = torch.ops.aten.detach(detach_137); detach_137 = None\0A _reshape_alias_113 = torch.ops.aten._reshape_alias(mm_44, [1, 128, 384], [49152, 384, 1]); mm_44 = None\0A t_165 = torch.ops.aten.t(t_164); t_164 = None\0A detach_139 = torch.ops.aten.detach(t_165); t_165 = None\0A detach_140 = torch.ops.aten.detach(detach_139); detach_139 = None\0A _reshape_alias_114 = torch.ops.aten._reshape_alias(_reshape_alias_113, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_113 = None\0A permute_60 = 
torch.ops.aten.permute(_reshape_alias_114, [0, 2, 1, 3]); _reshape_alias_114 = None\0A _reshape_alias_115 = torch.ops.aten._reshape_alias(permute_60, [12, 128, 32], [32, 384, 1]); permute_60 = None\0A transpose_27 = torch.ops.aten.transpose(_reshape_alias_34, 1, 2); _reshape_alias_34 = None\0A bmm_36 = torch.ops.aten.bmm(transpose_27, _reshape_alias_115); transpose_27 = None\0A transpose_28 = torch.ops.aten.transpose(_reshape_alias_35, 1, 2); _reshape_alias_35 = None\0A bmm_37 = torch.ops.aten.bmm(_reshape_alias_115, transpose_28); _reshape_alias_115 = transpose_28 = None\0A _reshape_alias_116 = torch.ops.aten._reshape_alias(bmm_36, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_36 = None\0A _reshape_alias_117 = torch.ops.aten._reshape_alias(bmm_37, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_37 = None\0A detach_141 = torch.ops.aten.detach(detach_8); detach_8 = None\0A _softmax_backward_data_3 = torch.ops.aten._softmax_backward_data(_reshape_alias_117, detach_141, -1, 6); _reshape_alias_117 = detach_141 = None\0A div_23 = torch.ops.aten.div(_softmax_backward_data_3, 5.656854249492381); _softmax_backward_data_3 = None\0A _reshape_alias_118 = torch.ops.aten._reshape_alias(div_23, [12, 128, 128], [16384, 128, 1]); div_23 = None\0A transpose_29 = torch.ops.aten.transpose(_reshape_alias_32, 1, 2); _reshape_alias_32 = None\0A bmm_38 = torch.ops.aten.bmm(transpose_29, _reshape_alias_118); transpose_29 = None\0A transpose_30 = torch.ops.aten.transpose(_reshape_alias_33, 1, 2); _reshape_alias_33 = None\0A bmm_39 = torch.ops.aten.bmm(_reshape_alias_118, transpose_30); _reshape_alias_118 = transpose_30 = None\0A _reshape_alias_119 = torch.ops.aten._reshape_alias(bmm_38, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_38 = None\0A _reshape_alias_120 = torch.ops.aten._reshape_alias(bmm_39, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_39 = None\0A transpose_31 = torch.ops.aten.transpose(_reshape_alias_119, -1, -2); _reshape_alias_119 = None\0A permute_61 = 
torch.ops.aten.permute(_reshape_alias_120, [0, 2, 1, 3]); _reshape_alias_120 = None\0A clone_18 = torch.ops.aten.clone(permute_61, memory_format = 0); permute_61 = None\0A _unsafe_view_30 = torch.ops.aten._unsafe_view(clone_18, [1, 128, 384]); clone_18 = None\0A permute_62 = torch.ops.aten.permute(_reshape_alias_116, [0, 2, 1, 3]); _reshape_alias_116 = None\0A clone_19 = torch.ops.aten.clone(permute_62, memory_format = 0); permute_62 = None\0A _unsafe_view_31 = torch.ops.aten._unsafe_view(clone_19, [1, 128, 384]); clone_19 = None\0A _reshape_alias_121 = torch.ops.aten._reshape_alias(_unsafe_view_31, [128, 384], [384, 1]); _unsafe_view_31 = None\0A t_166 = torch.ops.aten.t(t_50); t_50 = None\0A mm_46 = torch.ops.aten.mm(_reshape_alias_121, t_166); t_166 = None\0A t_167 = torch.ops.aten.t(_reshape_alias_121)\0A mm_47 = torch.ops.aten.mm(t_167, view_133); t_167 = view_133 = None\0A t_168 = torch.ops.aten.t(mm_47); mm_47 = None\0A sum_57 = torch.ops.aten.sum(_reshape_alias_121, [0], True); _reshape_alias_121 = None\0A view_215 = torch.ops.aten.view(sum_57, [384]); sum_57 = None\0A detach_142 = torch.ops.aten.detach(view_215); view_215 = None\0A detach_143 = torch.ops.aten.detach(detach_142); detach_142 = None\0A _reshape_alias_122 = torch.ops.aten._reshape_alias(mm_46, [1, 128, 384], [49152, 384, 1]); mm_46 = None\0A add_50 = torch.ops.aten.add(mul_54, _reshape_alias_122); mul_54 = _reshape_alias_122 = None\0A t_169 = torch.ops.aten.t(t_168); t_168 = None\0A detach_144 = torch.ops.aten.detach(t_169); t_169 = None\0A detach_145 = torch.ops.aten.detach(detach_144); detach_144 = None\0A permute_63 = torch.ops.aten.permute(transpose_31, [0, 2, 1, 3]); transpose_31 = None\0A _reshape_alias_123 = torch.ops.aten._reshape_alias(permute_63, [1, 128, 384], [128, 1, 128]); permute_63 = None\0A _reshape_alias_124 = torch.ops.aten._reshape_alias(_reshape_alias_123, [128, 384], [1, 128]); _reshape_alias_123 = None\0A t_170 = torch.ops.aten.t(t_49); t_49 = None\0A mm_48 = 
torch.ops.aten.mm(_reshape_alias_124, t_170); t_170 = None\0A t_171 = torch.ops.aten.t(_reshape_alias_124)\0A mm_49 = torch.ops.aten.mm(t_171, view_130); t_171 = view_130 = None\0A t_172 = torch.ops.aten.t(mm_49); mm_49 = None\0A sum_58 = torch.ops.aten.sum(_reshape_alias_124, [0], True); _reshape_alias_124 = None\0A view_216 = torch.ops.aten.view(sum_58, [384]); sum_58 = None\0A detach_146 = torch.ops.aten.detach(view_216); view_216 = None\0A detach_147 = torch.ops.aten.detach(detach_146); detach_146 = None\0A _reshape_alias_125 = torch.ops.aten._reshape_alias(mm_48, [1, 128, 384], [49152, 384, 1]); mm_48 = None\0A add_51 = torch.ops.aten.add(add_50, _reshape_alias_125); add_50 = _reshape_alias_125 = None\0A t_173 = torch.ops.aten.t(t_172); t_172 = None\0A detach_148 = torch.ops.aten.detach(t_173); t_173 = None\0A detach_149 = torch.ops.aten.detach(detach_148); detach_148 = None\0A _reshape_alias_126 = torch.ops.aten._reshape_alias(_unsafe_view_30, [128, 384], [384, 1]); _unsafe_view_30 = None\0A t_174 = torch.ops.aten.t(t_48); t_48 = None\0A mm_50 = torch.ops.aten.mm(_reshape_alias_126, t_174); t_174 = None\0A t_175 = torch.ops.aten.t(_reshape_alias_126)\0A mm_51 = torch.ops.aten.mm(t_175, view_128); t_175 = view_128 = None\0A t_176 = torch.ops.aten.t(mm_51); mm_51 = None\0A sum_59 = torch.ops.aten.sum(_reshape_alias_126, [0], True); _reshape_alias_126 = None\0A view_217 = torch.ops.aten.view(sum_59, [384]); sum_59 = None\0A detach_150 = torch.ops.aten.detach(view_217); view_217 = None\0A detach_151 = torch.ops.aten.detach(detach_150); detach_150 = None\0A _reshape_alias_127 = torch.ops.aten._reshape_alias(mm_50, [1, 128, 384], [49152, 384, 1]); mm_50 = None\0A add_52 = torch.ops.aten.add(add_51, _reshape_alias_127); add_51 = _reshape_alias_127 = None\0A t_177 = torch.ops.aten.t(t_176); t_176 = None\0A detach_152 = torch.ops.aten.detach(t_177); t_177 = None\0A detach_153 = torch.ops.aten.detach(detach_152); detach_152 = None\0A to_72 = torch.ops.aten.to(add_52, 
6)\0A to_73 = torch.ops.aten.to(add_24, 6)\0A to_74 = torch.ops.aten.to(getitem_49, 6)\0A to_75 = torch.ops.aten.to(getitem_50, 6)\0A to_76 = torch.ops.aten.to(params_163, 6)\0A to_77 = torch.ops.aten.to(params_162, 6)\0A sub_24 = torch.ops.aten.sub(add_24, getitem_49); add_24 = getitem_49 = None\0A mul_56 = torch.ops.aten.mul(sub_24, getitem_50); sub_24 = None\0A mul_57 = torch.ops.aten.mul(add_52, params_163)\0A mul_58 = torch.ops.aten.mul(mul_57, 384)\0A sum_60 = torch.ops.aten.sum(mul_57, [2], True)\0A mul_59 = torch.ops.aten.mul(mul_57, mul_56); mul_57 = None\0A sum_61 = torch.ops.aten.sum(mul_59, [2], True); mul_59 = None\0A mul_60 = torch.ops.aten.mul(mul_56, sum_61); sum_61 = None\0A sub_25 = torch.ops.aten.sub(mul_58, sum_60); mul_58 = sum_60 = None\0A sub_26 = torch.ops.aten.sub(sub_25, mul_60); sub_25 = mul_60 = None\0A div_24 = torch.ops.aten.div(getitem_50, 384); getitem_50 = None\0A mul_61 = torch.ops.aten.mul(div_24, sub_26); div_24 = sub_26 = None\0A mul_62 = torch.ops.aten.mul(add_52, mul_56); mul_56 = None\0A sum_62 = torch.ops.aten.sum(mul_62, [0, 1]); mul_62 = None\0A sum_63 = torch.ops.aten.sum(add_52, [0, 1]); add_52 = None\0A to_78 = torch.ops.aten.to(mul_61, 6)\0A to_79 = torch.ops.aten.to(sum_62, 6)\0A to_80 = torch.ops.aten.to(sum_63, 6)\0A detach_154 = torch.ops.aten.detach(sum_62); sum_62 = None\0A detach_155 = torch.ops.aten.detach(detach_154); detach_154 = None\0A detach_156 = torch.ops.aten.detach(sum_63); sum_63 = None\0A detach_157 = torch.ops.aten.detach(detach_156); detach_156 = None\0A _reshape_alias_128 = torch.ops.aten._reshape_alias(mul_61, [128, 384], [384, 1])\0A t_178 = torch.ops.aten.t(t_47); t_47 = None\0A mm_52 = torch.ops.aten.mm(_reshape_alias_128, t_178); t_178 = None\0A t_179 = torch.ops.aten.t(_reshape_alias_128)\0A mm_53 = torch.ops.aten.mm(t_179, view_126); t_179 = view_126 = None\0A t_180 = torch.ops.aten.t(mm_53); mm_53 = None\0A sum_64 = torch.ops.aten.sum(_reshape_alias_128, [0], True); _reshape_alias_128 = 
None\0A view_218 = torch.ops.aten.view(sum_64, [384]); sum_64 = None\0A detach_158 = torch.ops.aten.detach(view_218); view_218 = None\0A detach_159 = torch.ops.aten.detach(detach_158); detach_158 = None\0A _reshape_alias_129 = torch.ops.aten._reshape_alias(mm_52, [1, 128, 1536], [196608, 1536, 1]); mm_52 = None\0A t_181 = torch.ops.aten.t(t_180); t_180 = None\0A detach_160 = torch.ops.aten.detach(t_181); t_181 = None\0A detach_161 = torch.ops.aten.detach(detach_160); detach_160 = None\0A gelu_backward_4 = torch.ops.aten.gelu_backward(_reshape_alias_129, view_125); _reshape_alias_129 = view_125 = None\0A _reshape_alias_130 = torch.ops.aten._reshape_alias(gelu_backward_4, [128, 1536], [1536, 1]); gelu_backward_4 = None\0A t_182 = torch.ops.aten.t(t_46); t_46 = None\0A mm_54 = torch.ops.aten.mm(_reshape_alias_130, t_182); t_182 = None\0A t_183 = torch.ops.aten.t(_reshape_alias_130)\0A mm_55 = torch.ops.aten.mm(t_183, view_124); t_183 = view_124 = None\0A t_184 = torch.ops.aten.t(mm_55); mm_55 = None\0A sum_65 = torch.ops.aten.sum(_reshape_alias_130, [0], True); _reshape_alias_130 = None\0A view_219 = torch.ops.aten.view(sum_65, [1536]); sum_65 = None\0A detach_162 = torch.ops.aten.detach(view_219); view_219 = None\0A detach_163 = torch.ops.aten.detach(detach_162); detach_162 = None\0A _reshape_alias_131 = torch.ops.aten._reshape_alias(mm_54, [1, 128, 384], [49152, 384, 1]); mm_54 = None\0A add_53 = torch.ops.aten.add(mul_61, _reshape_alias_131); mul_61 = _reshape_alias_131 = None\0A t_185 = torch.ops.aten.t(t_184); t_184 = None\0A detach_164 = torch.ops.aten.detach(t_185); t_185 = None\0A detach_165 = torch.ops.aten.detach(detach_164); detach_164 = None\0A to_81 = torch.ops.aten.to(add_53, 6)\0A to_82 = torch.ops.aten.to(add_23, 6)\0A to_83 = torch.ops.aten.to(getitem_46, 6)\0A to_84 = torch.ops.aten.to(getitem_47, 6)\0A to_85 = torch.ops.aten.to(params_151, 6)\0A to_86 = torch.ops.aten.to(params_150, 6)\0A sub_27 = torch.ops.aten.sub(add_23, getitem_46); add_23 = 
getitem_46 = None\0A mul_63 = torch.ops.aten.mul(sub_27, getitem_47); sub_27 = None\0A mul_64 = torch.ops.aten.mul(add_53, params_151)\0A mul_65 = torch.ops.aten.mul(mul_64, 384)\0A sum_66 = torch.ops.aten.sum(mul_64, [2], True)\0A mul_66 = torch.ops.aten.mul(mul_64, mul_63); mul_64 = None\0A sum_67 = torch.ops.aten.sum(mul_66, [2], True); mul_66 = None\0A mul_67 = torch.ops.aten.mul(mul_63, sum_67); sum_67 = None\0A sub_28 = torch.ops.aten.sub(mul_65, sum_66); mul_65 = sum_66 = None\0A sub_29 = torch.ops.aten.sub(sub_28, mul_67); sub_28 = mul_67 = None\0A div_25 = torch.ops.aten.div(getitem_47, 384); getitem_47 = None\0A mul_68 = torch.ops.aten.mul(div_25, sub_29); div_25 = sub_29 = None\0A mul_69 = torch.ops.aten.mul(add_53, mul_63); mul_63 = None\0A sum_68 = torch.ops.aten.sum(mul_69, [0, 1]); mul_69 = None\0A sum_69 = torch.ops.aten.sum(add_53, [0, 1]); add_53 = None\0A to_87 = torch.ops.aten.to(mul_68, 6)\0A to_88 = torch.ops.aten.to(sum_68, 6)\0A to_89 = torch.ops.aten.to(sum_69, 6)\0A detach_166 = torch.ops.aten.detach(sum_68); sum_68 = None\0A detach_167 = torch.ops.aten.detach(detach_166); detach_166 = None\0A detach_168 = torch.ops.aten.detach(sum_69); sum_69 = None\0A detach_169 = torch.ops.aten.detach(detach_168); detach_168 = None\0A _reshape_alias_132 = torch.ops.aten._reshape_alias(mul_68, [128, 384], [384, 1])\0A t_186 = torch.ops.aten.t(t_45); t_45 = None\0A mm_56 = torch.ops.aten.mm(_reshape_alias_132, t_186); t_186 = None\0A t_187 = torch.ops.aten.t(_reshape_alias_132)\0A mm_57 = torch.ops.aten.mm(t_187, view_122); t_187 = view_122 = None\0A t_188 = torch.ops.aten.t(mm_57); mm_57 = None\0A sum_70 = torch.ops.aten.sum(_reshape_alias_132, [0], True); _reshape_alias_132 = None\0A view_220 = torch.ops.aten.view(sum_70, [384]); sum_70 = None\0A detach_170 = torch.ops.aten.detach(view_220); view_220 = None\0A detach_171 = torch.ops.aten.detach(detach_170); detach_170 = None\0A _reshape_alias_133 = torch.ops.aten._reshape_alias(mm_56, [1, 128, 384], 
[49152, 384, 1]); mm_56 = None\0A t_189 = torch.ops.aten.t(t_188); t_188 = None\0A detach_172 = torch.ops.aten.detach(t_189); t_189 = None\0A detach_173 = torch.ops.aten.detach(detach_172); detach_172 = None\0A _reshape_alias_134 = torch.ops.aten._reshape_alias(_reshape_alias_133, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_133 = None\0A permute_64 = torch.ops.aten.permute(_reshape_alias_134, [0, 2, 1, 3]); _reshape_alias_134 = None\0A _reshape_alias_135 = torch.ops.aten._reshape_alias(permute_64, [12, 128, 32], [32, 384, 1]); permute_64 = None\0A transpose_32 = torch.ops.aten.transpose(_reshape_alias_30, 1, 2); _reshape_alias_30 = None\0A bmm_40 = torch.ops.aten.bmm(transpose_32, _reshape_alias_135); transpose_32 = None\0A transpose_33 = torch.ops.aten.transpose(_reshape_alias_31, 1, 2); _reshape_alias_31 = None\0A bmm_41 = torch.ops.aten.bmm(_reshape_alias_135, transpose_33); _reshape_alias_135 = transpose_33 = None\0A _reshape_alias_136 = torch.ops.aten._reshape_alias(bmm_40, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_40 = None\0A _reshape_alias_137 = torch.ops.aten._reshape_alias(bmm_41, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_41 = None\0A detach_174 = torch.ops.aten.detach(detach_7); detach_7 = None\0A _softmax_backward_data_4 = torch.ops.aten._softmax_backward_data(_reshape_alias_137, detach_174, -1, 6); _reshape_alias_137 = detach_174 = None\0A div_26 = torch.ops.aten.div(_softmax_backward_data_4, 5.656854249492381); _softmax_backward_data_4 = None\0A _reshape_alias_138 = torch.ops.aten._reshape_alias(div_26, [12, 128, 128], [16384, 128, 1]); div_26 = None\0A transpose_34 = torch.ops.aten.transpose(_reshape_alias_28, 1, 2); _reshape_alias_28 = None\0A bmm_42 = torch.ops.aten.bmm(transpose_34, _reshape_alias_138); transpose_34 = None\0A transpose_35 = torch.ops.aten.transpose(_reshape_alias_29, 1, 2); _reshape_alias_29 = None\0A bmm_43 = torch.ops.aten.bmm(_reshape_alias_138, transpose_35); _reshape_alias_138 = transpose_35 = None\0A 
_reshape_alias_139 = torch.ops.aten._reshape_alias(bmm_42, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_42 = None\0A _reshape_alias_140 = torch.ops.aten._reshape_alias(bmm_43, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_43 = None\0A transpose_36 = torch.ops.aten.transpose(_reshape_alias_139, -1, -2); _reshape_alias_139 = None\0A permute_65 = torch.ops.aten.permute(_reshape_alias_140, [0, 2, 1, 3]); _reshape_alias_140 = None\0A clone_20 = torch.ops.aten.clone(permute_65, memory_format = 0); permute_65 = None\0A _unsafe_view_32 = torch.ops.aten._unsafe_view(clone_20, [1, 128, 384]); clone_20 = None\0A permute_66 = torch.ops.aten.permute(_reshape_alias_136, [0, 2, 1, 3]); _reshape_alias_136 = None\0A clone_21 = torch.ops.aten.clone(permute_66, memory_format = 0); permute_66 = None\0A _unsafe_view_33 = torch.ops.aten._unsafe_view(clone_21, [1, 128, 384]); clone_21 = None\0A _reshape_alias_141 = torch.ops.aten._reshape_alias(_unsafe_view_33, [128, 384], [384, 1]); _unsafe_view_33 = None\0A t_190 = torch.ops.aten.t(t_44); t_44 = None\0A mm_58 = torch.ops.aten.mm(_reshape_alias_141, t_190); t_190 = None\0A t_191 = torch.ops.aten.t(_reshape_alias_141)\0A mm_59 = torch.ops.aten.mm(t_191, view_117); t_191 = view_117 = None\0A t_192 = torch.ops.aten.t(mm_59); mm_59 = None\0A sum_71 = torch.ops.aten.sum(_reshape_alias_141, [0], True); _reshape_alias_141 = None\0A view_221 = torch.ops.aten.view(sum_71, [384]); sum_71 = None\0A detach_175 = torch.ops.aten.detach(view_221); view_221 = None\0A detach_176 = torch.ops.aten.detach(detach_175); detach_175 = None\0A _reshape_alias_142 = torch.ops.aten._reshape_alias(mm_58, [1, 128, 384], [49152, 384, 1]); mm_58 = None\0A add_54 = torch.ops.aten.add(mul_68, _reshape_alias_142); mul_68 = _reshape_alias_142 = None\0A t_193 = torch.ops.aten.t(t_192); t_192 = None\0A detach_177 = torch.ops.aten.detach(t_193); t_193 = None\0A detach_178 = torch.ops.aten.detach(detach_177); detach_177 = None\0A permute_67 = 
torch.ops.aten.permute(transpose_36, [0, 2, 1, 3]); transpose_36 = None\0A _reshape_alias_143 = torch.ops.aten._reshape_alias(permute_67, [1, 128, 384], [128, 1, 128]); permute_67 = None\0A _reshape_alias_144 = torch.ops.aten._reshape_alias(_reshape_alias_143, [128, 384], [1, 128]); _reshape_alias_143 = None\0A t_194 = torch.ops.aten.t(t_43); t_43 = None\0A mm_60 = torch.ops.aten.mm(_reshape_alias_144, t_194); t_194 = None\0A t_195 = torch.ops.aten.t(_reshape_alias_144)\0A mm_61 = torch.ops.aten.mm(t_195, view_114); t_195 = view_114 = None\0A t_196 = torch.ops.aten.t(mm_61); mm_61 = None\0A sum_72 = torch.ops.aten.sum(_reshape_alias_144, [0], True); _reshape_alias_144 = None\0A view_222 = torch.ops.aten.view(sum_72, [384]); sum_72 = None\0A detach_179 = torch.ops.aten.detach(view_222); view_222 = None\0A detach_180 = torch.ops.aten.detach(detach_179); detach_179 = None\0A _reshape_alias_145 = torch.ops.aten._reshape_alias(mm_60, [1, 128, 384], [49152, 384, 1]); mm_60 = None\0A add_55 = torch.ops.aten.add(add_54, _reshape_alias_145); add_54 = _reshape_alias_145 = None\0A t_197 = torch.ops.aten.t(t_196); t_196 = None\0A detach_181 = torch.ops.aten.detach(t_197); t_197 = None\0A detach_182 = torch.ops.aten.detach(detach_181); detach_181 = None\0A _reshape_alias_146 = torch.ops.aten._reshape_alias(_unsafe_view_32, [128, 384], [384, 1]); _unsafe_view_32 = None\0A t_198 = torch.ops.aten.t(t_42); t_42 = None\0A mm_62 = torch.ops.aten.mm(_reshape_alias_146, t_198); t_198 = None\0A t_199 = torch.ops.aten.t(_reshape_alias_146)\0A mm_63 = torch.ops.aten.mm(t_199, view_112); t_199 = view_112 = None\0A t_200 = torch.ops.aten.t(mm_63); mm_63 = None\0A sum_73 = torch.ops.aten.sum(_reshape_alias_146, [0], True); _reshape_alias_146 = None\0A view_223 = torch.ops.aten.view(sum_73, [384]); sum_73 = None\0A detach_183 = torch.ops.aten.detach(view_223); view_223 = None\0A detach_184 = torch.ops.aten.detach(detach_183); detach_183 = None\0A _reshape_alias_147 = 
torch.ops.aten._reshape_alias(mm_62, [1, 128, 384], [49152, 384, 1]); mm_62 = None\0A add_56 = torch.ops.aten.add(add_55, _reshape_alias_147); add_55 = _reshape_alias_147 = None\0A t_201 = torch.ops.aten.t(t_200); t_200 = None\0A detach_185 = torch.ops.aten.detach(t_201); t_201 = None\0A detach_186 = torch.ops.aten.detach(detach_185); detach_185 = None\0A to_90 = torch.ops.aten.to(add_56, 6)\0A to_91 = torch.ops.aten.to(add_21, 6)\0A to_92 = torch.ops.aten.to(getitem_43, 6)\0A to_93 = torch.ops.aten.to(getitem_44, 6)\0A to_94 = torch.ops.aten.to(params_147, 6)\0A to_95 = torch.ops.aten.to(params_146, 6)\0A sub_30 = torch.ops.aten.sub(add_21, getitem_43); add_21 = getitem_43 = None\0A mul_70 = torch.ops.aten.mul(sub_30, getitem_44); sub_30 = None\0A mul_71 = torch.ops.aten.mul(add_56, params_147)\0A mul_72 = torch.ops.aten.mul(mul_71, 384)\0A sum_74 = torch.ops.aten.sum(mul_71, [2], True)\0A mul_73 = torch.ops.aten.mul(mul_71, mul_70); mul_71 = None\0A sum_75 = torch.ops.aten.sum(mul_73, [2], True); mul_73 = None\0A mul_74 = torch.ops.aten.mul(mul_70, sum_75); sum_75 = None\0A sub_31 = torch.ops.aten.sub(mul_72, sum_74); mul_72 = sum_74 = None\0A sub_32 = torch.ops.aten.sub(sub_31, mul_74); sub_31 = mul_74 = None\0A div_27 = torch.ops.aten.div(getitem_44, 384); getitem_44 = None\0A mul_75 = torch.ops.aten.mul(div_27, sub_32); div_27 = sub_32 = None\0A mul_76 = torch.ops.aten.mul(add_56, mul_70); mul_70 = None\0A sum_76 = torch.ops.aten.sum(mul_76, [0, 1]); mul_76 = None\0A sum_77 = torch.ops.aten.sum(add_56, [0, 1]); add_56 = None\0A to_96 = torch.ops.aten.to(mul_75, 6)\0A to_97 = torch.ops.aten.to(sum_76, 6)\0A to_98 = torch.ops.aten.to(sum_77, 6)\0A detach_187 = torch.ops.aten.detach(sum_76); sum_76 = None\0A detach_188 = torch.ops.aten.detach(detach_187); detach_187 = None\0A detach_189 = torch.ops.aten.detach(sum_77); sum_77 = None\0A detach_190 = torch.ops.aten.detach(detach_189); detach_189 = None\0A _reshape_alias_148 = torch.ops.aten._reshape_alias(mul_75, 
[128, 384], [384, 1])\0A t_202 = torch.ops.aten.t(t_41); t_41 = None\0A mm_64 = torch.ops.aten.mm(_reshape_alias_148, t_202); t_202 = None\0A t_203 = torch.ops.aten.t(_reshape_alias_148)\0A mm_65 = torch.ops.aten.mm(t_203, view_110); t_203 = view_110 = None\0A t_204 = torch.ops.aten.t(mm_65); mm_65 = None\0A sum_78 = torch.ops.aten.sum(_reshape_alias_148, [0], True); _reshape_alias_148 = None\0A view_224 = torch.ops.aten.view(sum_78, [384]); sum_78 = None\0A detach_191 = torch.ops.aten.detach(view_224); view_224 = None\0A detach_192 = torch.ops.aten.detach(detach_191); detach_191 = None\0A _reshape_alias_149 = torch.ops.aten._reshape_alias(mm_64, [1, 128, 1536], [196608, 1536, 1]); mm_64 = None\0A t_205 = torch.ops.aten.t(t_204); t_204 = None\0A detach_193 = torch.ops.aten.detach(t_205); t_205 = None\0A detach_194 = torch.ops.aten.detach(detach_193); detach_193 = None\0A gelu_backward_5 = torch.ops.aten.gelu_backward(_reshape_alias_149, view_109); _reshape_alias_149 = view_109 = None\0A _reshape_alias_150 = torch.ops.aten._reshape_alias(gelu_backward_5, [128, 1536], [1536, 1]); gelu_backward_5 = None\0A t_206 = torch.ops.aten.t(t_40); t_40 = None\0A mm_66 = torch.ops.aten.mm(_reshape_alias_150, t_206); t_206 = None\0A t_207 = torch.ops.aten.t(_reshape_alias_150)\0A mm_67 = torch.ops.aten.mm(t_207, view_108); t_207 = view_108 = None\0A t_208 = torch.ops.aten.t(mm_67); mm_67 = None\0A sum_79 = torch.ops.aten.sum(_reshape_alias_150, [0], True); _reshape_alias_150 = None\0A view_225 = torch.ops.aten.view(sum_79, [1536]); sum_79 = None\0A detach_195 = torch.ops.aten.detach(view_225); view_225 = None\0A detach_196 = torch.ops.aten.detach(detach_195); detach_195 = None\0A _reshape_alias_151 = torch.ops.aten._reshape_alias(mm_66, [1, 128, 384], [49152, 384, 1]); mm_66 = None\0A add_57 = torch.ops.aten.add(mul_75, _reshape_alias_151); mul_75 = _reshape_alias_151 = None\0A t_209 = torch.ops.aten.t(t_208); t_208 = None\0A detach_197 = torch.ops.aten.detach(t_209); t_209 = 
None\0A detach_198 = torch.ops.aten.detach(detach_197); detach_197 = None\0A to_99 = torch.ops.aten.to(add_57, 6)\0A to_100 = torch.ops.aten.to(add_20, 6)\0A to_101 = torch.ops.aten.to(getitem_40, 6)\0A to_102 = torch.ops.aten.to(getitem_41, 6)\0A to_103 = torch.ops.aten.to(params_135, 6)\0A to_104 = torch.ops.aten.to(params_134, 6)\0A sub_33 = torch.ops.aten.sub(add_20, getitem_40); add_20 = getitem_40 = None\0A mul_77 = torch.ops.aten.mul(sub_33, getitem_41); sub_33 = None\0A mul_78 = torch.ops.aten.mul(add_57, params_135)\0A mul_79 = torch.ops.aten.mul(mul_78, 384)\0A sum_80 = torch.ops.aten.sum(mul_78, [2], True)\0A mul_80 = torch.ops.aten.mul(mul_78, mul_77); mul_78 = None\0A sum_81 = torch.ops.aten.sum(mul_80, [2], True); mul_80 = None\0A mul_81 = torch.ops.aten.mul(mul_77, sum_81); sum_81 = None\0A sub_34 = torch.ops.aten.sub(mul_79, sum_80); mul_79 = sum_80 = None\0A sub_35 = torch.ops.aten.sub(sub_34, mul_81); sub_34 = mul_81 = None\0A div_28 = torch.ops.aten.div(getitem_41, 384); getitem_41 = None\0A mul_82 = torch.ops.aten.mul(div_28, sub_35); div_28 = sub_35 = None\0A mul_83 = torch.ops.aten.mul(add_57, mul_77); mul_77 = None\0A sum_82 = torch.ops.aten.sum(mul_83, [0, 1]); mul_83 = None\0A sum_83 = torch.ops.aten.sum(add_57, [0, 1]); add_57 = None\0A to_105 = torch.ops.aten.to(mul_82, 6)\0A to_106 = torch.ops.aten.to(sum_82, 6)\0A to_107 = torch.ops.aten.to(sum_83, 6)\0A detach_199 = torch.ops.aten.detach(sum_82); sum_82 = None\0A detach_200 = torch.ops.aten.detach(detach_199); detach_199 = None\0A detach_201 = torch.ops.aten.detach(sum_83); sum_83 = None\0A detach_202 = torch.ops.aten.detach(detach_201); detach_201 = None\0A _reshape_alias_152 = torch.ops.aten._reshape_alias(mul_82, [128, 384], [384, 1])\0A t_210 = torch.ops.aten.t(t_39); t_39 = None\0A mm_68 = torch.ops.aten.mm(_reshape_alias_152, t_210); t_210 = None\0A t_211 = torch.ops.aten.t(_reshape_alias_152)\0A mm_69 = torch.ops.aten.mm(t_211, view_106); t_211 = view_106 = None\0A t_212 = 
torch.ops.aten.t(mm_69); mm_69 = None\0A sum_84 = torch.ops.aten.sum(_reshape_alias_152, [0], True); _reshape_alias_152 = None\0A view_226 = torch.ops.aten.view(sum_84, [384]); sum_84 = None\0A detach_203 = torch.ops.aten.detach(view_226); view_226 = None\0A detach_204 = torch.ops.aten.detach(detach_203); detach_203 = None\0A _reshape_alias_153 = torch.ops.aten._reshape_alias(mm_68, [1, 128, 384], [49152, 384, 1]); mm_68 = None\0A t_213 = torch.ops.aten.t(t_212); t_212 = None\0A detach_205 = torch.ops.aten.detach(t_213); t_213 = None\0A detach_206 = torch.ops.aten.detach(detach_205); detach_205 = None\0A _reshape_alias_154 = torch.ops.aten._reshape_alias(_reshape_alias_153, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_153 = None\0A permute_68 = torch.ops.aten.permute(_reshape_alias_154, [0, 2, 1, 3]); _reshape_alias_154 = None\0A _reshape_alias_155 = torch.ops.aten._reshape_alias(permute_68, [12, 128, 32], [32, 384, 1]); permute_68 = None\0A transpose_37 = torch.ops.aten.transpose(_reshape_alias_26, 1, 2); _reshape_alias_26 = None\0A bmm_44 = torch.ops.aten.bmm(transpose_37, _reshape_alias_155); transpose_37 = None\0A transpose_38 = torch.ops.aten.transpose(_reshape_alias_27, 1, 2); _reshape_alias_27 = None\0A bmm_45 = torch.ops.aten.bmm(_reshape_alias_155, transpose_38); _reshape_alias_155 = transpose_38 = None\0A _reshape_alias_156 = torch.ops.aten._reshape_alias(bmm_44, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_44 = None\0A _reshape_alias_157 = torch.ops.aten._reshape_alias(bmm_45, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_45 = None\0A detach_207 = torch.ops.aten.detach(detach_6); detach_6 = None\0A _softmax_backward_data_5 = torch.ops.aten._softmax_backward_data(_reshape_alias_157, detach_207, -1, 6); _reshape_alias_157 = detach_207 = None\0A div_29 = torch.ops.aten.div(_softmax_backward_data_5, 5.656854249492381); _softmax_backward_data_5 = None\0A _reshape_alias_158 = torch.ops.aten._reshape_alias(div_29, [12, 128, 128], [16384, 128, 1]); 
div_29 = None\0A transpose_39 = torch.ops.aten.transpose(_reshape_alias_24, 1, 2); _reshape_alias_24 = None\0A bmm_46 = torch.ops.aten.bmm(transpose_39, _reshape_alias_158); transpose_39 = None\0A transpose_40 = torch.ops.aten.transpose(_reshape_alias_25, 1, 2); _reshape_alias_25 = None\0A bmm_47 = torch.ops.aten.bmm(_reshape_alias_158, transpose_40); _reshape_alias_158 = transpose_40 = None\0A _reshape_alias_159 = torch.ops.aten._reshape_alias(bmm_46, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_46 = None\0A _reshape_alias_160 = torch.ops.aten._reshape_alias(bmm_47, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_47 = None\0A transpose_41 = torch.ops.aten.transpose(_reshape_alias_159, -1, -2); _reshape_alias_159 = None\0A permute_69 = torch.ops.aten.permute(_reshape_alias_160, [0, 2, 1, 3]); _reshape_alias_160 = None\0A clone_22 = torch.ops.aten.clone(permute_69, memory_format = 0); permute_69 = None\0A _unsafe_view_34 = torch.ops.aten._unsafe_view(clone_22, [1, 128, 384]); clone_22 = None\0A permute_70 = torch.ops.aten.permute(_reshape_alias_156, [0, 2, 1, 3]); _reshape_alias_156 = None\0A clone_23 = torch.ops.aten.clone(permute_70, memory_format = 0); permute_70 = None\0A _unsafe_view_35 = torch.ops.aten._unsafe_view(clone_23, [1, 128, 384]); clone_23 = None\0A _reshape_alias_161 = torch.ops.aten._reshape_alias(_unsafe_view_35, [128, 384], [384, 1]); _unsafe_view_35 = None\0A t_214 = torch.ops.aten.t(t_38); t_38 = None\0A mm_70 = torch.ops.aten.mm(_reshape_alias_161, t_214); t_214 = None\0A t_215 = torch.ops.aten.t(_reshape_alias_161)\0A mm_71 = torch.ops.aten.mm(t_215, view_101); t_215 = view_101 = None\0A t_216 = torch.ops.aten.t(mm_71); mm_71 = None\0A sum_85 = torch.ops.aten.sum(_reshape_alias_161, [0], True); _reshape_alias_161 = None\0A view_227 = torch.ops.aten.view(sum_85, [384]); sum_85 = None\0A detach_208 = torch.ops.aten.detach(view_227); view_227 = None\0A detach_209 = torch.ops.aten.detach(detach_208); detach_208 = None\0A _reshape_alias_162 = 
torch.ops.aten._reshape_alias(mm_70, [1, 128, 384], [49152, 384, 1]); mm_70 = None\0A add_58 = torch.ops.aten.add(mul_82, _reshape_alias_162); mul_82 = _reshape_alias_162 = None\0A t_217 = torch.ops.aten.t(t_216); t_216 = None\0A detach_210 = torch.ops.aten.detach(t_217); t_217 = None\0A detach_211 = torch.ops.aten.detach(detach_210); detach_210 = None\0A permute_71 = torch.ops.aten.permute(transpose_41, [0, 2, 1, 3]); transpose_41 = None\0A _reshape_alias_163 = torch.ops.aten._reshape_alias(permute_71, [1, 128, 384], [128, 1, 128]); permute_71 = None\0A _reshape_alias_164 = torch.ops.aten._reshape_alias(_reshape_alias_163, [128, 384], [1, 128]); _reshape_alias_163 = None\0A t_218 = torch.ops.aten.t(t_37); t_37 = None\0A mm_72 = torch.ops.aten.mm(_reshape_alias_164, t_218); t_218 = None\0A t_219 = torch.ops.aten.t(_reshape_alias_164)\0A mm_73 = torch.ops.aten.mm(t_219, view_98); t_219 = view_98 = None\0A t_220 = torch.ops.aten.t(mm_73); mm_73 = None\0A sum_86 = torch.ops.aten.sum(_reshape_alias_164, [0], True); _reshape_alias_164 = None\0A view_228 = torch.ops.aten.view(sum_86, [384]); sum_86 = None\0A detach_212 = torch.ops.aten.detach(view_228); view_228 = None\0A detach_213 = torch.ops.aten.detach(detach_212); detach_212 = None\0A _reshape_alias_165 = torch.ops.aten._reshape_alias(mm_72, [1, 128, 384], [49152, 384, 1]); mm_72 = None\0A add_59 = torch.ops.aten.add(add_58, _reshape_alias_165); add_58 = _reshape_alias_165 = None\0A t_221 = torch.ops.aten.t(t_220); t_220 = None\0A detach_214 = torch.ops.aten.detach(t_221); t_221 = None\0A detach_215 = torch.ops.aten.detach(detach_214); detach_214 = None\0A _reshape_alias_166 = torch.ops.aten._reshape_alias(_unsafe_view_34, [128, 384], [384, 1]); _unsafe_view_34 = None\0A t_222 = torch.ops.aten.t(t_36); t_36 = None\0A mm_74 = torch.ops.aten.mm(_reshape_alias_166, t_222); t_222 = None\0A t_223 = torch.ops.aten.t(_reshape_alias_166)\0A mm_75 = torch.ops.aten.mm(t_223, view_96); t_223 = view_96 = None\0A t_224 = 
torch.ops.aten.t(mm_75); mm_75 = None\0A sum_87 = torch.ops.aten.sum(_reshape_alias_166, [0], True); _reshape_alias_166 = None\0A view_229 = torch.ops.aten.view(sum_87, [384]); sum_87 = None\0A detach_216 = torch.ops.aten.detach(view_229); view_229 = None\0A detach_217 = torch.ops.aten.detach(detach_216); detach_216 = None\0A _reshape_alias_167 = torch.ops.aten._reshape_alias(mm_74, [1, 128, 384], [49152, 384, 1]); mm_74 = None\0A add_60 = torch.ops.aten.add(add_59, _reshape_alias_167); add_59 = _reshape_alias_167 = None\0A t_225 = torch.ops.aten.t(t_224); t_224 = None\0A detach_218 = torch.ops.aten.detach(t_225); t_225 = None\0A detach_219 = torch.ops.aten.detach(detach_218); detach_218 = None\0A to_108 = torch.ops.aten.to(add_60, 6)\0A to_109 = torch.ops.aten.to(add_18, 6)\0A to_110 = torch.ops.aten.to(getitem_37, 6)\0A to_111 = torch.ops.aten.to(getitem_38, 6)\0A to_112 = torch.ops.aten.to(params_131, 6)\0A to_113 = torch.ops.aten.to(params_130, 6)\0A sub_36 = torch.ops.aten.sub(add_18, getitem_37); add_18 = getitem_37 = None\0A mul_84 = torch.ops.aten.mul(sub_36, getitem_38); sub_36 = None\0A mul_85 = torch.ops.aten.mul(add_60, params_131)\0A mul_86 = torch.ops.aten.mul(mul_85, 384)\0A sum_88 = torch.ops.aten.sum(mul_85, [2], True)\0A mul_87 = torch.ops.aten.mul(mul_85, mul_84); mul_85 = None\0A sum_89 = torch.ops.aten.sum(mul_87, [2], True); mul_87 = None\0A mul_88 = torch.ops.aten.mul(mul_84, sum_89); sum_89 = None\0A sub_37 = torch.ops.aten.sub(mul_86, sum_88); mul_86 = sum_88 = None\0A sub_38 = torch.ops.aten.sub(sub_37, mul_88); sub_37 = mul_88 = None\0A div_30 = torch.ops.aten.div(getitem_38, 384); getitem_38 = None\0A mul_89 = torch.ops.aten.mul(div_30, sub_38); div_30 = sub_38 = None\0A mul_90 = torch.ops.aten.mul(add_60, mul_84); mul_84 = None\0A sum_90 = torch.ops.aten.sum(mul_90, [0, 1]); mul_90 = None\0A sum_91 = torch.ops.aten.sum(add_60, [0, 1]); add_60 = None\0A to_114 = torch.ops.aten.to(mul_89, 6)\0A to_115 = torch.ops.aten.to(sum_90, 6)\0A 
to_116 = torch.ops.aten.to(sum_91, 6)\0A detach_220 = torch.ops.aten.detach(sum_90); sum_90 = None\0A detach_221 = torch.ops.aten.detach(detach_220); detach_220 = None\0A detach_222 = torch.ops.aten.detach(sum_91); sum_91 = None\0A detach_223 = torch.ops.aten.detach(detach_222); detach_222 = None\0A _reshape_alias_168 = torch.ops.aten._reshape_alias(mul_89, [128, 384], [384, 1])\0A t_226 = torch.ops.aten.t(t_35); t_35 = None\0A mm_76 = torch.ops.aten.mm(_reshape_alias_168, t_226); t_226 = None\0A t_227 = torch.ops.aten.t(_reshape_alias_168)\0A mm_77 = torch.ops.aten.mm(t_227, view_94); t_227 = view_94 = None\0A t_228 = torch.ops.aten.t(mm_77); mm_77 = None\0A sum_92 = torch.ops.aten.sum(_reshape_alias_168, [0], True); _reshape_alias_168 = None\0A view_230 = torch.ops.aten.view(sum_92, [384]); sum_92 = None\0A detach_224 = torch.ops.aten.detach(view_230); view_230 = None\0A detach_225 = torch.ops.aten.detach(detach_224); detach_224 = None\0A _reshape_alias_169 = torch.ops.aten._reshape_alias(mm_76, [1, 128, 1536], [196608, 1536, 1]); mm_76 = None\0A t_229 = torch.ops.aten.t(t_228); t_228 = None\0A detach_226 = torch.ops.aten.detach(t_229); t_229 = None\0A detach_227 = torch.ops.aten.detach(detach_226); detach_226 = None\0A gelu_backward_6 = torch.ops.aten.gelu_backward(_reshape_alias_169, view_93); _reshape_alias_169 = view_93 = None\0A _reshape_alias_170 = torch.ops.aten._reshape_alias(gelu_backward_6, [128, 1536], [1536, 1]); gelu_backward_6 = None\0A t_230 = torch.ops.aten.t(t_34); t_34 = None\0A mm_78 = torch.ops.aten.mm(_reshape_alias_170, t_230); t_230 = None\0A t_231 = torch.ops.aten.t(_reshape_alias_170)\0A mm_79 = torch.ops.aten.mm(t_231, view_92); t_231 = view_92 = None\0A t_232 = torch.ops.aten.t(mm_79); mm_79 = None\0A sum_93 = torch.ops.aten.sum(_reshape_alias_170, [0], True); _reshape_alias_170 = None\0A view_231 = torch.ops.aten.view(sum_93, [1536]); sum_93 = None\0A detach_228 = torch.ops.aten.detach(view_231); view_231 = None\0A detach_229 = 
torch.ops.aten.detach(detach_228); detach_228 = None\0A _reshape_alias_171 = torch.ops.aten._reshape_alias(mm_78, [1, 128, 384], [49152, 384, 1]); mm_78 = None\0A add_61 = torch.ops.aten.add(mul_89, _reshape_alias_171); mul_89 = _reshape_alias_171 = None\0A t_233 = torch.ops.aten.t(t_232); t_232 = None\0A detach_230 = torch.ops.aten.detach(t_233); t_233 = None\0A detach_231 = torch.ops.aten.detach(detach_230); detach_230 = None\0A to_117 = torch.ops.aten.to(add_61, 6)\0A to_118 = torch.ops.aten.to(add_17, 6)\0A to_119 = torch.ops.aten.to(getitem_34, 6)\0A to_120 = torch.ops.aten.to(getitem_35, 6)\0A to_121 = torch.ops.aten.to(params_119, 6)\0A to_122 = torch.ops.aten.to(params_118, 6)\0A sub_39 = torch.ops.aten.sub(add_17, getitem_34); add_17 = getitem_34 = None\0A mul_91 = torch.ops.aten.mul(sub_39, getitem_35); sub_39 = None\0A mul_92 = torch.ops.aten.mul(add_61, params_119)\0A mul_93 = torch.ops.aten.mul(mul_92, 384)\0A sum_94 = torch.ops.aten.sum(mul_92, [2], True)\0A mul_94 = torch.ops.aten.mul(mul_92, mul_91); mul_92 = None\0A sum_95 = torch.ops.aten.sum(mul_94, [2], True); mul_94 = None\0A mul_95 = torch.ops.aten.mul(mul_91, sum_95); sum_95 = None\0A sub_40 = torch.ops.aten.sub(mul_93, sum_94); mul_93 = sum_94 = None\0A sub_41 = torch.ops.aten.sub(sub_40, mul_95); sub_40 = mul_95 = None\0A div_31 = torch.ops.aten.div(getitem_35, 384); getitem_35 = None\0A mul_96 = torch.ops.aten.mul(div_31, sub_41); div_31 = sub_41 = None\0A mul_97 = torch.ops.aten.mul(add_61, mul_91); mul_91 = None\0A sum_96 = torch.ops.aten.sum(mul_97, [0, 1]); mul_97 = None\0A sum_97 = torch.ops.aten.sum(add_61, [0, 1]); add_61 = None\0A to_123 = torch.ops.aten.to(mul_96, 6)\0A to_124 = torch.ops.aten.to(sum_96, 6)\0A to_125 = torch.ops.aten.to(sum_97, 6)\0A detach_232 = torch.ops.aten.detach(sum_96); sum_96 = None\0A detach_233 = torch.ops.aten.detach(detach_232); detach_232 = None\0A detach_234 = torch.ops.aten.detach(sum_97); sum_97 = None\0A detach_235 = 
torch.ops.aten.detach(detach_234); detach_234 = None\0A _reshape_alias_172 = torch.ops.aten._reshape_alias(mul_96, [128, 384], [384, 1])\0A t_234 = torch.ops.aten.t(t_33); t_33 = None\0A mm_80 = torch.ops.aten.mm(_reshape_alias_172, t_234); t_234 = None\0A t_235 = torch.ops.aten.t(_reshape_alias_172)\0A mm_81 = torch.ops.aten.mm(t_235, view_90); t_235 = view_90 = None\0A t_236 = torch.ops.aten.t(mm_81); mm_81 = None\0A sum_98 = torch.ops.aten.sum(_reshape_alias_172, [0], True); _reshape_alias_172 = None\0A view_232 = torch.ops.aten.view(sum_98, [384]); sum_98 = None\0A detach_236 = torch.ops.aten.detach(view_232); view_232 = None\0A detach_237 = torch.ops.aten.detach(detach_236); detach_236 = None\0A _reshape_alias_173 = torch.ops.aten._reshape_alias(mm_80, [1, 128, 384], [49152, 384, 1]); mm_80 = None\0A t_237 = torch.ops.aten.t(t_236); t_236 = None\0A detach_238 = torch.ops.aten.detach(t_237); t_237 = None\0A detach_239 = torch.ops.aten.detach(detach_238); detach_238 = None\0A _reshape_alias_174 = torch.ops.aten._reshape_alias(_reshape_alias_173, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_173 = None\0A permute_72 = torch.ops.aten.permute(_reshape_alias_174, [0, 2, 1, 3]); _reshape_alias_174 = None\0A _reshape_alias_175 = torch.ops.aten._reshape_alias(permute_72, [12, 128, 32], [32, 384, 1]); permute_72 = None\0A transpose_42 = torch.ops.aten.transpose(_reshape_alias_22, 1, 2); _reshape_alias_22 = None\0A bmm_48 = torch.ops.aten.bmm(transpose_42, _reshape_alias_175); transpose_42 = None\0A transpose_43 = torch.ops.aten.transpose(_reshape_alias_23, 1, 2); _reshape_alias_23 = None\0A bmm_49 = torch.ops.aten.bmm(_reshape_alias_175, transpose_43); _reshape_alias_175 = transpose_43 = None\0A _reshape_alias_176 = torch.ops.aten._reshape_alias(bmm_48, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_48 = None\0A _reshape_alias_177 = torch.ops.aten._reshape_alias(bmm_49, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_49 = None\0A detach_240 = 
torch.ops.aten.detach(detach_5); detach_5 = None\0A _softmax_backward_data_6 = torch.ops.aten._softmax_backward_data(_reshape_alias_177, detach_240, -1, 6); _reshape_alias_177 = detach_240 = None\0A div_32 = torch.ops.aten.div(_softmax_backward_data_6, 5.656854249492381); _softmax_backward_data_6 = None\0A _reshape_alias_178 = torch.ops.aten._reshape_alias(div_32, [12, 128, 128], [16384, 128, 1]); div_32 = None\0A transpose_44 = torch.ops.aten.transpose(_reshape_alias_20, 1, 2); _reshape_alias_20 = None\0A bmm_50 = torch.ops.aten.bmm(transpose_44, _reshape_alias_178); transpose_44 = None\0A transpose_45 = torch.ops.aten.transpose(_reshape_alias_21, 1, 2); _reshape_alias_21 = None\0A bmm_51 = torch.ops.aten.bmm(_reshape_alias_178, transpose_45); _reshape_alias_178 = transpose_45 = None\0A _reshape_alias_179 = torch.ops.aten._reshape_alias(bmm_50, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_50 = None\0A _reshape_alias_180 = torch.ops.aten._reshape_alias(bmm_51, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_51 = None\0A transpose_46 = torch.ops.aten.transpose(_reshape_alias_179, -1, -2); _reshape_alias_179 = None\0A permute_73 = torch.ops.aten.permute(_reshape_alias_180, [0, 2, 1, 3]); _reshape_alias_180 = None\0A clone_24 = torch.ops.aten.clone(permute_73, memory_format = 0); permute_73 = None\0A _unsafe_view_36 = torch.ops.aten._unsafe_view(clone_24, [1, 128, 384]); clone_24 = None\0A permute_74 = torch.ops.aten.permute(_reshape_alias_176, [0, 2, 1, 3]); _reshape_alias_176 = None\0A clone_25 = torch.ops.aten.clone(permute_74, memory_format = 0); permute_74 = None\0A _unsafe_view_37 = torch.ops.aten._unsafe_view(clone_25, [1, 128, 384]); clone_25 = None\0A _reshape_alias_181 = torch.ops.aten._reshape_alias(_unsafe_view_37, [128, 384], [384, 1]); _unsafe_view_37 = None\0A t_238 = torch.ops.aten.t(t_32); t_32 = None\0A mm_82 = torch.ops.aten.mm(_reshape_alias_181, t_238); t_238 = None\0A t_239 = torch.ops.aten.t(_reshape_alias_181)\0A mm_83 = torch.ops.aten.mm(t_239, 
view_85); t_239 = view_85 = None\0A t_240 = torch.ops.aten.t(mm_83); mm_83 = None\0A sum_99 = torch.ops.aten.sum(_reshape_alias_181, [0], True); _reshape_alias_181 = None\0A view_233 = torch.ops.aten.view(sum_99, [384]); sum_99 = None\0A detach_241 = torch.ops.aten.detach(view_233); view_233 = None\0A detach_242 = torch.ops.aten.detach(detach_241); detach_241 = None\0A _reshape_alias_182 = torch.ops.aten._reshape_alias(mm_82, [1, 128, 384], [49152, 384, 1]); mm_82 = None\0A add_62 = torch.ops.aten.add(mul_96, _reshape_alias_182); mul_96 = _reshape_alias_182 = None\0A t_241 = torch.ops.aten.t(t_240); t_240 = None\0A detach_243 = torch.ops.aten.detach(t_241); t_241 = None\0A detach_244 = torch.ops.aten.detach(detach_243); detach_243 = None\0A permute_75 = torch.ops.aten.permute(transpose_46, [0, 2, 1, 3]); transpose_46 = None\0A _reshape_alias_183 = torch.ops.aten._reshape_alias(permute_75, [1, 128, 384], [128, 1, 128]); permute_75 = None\0A _reshape_alias_184 = torch.ops.aten._reshape_alias(_reshape_alias_183, [128, 384], [1, 128]); _reshape_alias_183 = None\0A t_242 = torch.ops.aten.t(t_31); t_31 = None\0A mm_84 = torch.ops.aten.mm(_reshape_alias_184, t_242); t_242 = None\0A t_243 = torch.ops.aten.t(_reshape_alias_184)\0A mm_85 = torch.ops.aten.mm(t_243, view_82); t_243 = view_82 = None\0A t_244 = torch.ops.aten.t(mm_85); mm_85 = None\0A sum_100 = torch.ops.aten.sum(_reshape_alias_184, [0], True); _reshape_alias_184 = None\0A view_234 = torch.ops.aten.view(sum_100, [384]); sum_100 = None\0A detach_245 = torch.ops.aten.detach(view_234); view_234 = None\0A detach_246 = torch.ops.aten.detach(detach_245); detach_245 = None\0A _reshape_alias_185 = torch.ops.aten._reshape_alias(mm_84, [1, 128, 384], [49152, 384, 1]); mm_84 = None\0A add_63 = torch.ops.aten.add(add_62, _reshape_alias_185); add_62 = _reshape_alias_185 = None\0A t_245 = torch.ops.aten.t(t_244); t_244 = None\0A detach_247 = torch.ops.aten.detach(t_245); t_245 = None\0A detach_248 = 
torch.ops.aten.detach(detach_247); detach_247 = None\0A _reshape_alias_186 = torch.ops.aten._reshape_alias(_unsafe_view_36, [128, 384], [384, 1]); _unsafe_view_36 = None\0A t_246 = torch.ops.aten.t(t_30); t_30 = None\0A mm_86 = torch.ops.aten.mm(_reshape_alias_186, t_246); t_246 = None\0A t_247 = torch.ops.aten.t(_reshape_alias_186)\0A mm_87 = torch.ops.aten.mm(t_247, view_80); t_247 = view_80 = None\0A t_248 = torch.ops.aten.t(mm_87); mm_87 = None\0A sum_101 = torch.ops.aten.sum(_reshape_alias_186, [0], True); _reshape_alias_186 = None\0A view_235 = torch.ops.aten.view(sum_101, [384]); sum_101 = None\0A detach_249 = torch.ops.aten.detach(view_235); view_235 = None\0A detach_250 = torch.ops.aten.detach(detach_249); detach_249 = None\0A _reshape_alias_187 = torch.ops.aten._reshape_alias(mm_86, [1, 128, 384], [49152, 384, 1]); mm_86 = None\0A add_64 = torch.ops.aten.add(add_63, _reshape_alias_187); add_63 = _reshape_alias_187 = None\0A t_249 = torch.ops.aten.t(t_248); t_248 = None\0A detach_251 = torch.ops.aten.detach(t_249); t_249 = None\0A detach_252 = torch.ops.aten.detach(detach_251); detach_251 = None\0A to_126 = torch.ops.aten.to(add_64, 6)\0A to_127 = torch.ops.aten.to(add_15, 6)\0A to_128 = torch.ops.aten.to(getitem_31, 6)\0A to_129 = torch.ops.aten.to(getitem_32, 6)\0A to_130 = torch.ops.aten.to(params_115, 6)\0A to_131 = torch.ops.aten.to(params_114, 6)\0A sub_42 = torch.ops.aten.sub(add_15, getitem_31); add_15 = getitem_31 = None\0A mul_98 = torch.ops.aten.mul(sub_42, getitem_32); sub_42 = None\0A mul_99 = torch.ops.aten.mul(add_64, params_115)\0A mul_100 = torch.ops.aten.mul(mul_99, 384)\0A sum_102 = torch.ops.aten.sum(mul_99, [2], True)\0A mul_101 = torch.ops.aten.mul(mul_99, mul_98); mul_99 = None\0A sum_103 = torch.ops.aten.sum(mul_101, [2], True); mul_101 = None\0A mul_102 = torch.ops.aten.mul(mul_98, sum_103); sum_103 = None\0A sub_43 = torch.ops.aten.sub(mul_100, sum_102); mul_100 = sum_102 = None\0A sub_44 = torch.ops.aten.sub(sub_43, mul_102); 
sub_43 = mul_102 = None\0A div_33 = torch.ops.aten.div(getitem_32, 384); getitem_32 = None\0A mul_103 = torch.ops.aten.mul(div_33, sub_44); div_33 = sub_44 = None\0A mul_104 = torch.ops.aten.mul(add_64, mul_98); mul_98 = None\0A sum_104 = torch.ops.aten.sum(mul_104, [0, 1]); mul_104 = None\0A sum_105 = torch.ops.aten.sum(add_64, [0, 1]); add_64 = None\0A to_132 = torch.ops.aten.to(mul_103, 6)\0A to_133 = torch.ops.aten.to(sum_104, 6)\0A to_134 = torch.ops.aten.to(sum_105, 6)\0A detach_253 = torch.ops.aten.detach(sum_104); sum_104 = None\0A detach_254 = torch.ops.aten.detach(detach_253); detach_253 = None\0A detach_255 = torch.ops.aten.detach(sum_105); sum_105 = None\0A detach_256 = torch.ops.aten.detach(detach_255); detach_255 = None\0A _reshape_alias_188 = torch.ops.aten._reshape_alias(mul_103, [128, 384], [384, 1])\0A t_250 = torch.ops.aten.t(t_29); t_29 = None\0A mm_88 = torch.ops.aten.mm(_reshape_alias_188, t_250); t_250 = None\0A t_251 = torch.ops.aten.t(_reshape_alias_188)\0A mm_89 = torch.ops.aten.mm(t_251, view_78); t_251 = view_78 = None\0A t_252 = torch.ops.aten.t(mm_89); mm_89 = None\0A sum_106 = torch.ops.aten.sum(_reshape_alias_188, [0], True); _reshape_alias_188 = None\0A view_236 = torch.ops.aten.view(sum_106, [384]); sum_106 = None\0A detach_257 = torch.ops.aten.detach(view_236); view_236 = None\0A detach_258 = torch.ops.aten.detach(detach_257); detach_257 = None\0A _reshape_alias_189 = torch.ops.aten._reshape_alias(mm_88, [1, 128, 1536], [196608, 1536, 1]); mm_88 = None\0A t_253 = torch.ops.aten.t(t_252); t_252 = None\0A detach_259 = torch.ops.aten.detach(t_253); t_253 = None\0A detach_260 = torch.ops.aten.detach(detach_259); detach_259 = None\0A gelu_backward_7 = torch.ops.aten.gelu_backward(_reshape_alias_189, view_77); _reshape_alias_189 = view_77 = None\0A _reshape_alias_190 = torch.ops.aten._reshape_alias(gelu_backward_7, [128, 1536], [1536, 1]); gelu_backward_7 = None\0A t_254 = torch.ops.aten.t(t_28); t_28 = None\0A mm_90 = 
torch.ops.aten.mm(_reshape_alias_190, t_254); t_254 = None\0A t_255 = torch.ops.aten.t(_reshape_alias_190)\0A mm_91 = torch.ops.aten.mm(t_255, view_76); t_255 = view_76 = None\0A t_256 = torch.ops.aten.t(mm_91); mm_91 = None\0A sum_107 = torch.ops.aten.sum(_reshape_alias_190, [0], True); _reshape_alias_190 = None\0A view_237 = torch.ops.aten.view(sum_107, [1536]); sum_107 = None\0A detach_261 = torch.ops.aten.detach(view_237); view_237 = None\0A detach_262 = torch.ops.aten.detach(detach_261); detach_261 = None\0A _reshape_alias_191 = torch.ops.aten._reshape_alias(mm_90, [1, 128, 384], [49152, 384, 1]); mm_90 = None\0A add_65 = torch.ops.aten.add(mul_103, _reshape_alias_191); mul_103 = _reshape_alias_191 = None\0A t_257 = torch.ops.aten.t(t_256); t_256 = None\0A detach_263 = torch.ops.aten.detach(t_257); t_257 = None\0A detach_264 = torch.ops.aten.detach(detach_263); detach_263 = None\0A to_135 = torch.ops.aten.to(add_65, 6)\0A to_136 = torch.ops.aten.to(add_14, 6)\0A to_137 = torch.ops.aten.to(getitem_28, 6)\0A to_138 = torch.ops.aten.to(getitem_29, 6)\0A to_139 = torch.ops.aten.to(params_103, 6)\0A to_140 = torch.ops.aten.to(params_102, 6)\0A sub_45 = torch.ops.aten.sub(add_14, getitem_28); add_14 = getitem_28 = None\0A mul_105 = torch.ops.aten.mul(sub_45, getitem_29); sub_45 = None\0A mul_106 = torch.ops.aten.mul(add_65, params_103)\0A mul_107 = torch.ops.aten.mul(mul_106, 384)\0A sum_108 = torch.ops.aten.sum(mul_106, [2], True)\0A mul_108 = torch.ops.aten.mul(mul_106, mul_105); mul_106 = None\0A sum_109 = torch.ops.aten.sum(mul_108, [2], True); mul_108 = None\0A mul_109 = torch.ops.aten.mul(mul_105, sum_109); sum_109 = None\0A sub_46 = torch.ops.aten.sub(mul_107, sum_108); mul_107 = sum_108 = None\0A sub_47 = torch.ops.aten.sub(sub_46, mul_109); sub_46 = mul_109 = None\0A div_34 = torch.ops.aten.div(getitem_29, 384); getitem_29 = None\0A mul_110 = torch.ops.aten.mul(div_34, sub_47); div_34 = sub_47 = None\0A mul_111 = torch.ops.aten.mul(add_65, mul_105); mul_105 
= None\0A sum_110 = torch.ops.aten.sum(mul_111, [0, 1]); mul_111 = None\0A sum_111 = torch.ops.aten.sum(add_65, [0, 1]); add_65 = None\0A to_141 = torch.ops.aten.to(mul_110, 6)\0A to_142 = torch.ops.aten.to(sum_110, 6)\0A to_143 = torch.ops.aten.to(sum_111, 6)\0A detach_265 = torch.ops.aten.detach(sum_110); sum_110 = None\0A detach_266 = torch.ops.aten.detach(detach_265); detach_265 = None\0A detach_267 = torch.ops.aten.detach(sum_111); sum_111 = None\0A detach_268 = torch.ops.aten.detach(detach_267); detach_267 = None\0A _reshape_alias_192 = torch.ops.aten._reshape_alias(mul_110, [128, 384], [384, 1])\0A t_258 = torch.ops.aten.t(t_27); t_27 = None\0A mm_92 = torch.ops.aten.mm(_reshape_alias_192, t_258); t_258 = None\0A t_259 = torch.ops.aten.t(_reshape_alias_192)\0A mm_93 = torch.ops.aten.mm(t_259, view_74); t_259 = view_74 = None\0A t_260 = torch.ops.aten.t(mm_93); mm_93 = None\0A sum_112 = torch.ops.aten.sum(_reshape_alias_192, [0], True); _reshape_alias_192 = None\0A view_238 = torch.ops.aten.view(sum_112, [384]); sum_112 = None\0A detach_269 = torch.ops.aten.detach(view_238); view_238 = None\0A detach_270 = torch.ops.aten.detach(detach_269); detach_269 = None\0A _reshape_alias_193 = torch.ops.aten._reshape_alias(mm_92, [1, 128, 384], [49152, 384, 1]); mm_92 = None\0A t_261 = torch.ops.aten.t(t_260); t_260 = None\0A detach_271 = torch.ops.aten.detach(t_261); t_261 = None\0A detach_272 = torch.ops.aten.detach(detach_271); detach_271 = None\0A _reshape_alias_194 = torch.ops.aten._reshape_alias(_reshape_alias_193, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_193 = None\0A permute_76 = torch.ops.aten.permute(_reshape_alias_194, [0, 2, 1, 3]); _reshape_alias_194 = None\0A _reshape_alias_195 = torch.ops.aten._reshape_alias(permute_76, [12, 128, 32], [32, 384, 1]); permute_76 = None\0A transpose_47 = torch.ops.aten.transpose(_reshape_alias_18, 1, 2); _reshape_alias_18 = None\0A bmm_52 = torch.ops.aten.bmm(transpose_47, _reshape_alias_195); transpose_47 = 
None\0A transpose_48 = torch.ops.aten.transpose(_reshape_alias_19, 1, 2); _reshape_alias_19 = None\0A bmm_53 = torch.ops.aten.bmm(_reshape_alias_195, transpose_48); _reshape_alias_195 = transpose_48 = None\0A _reshape_alias_196 = torch.ops.aten._reshape_alias(bmm_52, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_52 = None\0A _reshape_alias_197 = torch.ops.aten._reshape_alias(bmm_53, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_53 = None\0A detach_273 = torch.ops.aten.detach(detach_4); detach_4 = None\0A _softmax_backward_data_7 = torch.ops.aten._softmax_backward_data(_reshape_alias_197, detach_273, -1, 6); _reshape_alias_197 = detach_273 = None\0A div_35 = torch.ops.aten.div(_softmax_backward_data_7, 5.656854249492381); _softmax_backward_data_7 = None\0A _reshape_alias_198 = torch.ops.aten._reshape_alias(div_35, [12, 128, 128], [16384, 128, 1]); div_35 = None\0A transpose_49 = torch.ops.aten.transpose(_reshape_alias_16, 1, 2); _reshape_alias_16 = None\0A bmm_54 = torch.ops.aten.bmm(transpose_49, _reshape_alias_198); transpose_49 = None\0A transpose_50 = torch.ops.aten.transpose(_reshape_alias_17, 1, 2); _reshape_alias_17 = None\0A bmm_55 = torch.ops.aten.bmm(_reshape_alias_198, transpose_50); _reshape_alias_198 = transpose_50 = None\0A _reshape_alias_199 = torch.ops.aten._reshape_alias(bmm_54, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_54 = None\0A _reshape_alias_200 = torch.ops.aten._reshape_alias(bmm_55, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_55 = None\0A transpose_51 = torch.ops.aten.transpose(_reshape_alias_199, -1, -2); _reshape_alias_199 = None\0A permute_77 = torch.ops.aten.permute(_reshape_alias_200, [0, 2, 1, 3]); _reshape_alias_200 = None\0A clone_26 = torch.ops.aten.clone(permute_77, memory_format = 0); permute_77 = None\0A _unsafe_view_38 = torch.ops.aten._unsafe_view(clone_26, [1, 128, 384]); clone_26 = None\0A permute_78 = torch.ops.aten.permute(_reshape_alias_196, [0, 2, 1, 3]); _reshape_alias_196 = None\0A clone_27 = 
torch.ops.aten.clone(permute_78, memory_format = 0); permute_78 = None\0A _unsafe_view_39 = torch.ops.aten._unsafe_view(clone_27, [1, 128, 384]); clone_27 = None\0A _reshape_alias_201 = torch.ops.aten._reshape_alias(_unsafe_view_39, [128, 384], [384, 1]); _unsafe_view_39 = None\0A t_262 = torch.ops.aten.t(t_26); t_26 = None\0A mm_94 = torch.ops.aten.mm(_reshape_alias_201, t_262); t_262 = None\0A t_263 = torch.ops.aten.t(_reshape_alias_201)\0A mm_95 = torch.ops.aten.mm(t_263, view_69); t_263 = view_69 = None\0A t_264 = torch.ops.aten.t(mm_95); mm_95 = None\0A sum_113 = torch.ops.aten.sum(_reshape_alias_201, [0], True); _reshape_alias_201 = None\0A view_239 = torch.ops.aten.view(sum_113, [384]); sum_113 = None\0A detach_274 = torch.ops.aten.detach(view_239); view_239 = None\0A detach_275 = torch.ops.aten.detach(detach_274); detach_274 = None\0A _reshape_alias_202 = torch.ops.aten._reshape_alias(mm_94, [1, 128, 384], [49152, 384, 1]); mm_94 = None\0A add_66 = torch.ops.aten.add(mul_110, _reshape_alias_202); mul_110 = _reshape_alias_202 = None\0A t_265 = torch.ops.aten.t(t_264); t_264 = None\0A detach_276 = torch.ops.aten.detach(t_265); t_265 = None\0A detach_277 = torch.ops.aten.detach(detach_276); detach_276 = None\0A permute_79 = torch.ops.aten.permute(transpose_51, [0, 2, 1, 3]); transpose_51 = None\0A _reshape_alias_203 = torch.ops.aten._reshape_alias(permute_79, [1, 128, 384], [128, 1, 128]); permute_79 = None\0A _reshape_alias_204 = torch.ops.aten._reshape_alias(_reshape_alias_203, [128, 384], [1, 128]); _reshape_alias_203 = None\0A t_266 = torch.ops.aten.t(t_25); t_25 = None\0A mm_96 = torch.ops.aten.mm(_reshape_alias_204, t_266); t_266 = None\0A t_267 = torch.ops.aten.t(_reshape_alias_204)\0A mm_97 = torch.ops.aten.mm(t_267, view_66); t_267 = view_66 = None\0A t_268 = torch.ops.aten.t(mm_97); mm_97 = None\0A sum_114 = torch.ops.aten.sum(_reshape_alias_204, [0], True); _reshape_alias_204 = None\0A view_240 = torch.ops.aten.view(sum_114, [384]); sum_114 = 
None\0A detach_278 = torch.ops.aten.detach(view_240); view_240 = None\0A detach_279 = torch.ops.aten.detach(detach_278); detach_278 = None\0A _reshape_alias_205 = torch.ops.aten._reshape_alias(mm_96, [1, 128, 384], [49152, 384, 1]); mm_96 = None\0A add_67 = torch.ops.aten.add(add_66, _reshape_alias_205); add_66 = _reshape_alias_205 = None\0A t_269 = torch.ops.aten.t(t_268); t_268 = None\0A detach_280 = torch.ops.aten.detach(t_269); t_269 = None\0A detach_281 = torch.ops.aten.detach(detach_280); detach_280 = None\0A _reshape_alias_206 = torch.ops.aten._reshape_alias(_unsafe_view_38, [128, 384], [384, 1]); _unsafe_view_38 = None\0A t_270 = torch.ops.aten.t(t_24); t_24 = None\0A mm_98 = torch.ops.aten.mm(_reshape_alias_206, t_270); t_270 = None\0A t_271 = torch.ops.aten.t(_reshape_alias_206)\0A mm_99 = torch.ops.aten.mm(t_271, view_64); t_271 = view_64 = None\0A t_272 = torch.ops.aten.t(mm_99); mm_99 = None\0A sum_115 = torch.ops.aten.sum(_reshape_alias_206, [0], True); _reshape_alias_206 = None\0A view_241 = torch.ops.aten.view(sum_115, [384]); sum_115 = None\0A detach_282 = torch.ops.aten.detach(view_241); view_241 = None\0A detach_283 = torch.ops.aten.detach(detach_282); detach_282 = None\0A _reshape_alias_207 = torch.ops.aten._reshape_alias(mm_98, [1, 128, 384], [49152, 384, 1]); mm_98 = None\0A add_68 = torch.ops.aten.add(add_67, _reshape_alias_207); add_67 = _reshape_alias_207 = None\0A t_273 = torch.ops.aten.t(t_272); t_272 = None\0A detach_284 = torch.ops.aten.detach(t_273); t_273 = None\0A detach_285 = torch.ops.aten.detach(detach_284); detach_284 = None\0A to_144 = torch.ops.aten.to(add_68, 6)\0A to_145 = torch.ops.aten.to(add_12, 6)\0A to_146 = torch.ops.aten.to(getitem_25, 6)\0A to_147 = torch.ops.aten.to(getitem_26, 6)\0A to_148 = torch.ops.aten.to(params_99, 6)\0A to_149 = torch.ops.aten.to(params_98, 6)\0A sub_48 = torch.ops.aten.sub(add_12, getitem_25); add_12 = getitem_25 = None\0A mul_112 = torch.ops.aten.mul(sub_48, getitem_26); sub_48 = None\0A 
mul_113 = torch.ops.aten.mul(add_68, params_99)\0A mul_114 = torch.ops.aten.mul(mul_113, 384)\0A sum_116 = torch.ops.aten.sum(mul_113, [2], True)\0A mul_115 = torch.ops.aten.mul(mul_113, mul_112); mul_113 = None\0A sum_117 = torch.ops.aten.sum(mul_115, [2], True); mul_115 = None\0A mul_116 = torch.ops.aten.mul(mul_112, sum_117); sum_117 = None\0A sub_49 = torch.ops.aten.sub(mul_114, sum_116); mul_114 = sum_116 = None\0A sub_50 = torch.ops.aten.sub(sub_49, mul_116); sub_49 = mul_116 = None\0A div_36 = torch.ops.aten.div(getitem_26, 384); getitem_26 = None\0A mul_117 = torch.ops.aten.mul(div_36, sub_50); div_36 = sub_50 = None\0A mul_118 = torch.ops.aten.mul(add_68, mul_112); mul_112 = None\0A sum_118 = torch.ops.aten.sum(mul_118, [0, 1]); mul_118 = None\0A sum_119 = torch.ops.aten.sum(add_68, [0, 1]); add_68 = None\0A to_150 = torch.ops.aten.to(mul_117, 6)\0A to_151 = torch.ops.aten.to(sum_118, 6)\0A to_152 = torch.ops.aten.to(sum_119, 6)\0A detach_286 = torch.ops.aten.detach(sum_118); sum_118 = None\0A detach_287 = torch.ops.aten.detach(detach_286); detach_286 = None\0A detach_288 = torch.ops.aten.detach(sum_119); sum_119 = None\0A detach_289 = torch.ops.aten.detach(detach_288); detach_288 = None\0A _reshape_alias_208 = torch.ops.aten._reshape_alias(mul_117, [128, 384], [384, 1])\0A t_274 = torch.ops.aten.t(t_23); t_23 = None\0A mm_100 = torch.ops.aten.mm(_reshape_alias_208, t_274); t_274 = None\0A t_275 = torch.ops.aten.t(_reshape_alias_208)\0A mm_101 = torch.ops.aten.mm(t_275, view_62); t_275 = view_62 = None\0A t_276 = torch.ops.aten.t(mm_101); mm_101 = None\0A sum_120 = torch.ops.aten.sum(_reshape_alias_208, [0], True); _reshape_alias_208 = None\0A view_242 = torch.ops.aten.view(sum_120, [384]); sum_120 = None\0A detach_290 = torch.ops.aten.detach(view_242); view_242 = None\0A detach_291 = torch.ops.aten.detach(detach_290); detach_290 = None\0A _reshape_alias_209 = torch.ops.aten._reshape_alias(mm_100, [1, 128, 1536], [196608, 1536, 1]); mm_100 = None\0A t_277 
= torch.ops.aten.t(t_276); t_276 = None\0A detach_292 = torch.ops.aten.detach(t_277); t_277 = None\0A detach_293 = torch.ops.aten.detach(detach_292); detach_292 = None\0A gelu_backward_8 = torch.ops.aten.gelu_backward(_reshape_alias_209, view_61); _reshape_alias_209 = view_61 = None\0A _reshape_alias_210 = torch.ops.aten._reshape_alias(gelu_backward_8, [128, 1536], [1536, 1]); gelu_backward_8 = None\0A t_278 = torch.ops.aten.t(t_22); t_22 = None\0A mm_102 = torch.ops.aten.mm(_reshape_alias_210, t_278); t_278 = None\0A t_279 = torch.ops.aten.t(_reshape_alias_210)\0A mm_103 = torch.ops.aten.mm(t_279, view_60); t_279 = view_60 = None\0A t_280 = torch.ops.aten.t(mm_103); mm_103 = None\0A sum_121 = torch.ops.aten.sum(_reshape_alias_210, [0], True); _reshape_alias_210 = None\0A view_243 = torch.ops.aten.view(sum_121, [1536]); sum_121 = None\0A detach_294 = torch.ops.aten.detach(view_243); view_243 = None\0A detach_295 = torch.ops.aten.detach(detach_294); detach_294 = None\0A _reshape_alias_211 = torch.ops.aten._reshape_alias(mm_102, [1, 128, 384], [49152, 384, 1]); mm_102 = None\0A add_69 = torch.ops.aten.add(mul_117, _reshape_alias_211); mul_117 = _reshape_alias_211 = None\0A t_281 = torch.ops.aten.t(t_280); t_280 = None\0A detach_296 = torch.ops.aten.detach(t_281); t_281 = None\0A detach_297 = torch.ops.aten.detach(detach_296); detach_296 = None\0A to_153 = torch.ops.aten.to(add_69, 6)\0A to_154 = torch.ops.aten.to(add_11, 6)\0A to_155 = torch.ops.aten.to(getitem_22, 6)\0A to_156 = torch.ops.aten.to(getitem_23, 6)\0A to_157 = torch.ops.aten.to(params_87, 6)\0A to_158 = torch.ops.aten.to(params_86, 6)\0A sub_51 = torch.ops.aten.sub(add_11, getitem_22); add_11 = getitem_22 = None\0A mul_119 = torch.ops.aten.mul(sub_51, getitem_23); sub_51 = None\0A mul_120 = torch.ops.aten.mul(add_69, params_87)\0A mul_121 = torch.ops.aten.mul(mul_120, 384)\0A sum_122 = torch.ops.aten.sum(mul_120, [2], True)\0A mul_122 = torch.ops.aten.mul(mul_120, mul_119); mul_120 = None\0A sum_123 = 
torch.ops.aten.sum(mul_122, [2], True); mul_122 = None\0A mul_123 = torch.ops.aten.mul(mul_119, sum_123); sum_123 = None\0A sub_52 = torch.ops.aten.sub(mul_121, sum_122); mul_121 = sum_122 = None\0A sub_53 = torch.ops.aten.sub(sub_52, mul_123); sub_52 = mul_123 = None\0A div_37 = torch.ops.aten.div(getitem_23, 384); getitem_23 = None\0A mul_124 = torch.ops.aten.mul(div_37, sub_53); div_37 = sub_53 = None\0A mul_125 = torch.ops.aten.mul(add_69, mul_119); mul_119 = None\0A sum_124 = torch.ops.aten.sum(mul_125, [0, 1]); mul_125 = None\0A sum_125 = torch.ops.aten.sum(add_69, [0, 1]); add_69 = None\0A to_159 = torch.ops.aten.to(mul_124, 6)\0A to_160 = torch.ops.aten.to(sum_124, 6)\0A to_161 = torch.ops.aten.to(sum_125, 6)\0A detach_298 = torch.ops.aten.detach(sum_124); sum_124 = None\0A detach_299 = torch.ops.aten.detach(detach_298); detach_298 = None\0A detach_300 = torch.ops.aten.detach(sum_125); sum_125 = None\0A detach_301 = torch.ops.aten.detach(detach_300); detach_300 = None\0A _reshape_alias_212 = torch.ops.aten._reshape_alias(mul_124, [128, 384], [384, 1])\0A t_282 = torch.ops.aten.t(t_21); t_21 = None\0A mm_104 = torch.ops.aten.mm(_reshape_alias_212, t_282); t_282 = None\0A t_283 = torch.ops.aten.t(_reshape_alias_212)\0A mm_105 = torch.ops.aten.mm(t_283, view_58); t_283 = view_58 = None\0A t_284 = torch.ops.aten.t(mm_105); mm_105 = None\0A sum_126 = torch.ops.aten.sum(_reshape_alias_212, [0], True); _reshape_alias_212 = None\0A view_244 = torch.ops.aten.view(sum_126, [384]); sum_126 = None\0A detach_302 = torch.ops.aten.detach(view_244); view_244 = None\0A detach_303 = torch.ops.aten.detach(detach_302); detach_302 = None\0A _reshape_alias_213 = torch.ops.aten._reshape_alias(mm_104, [1, 128, 384], [49152, 384, 1]); mm_104 = None\0A t_285 = torch.ops.aten.t(t_284); t_284 = None\0A detach_304 = torch.ops.aten.detach(t_285); t_285 = None\0A detach_305 = torch.ops.aten.detach(detach_304); detach_304 = None\0A _reshape_alias_214 = 
torch.ops.aten._reshape_alias(_reshape_alias_213, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_213 = None\0A permute_80 = torch.ops.aten.permute(_reshape_alias_214, [0, 2, 1, 3]); _reshape_alias_214 = None\0A _reshape_alias_215 = torch.ops.aten._reshape_alias(permute_80, [12, 128, 32], [32, 384, 1]); permute_80 = None\0A transpose_52 = torch.ops.aten.transpose(_reshape_alias_14, 1, 2); _reshape_alias_14 = None\0A bmm_56 = torch.ops.aten.bmm(transpose_52, _reshape_alias_215); transpose_52 = None\0A transpose_53 = torch.ops.aten.transpose(_reshape_alias_15, 1, 2); _reshape_alias_15 = None\0A bmm_57 = torch.ops.aten.bmm(_reshape_alias_215, transpose_53); _reshape_alias_215 = transpose_53 = None\0A _reshape_alias_216 = torch.ops.aten._reshape_alias(bmm_56, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_56 = None\0A _reshape_alias_217 = torch.ops.aten._reshape_alias(bmm_57, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_57 = None\0A detach_306 = torch.ops.aten.detach(detach_3); detach_3 = None\0A _softmax_backward_data_8 = torch.ops.aten._softmax_backward_data(_reshape_alias_217, detach_306, -1, 6); _reshape_alias_217 = detach_306 = None\0A div_38 = torch.ops.aten.div(_softmax_backward_data_8, 5.656854249492381); _softmax_backward_data_8 = None\0A _reshape_alias_218 = torch.ops.aten._reshape_alias(div_38, [12, 128, 128], [16384, 128, 1]); div_38 = None\0A transpose_54 = torch.ops.aten.transpose(_reshape_alias_12, 1, 2); _reshape_alias_12 = None\0A bmm_58 = torch.ops.aten.bmm(transpose_54, _reshape_alias_218); transpose_54 = None\0A transpose_55 = torch.ops.aten.transpose(_reshape_alias_13, 1, 2); _reshape_alias_13 = None\0A bmm_59 = torch.ops.aten.bmm(_reshape_alias_218, transpose_55); _reshape_alias_218 = transpose_55 = None\0A _reshape_alias_219 = torch.ops.aten._reshape_alias(bmm_58, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_58 = None\0A _reshape_alias_220 = torch.ops.aten._reshape_alias(bmm_59, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_59 = None\0A 
transpose_56 = torch.ops.aten.transpose(_reshape_alias_219, -1, -2); _reshape_alias_219 = None\0A permute_81 = torch.ops.aten.permute(_reshape_alias_220, [0, 2, 1, 3]); _reshape_alias_220 = None\0A clone_28 = torch.ops.aten.clone(permute_81, memory_format = 0); permute_81 = None\0A _unsafe_view_40 = torch.ops.aten._unsafe_view(clone_28, [1, 128, 384]); clone_28 = None\0A permute_82 = torch.ops.aten.permute(_reshape_alias_216, [0, 2, 1, 3]); _reshape_alias_216 = None\0A clone_29 = torch.ops.aten.clone(permute_82, memory_format = 0); permute_82 = None\0A _unsafe_view_41 = torch.ops.aten._unsafe_view(clone_29, [1, 128, 384]); clone_29 = None\0A _reshape_alias_221 = torch.ops.aten._reshape_alias(_unsafe_view_41, [128, 384], [384, 1]); _unsafe_view_41 = None\0A t_286 = torch.ops.aten.t(t_20); t_20 = None\0A mm_106 = torch.ops.aten.mm(_reshape_alias_221, t_286); t_286 = None\0A t_287 = torch.ops.aten.t(_reshape_alias_221)\0A mm_107 = torch.ops.aten.mm(t_287, view_53); t_287 = view_53 = None\0A t_288 = torch.ops.aten.t(mm_107); mm_107 = None\0A sum_127 = torch.ops.aten.sum(_reshape_alias_221, [0], True); _reshape_alias_221 = None\0A view_245 = torch.ops.aten.view(sum_127, [384]); sum_127 = None\0A detach_307 = torch.ops.aten.detach(view_245); view_245 = None\0A detach_308 = torch.ops.aten.detach(detach_307); detach_307 = None\0A _reshape_alias_222 = torch.ops.aten._reshape_alias(mm_106, [1, 128, 384], [49152, 384, 1]); mm_106 = None\0A add_70 = torch.ops.aten.add(mul_124, _reshape_alias_222); mul_124 = _reshape_alias_222 = None\0A t_289 = torch.ops.aten.t(t_288); t_288 = None\0A detach_309 = torch.ops.aten.detach(t_289); t_289 = None\0A detach_310 = torch.ops.aten.detach(detach_309); detach_309 = None\0A permute_83 = torch.ops.aten.permute(transpose_56, [0, 2, 1, 3]); transpose_56 = None\0A _reshape_alias_223 = torch.ops.aten._reshape_alias(permute_83, [1, 128, 384], [128, 1, 128]); permute_83 = None\0A _reshape_alias_224 = 
torch.ops.aten._reshape_alias(_reshape_alias_223, [128, 384], [1, 128]); _reshape_alias_223 = None\0A t_290 = torch.ops.aten.t(t_19); t_19 = None\0A mm_108 = torch.ops.aten.mm(_reshape_alias_224, t_290); t_290 = None\0A t_291 = torch.ops.aten.t(_reshape_alias_224)\0A mm_109 = torch.ops.aten.mm(t_291, view_50); t_291 = view_50 = None\0A t_292 = torch.ops.aten.t(mm_109); mm_109 = None\0A sum_128 = torch.ops.aten.sum(_reshape_alias_224, [0], True); _reshape_alias_224 = None\0A view_246 = torch.ops.aten.view(sum_128, [384]); sum_128 = None\0A detach_311 = torch.ops.aten.detach(view_246); view_246 = None\0A detach_312 = torch.ops.aten.detach(detach_311); detach_311 = None\0A _reshape_alias_225 = torch.ops.aten._reshape_alias(mm_108, [1, 128, 384], [49152, 384, 1]); mm_108 = None\0A add_71 = torch.ops.aten.add(add_70, _reshape_alias_225); add_70 = _reshape_alias_225 = None\0A t_293 = torch.ops.aten.t(t_292); t_292 = None\0A detach_313 = torch.ops.aten.detach(t_293); t_293 = None\0A detach_314 = torch.ops.aten.detach(detach_313); detach_313 = None\0A _reshape_alias_226 = torch.ops.aten._reshape_alias(_unsafe_view_40, [128, 384], [384, 1]); _unsafe_view_40 = None\0A t_294 = torch.ops.aten.t(t_18); t_18 = None\0A mm_110 = torch.ops.aten.mm(_reshape_alias_226, t_294); t_294 = None\0A t_295 = torch.ops.aten.t(_reshape_alias_226)\0A mm_111 = torch.ops.aten.mm(t_295, view_48); t_295 = view_48 = None\0A t_296 = torch.ops.aten.t(mm_111); mm_111 = None\0A sum_129 = torch.ops.aten.sum(_reshape_alias_226, [0], True); _reshape_alias_226 = None\0A view_247 = torch.ops.aten.view(sum_129, [384]); sum_129 = None\0A detach_315 = torch.ops.aten.detach(view_247); view_247 = None\0A detach_316 = torch.ops.aten.detach(detach_315); detach_315 = None\0A _reshape_alias_227 = torch.ops.aten._reshape_alias(mm_110, [1, 128, 384], [49152, 384, 1]); mm_110 = None\0A add_72 = torch.ops.aten.add(add_71, _reshape_alias_227); add_71 = _reshape_alias_227 = None\0A t_297 = torch.ops.aten.t(t_296); t_296 = 
None\0A detach_317 = torch.ops.aten.detach(t_297); t_297 = None\0A detach_318 = torch.ops.aten.detach(detach_317); detach_317 = None\0A to_162 = torch.ops.aten.to(add_72, 6)\0A to_163 = torch.ops.aten.to(add_9, 6)\0A to_164 = torch.ops.aten.to(getitem_19, 6)\0A to_165 = torch.ops.aten.to(getitem_20, 6)\0A to_166 = torch.ops.aten.to(params_83, 6)\0A to_167 = torch.ops.aten.to(params_82, 6)\0A sub_54 = torch.ops.aten.sub(add_9, getitem_19); add_9 = getitem_19 = None\0A mul_126 = torch.ops.aten.mul(sub_54, getitem_20); sub_54 = None\0A mul_127 = torch.ops.aten.mul(add_72, params_83)\0A mul_128 = torch.ops.aten.mul(mul_127, 384)\0A sum_130 = torch.ops.aten.sum(mul_127, [2], True)\0A mul_129 = torch.ops.aten.mul(mul_127, mul_126); mul_127 = None\0A sum_131 = torch.ops.aten.sum(mul_129, [2], True); mul_129 = None\0A mul_130 = torch.ops.aten.mul(mul_126, sum_131); sum_131 = None\0A sub_55 = torch.ops.aten.sub(mul_128, sum_130); mul_128 = sum_130 = None\0A sub_56 = torch.ops.aten.sub(sub_55, mul_130); sub_55 = mul_130 = None\0A div_39 = torch.ops.aten.div(getitem_20, 384); getitem_20 = None\0A mul_131 = torch.ops.aten.mul(div_39, sub_56); div_39 = sub_56 = None\0A mul_132 = torch.ops.aten.mul(add_72, mul_126); mul_126 = None\0A sum_132 = torch.ops.aten.sum(mul_132, [0, 1]); mul_132 = None\0A sum_133 = torch.ops.aten.sum(add_72, [0, 1]); add_72 = None\0A to_168 = torch.ops.aten.to(mul_131, 6)\0A to_169 = torch.ops.aten.to(sum_132, 6)\0A to_170 = torch.ops.aten.to(sum_133, 6)\0A detach_319 = torch.ops.aten.detach(sum_132); sum_132 = None\0A detach_320 = torch.ops.aten.detach(detach_319); detach_319 = None\0A detach_321 = torch.ops.aten.detach(sum_133); sum_133 = None\0A detach_322 = torch.ops.aten.detach(detach_321); detach_321 = None\0A _reshape_alias_228 = torch.ops.aten._reshape_alias(mul_131, [128, 384], [384, 1])\0A t_298 = torch.ops.aten.t(t_17); t_17 = None\0A mm_112 = torch.ops.aten.mm(_reshape_alias_228, t_298); t_298 = None\0A t_299 = 
torch.ops.aten.t(_reshape_alias_228)\0A mm_113 = torch.ops.aten.mm(t_299, view_46); t_299 = view_46 = None\0A t_300 = torch.ops.aten.t(mm_113); mm_113 = None\0A sum_134 = torch.ops.aten.sum(_reshape_alias_228, [0], True); _reshape_alias_228 = None\0A view_248 = torch.ops.aten.view(sum_134, [384]); sum_134 = None\0A detach_323 = torch.ops.aten.detach(view_248); view_248 = None\0A detach_324 = torch.ops.aten.detach(detach_323); detach_323 = None\0A _reshape_alias_229 = torch.ops.aten._reshape_alias(mm_112, [1, 128, 1536], [196608, 1536, 1]); mm_112 = None\0A t_301 = torch.ops.aten.t(t_300); t_300 = None\0A detach_325 = torch.ops.aten.detach(t_301); t_301 = None\0A detach_326 = torch.ops.aten.detach(detach_325); detach_325 = None\0A gelu_backward_9 = torch.ops.aten.gelu_backward(_reshape_alias_229, view_45); _reshape_alias_229 = view_45 = None\0A _reshape_alias_230 = torch.ops.aten._reshape_alias(gelu_backward_9, [128, 1536], [1536, 1]); gelu_backward_9 = None\0A t_302 = torch.ops.aten.t(t_16); t_16 = None\0A mm_114 = torch.ops.aten.mm(_reshape_alias_230, t_302); t_302 = None\0A t_303 = torch.ops.aten.t(_reshape_alias_230)\0A mm_115 = torch.ops.aten.mm(t_303, view_44); t_303 = view_44 = None\0A t_304 = torch.ops.aten.t(mm_115); mm_115 = None\0A sum_135 = torch.ops.aten.sum(_reshape_alias_230, [0], True); _reshape_alias_230 = None\0A view_249 = torch.ops.aten.view(sum_135, [1536]); sum_135 = None\0A detach_327 = torch.ops.aten.detach(view_249); view_249 = None\0A detach_328 = torch.ops.aten.detach(detach_327); detach_327 = None\0A _reshape_alias_231 = torch.ops.aten._reshape_alias(mm_114, [1, 128, 384], [49152, 384, 1]); mm_114 = None\0A add_73 = torch.ops.aten.add(mul_131, _reshape_alias_231); mul_131 = _reshape_alias_231 = None\0A t_305 = torch.ops.aten.t(t_304); t_304 = None\0A detach_329 = torch.ops.aten.detach(t_305); t_305 = None\0A detach_330 = torch.ops.aten.detach(detach_329); detach_329 = None\0A to_171 = torch.ops.aten.to(add_73, 6)\0A to_172 = 
torch.ops.aten.to(add_8, 6)\0A to_173 = torch.ops.aten.to(getitem_16, 6)\0A to_174 = torch.ops.aten.to(getitem_17, 6)\0A to_175 = torch.ops.aten.to(params_71, 6)\0A to_176 = torch.ops.aten.to(params_70, 6)\0A sub_57 = torch.ops.aten.sub(add_8, getitem_16); add_8 = getitem_16 = None\0A mul_133 = torch.ops.aten.mul(sub_57, getitem_17); sub_57 = None\0A mul_134 = torch.ops.aten.mul(add_73, params_71)\0A mul_135 = torch.ops.aten.mul(mul_134, 384)\0A sum_136 = torch.ops.aten.sum(mul_134, [2], True)\0A mul_136 = torch.ops.aten.mul(mul_134, mul_133); mul_134 = None\0A sum_137 = torch.ops.aten.sum(mul_136, [2], True); mul_136 = None\0A mul_137 = torch.ops.aten.mul(mul_133, sum_137); sum_137 = None\0A sub_58 = torch.ops.aten.sub(mul_135, sum_136); mul_135 = sum_136 = None\0A sub_59 = torch.ops.aten.sub(sub_58, mul_137); sub_58 = mul_137 = None\0A div_40 = torch.ops.aten.div(getitem_17, 384); getitem_17 = None\0A mul_138 = torch.ops.aten.mul(div_40, sub_59); div_40 = sub_59 = None\0A mul_139 = torch.ops.aten.mul(add_73, mul_133); mul_133 = None\0A sum_138 = torch.ops.aten.sum(mul_139, [0, 1]); mul_139 = None\0A sum_139 = torch.ops.aten.sum(add_73, [0, 1]); add_73 = None\0A to_177 = torch.ops.aten.to(mul_138, 6)\0A to_178 = torch.ops.aten.to(sum_138, 6)\0A to_179 = torch.ops.aten.to(sum_139, 6)\0A detach_331 = torch.ops.aten.detach(sum_138); sum_138 = None\0A detach_332 = torch.ops.aten.detach(detach_331); detach_331 = None\0A detach_333 = torch.ops.aten.detach(sum_139); sum_139 = None\0A detach_334 = torch.ops.aten.detach(detach_333); detach_333 = None\0A _reshape_alias_232 = torch.ops.aten._reshape_alias(mul_138, [128, 384], [384, 1])\0A t_306 = torch.ops.aten.t(t_15); t_15 = None\0A mm_116 = torch.ops.aten.mm(_reshape_alias_232, t_306); t_306 = None\0A t_307 = torch.ops.aten.t(_reshape_alias_232)\0A mm_117 = torch.ops.aten.mm(t_307, view_42); t_307 = view_42 = None\0A t_308 = torch.ops.aten.t(mm_117); mm_117 = None\0A sum_140 = torch.ops.aten.sum(_reshape_alias_232, [0], 
True); _reshape_alias_232 = None\0A view_250 = torch.ops.aten.view(sum_140, [384]); sum_140 = None\0A detach_335 = torch.ops.aten.detach(view_250); view_250 = None\0A detach_336 = torch.ops.aten.detach(detach_335); detach_335 = None\0A _reshape_alias_233 = torch.ops.aten._reshape_alias(mm_116, [1, 128, 384], [49152, 384, 1]); mm_116 = None\0A t_309 = torch.ops.aten.t(t_308); t_308 = None\0A detach_337 = torch.ops.aten.detach(t_309); t_309 = None\0A detach_338 = torch.ops.aten.detach(detach_337); detach_337 = None\0A _reshape_alias_234 = torch.ops.aten._reshape_alias(_reshape_alias_233, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_233 = None\0A permute_84 = torch.ops.aten.permute(_reshape_alias_234, [0, 2, 1, 3]); _reshape_alias_234 = None\0A _reshape_alias_235 = torch.ops.aten._reshape_alias(permute_84, [12, 128, 32], [32, 384, 1]); permute_84 = None\0A transpose_57 = torch.ops.aten.transpose(_reshape_alias_10, 1, 2); _reshape_alias_10 = None\0A bmm_60 = torch.ops.aten.bmm(transpose_57, _reshape_alias_235); transpose_57 = None\0A transpose_58 = torch.ops.aten.transpose(_reshape_alias_11, 1, 2); _reshape_alias_11 = None\0A bmm_61 = torch.ops.aten.bmm(_reshape_alias_235, transpose_58); _reshape_alias_235 = transpose_58 = None\0A _reshape_alias_236 = torch.ops.aten._reshape_alias(bmm_60, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_60 = None\0A _reshape_alias_237 = torch.ops.aten._reshape_alias(bmm_61, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_61 = None\0A detach_339 = torch.ops.aten.detach(detach_2); detach_2 = None\0A _softmax_backward_data_9 = torch.ops.aten._softmax_backward_data(_reshape_alias_237, detach_339, -1, 6); _reshape_alias_237 = detach_339 = None\0A div_41 = torch.ops.aten.div(_softmax_backward_data_9, 5.656854249492381); _softmax_backward_data_9 = None\0A _reshape_alias_238 = torch.ops.aten._reshape_alias(div_41, [12, 128, 128], [16384, 128, 1]); div_41 = None\0A transpose_59 = torch.ops.aten.transpose(_reshape_alias_8, 1, 2); 
_reshape_alias_8 = None\0A bmm_62 = torch.ops.aten.bmm(transpose_59, _reshape_alias_238); transpose_59 = None\0A transpose_60 = torch.ops.aten.transpose(_reshape_alias_9, 1, 2); _reshape_alias_9 = None\0A bmm_63 = torch.ops.aten.bmm(_reshape_alias_238, transpose_60); _reshape_alias_238 = transpose_60 = None\0A _reshape_alias_239 = torch.ops.aten._reshape_alias(bmm_62, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_62 = None\0A _reshape_alias_240 = torch.ops.aten._reshape_alias(bmm_63, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_63 = None\0A transpose_61 = torch.ops.aten.transpose(_reshape_alias_239, -1, -2); _reshape_alias_239 = None\0A permute_85 = torch.ops.aten.permute(_reshape_alias_240, [0, 2, 1, 3]); _reshape_alias_240 = None\0A clone_30 = torch.ops.aten.clone(permute_85, memory_format = 0); permute_85 = None\0A _unsafe_view_42 = torch.ops.aten._unsafe_view(clone_30, [1, 128, 384]); clone_30 = None\0A permute_86 = torch.ops.aten.permute(_reshape_alias_236, [0, 2, 1, 3]); _reshape_alias_236 = None\0A clone_31 = torch.ops.aten.clone(permute_86, memory_format = 0); permute_86 = None\0A _unsafe_view_43 = torch.ops.aten._unsafe_view(clone_31, [1, 128, 384]); clone_31 = None\0A _reshape_alias_241 = torch.ops.aten._reshape_alias(_unsafe_view_43, [128, 384], [384, 1]); _unsafe_view_43 = None\0A t_310 = torch.ops.aten.t(t_14); t_14 = None\0A mm_118 = torch.ops.aten.mm(_reshape_alias_241, t_310); t_310 = None\0A t_311 = torch.ops.aten.t(_reshape_alias_241)\0A mm_119 = torch.ops.aten.mm(t_311, view_37); t_311 = view_37 = None\0A t_312 = torch.ops.aten.t(mm_119); mm_119 = None\0A sum_141 = torch.ops.aten.sum(_reshape_alias_241, [0], True); _reshape_alias_241 = None\0A view_251 = torch.ops.aten.view(sum_141, [384]); sum_141 = None\0A detach_340 = torch.ops.aten.detach(view_251); view_251 = None\0A detach_341 = torch.ops.aten.detach(detach_340); detach_340 = None\0A _reshape_alias_242 = torch.ops.aten._reshape_alias(mm_118, [1, 128, 384], [49152, 384, 1]); mm_118 = None\0A 
add_74 = torch.ops.aten.add(mul_138, _reshape_alias_242); mul_138 = _reshape_alias_242 = None\0A t_313 = torch.ops.aten.t(t_312); t_312 = None\0A detach_342 = torch.ops.aten.detach(t_313); t_313 = None\0A detach_343 = torch.ops.aten.detach(detach_342); detach_342 = None\0A permute_87 = torch.ops.aten.permute(transpose_61, [0, 2, 1, 3]); transpose_61 = None\0A _reshape_alias_243 = torch.ops.aten._reshape_alias(permute_87, [1, 128, 384], [128, 1, 128]); permute_87 = None\0A _reshape_alias_244 = torch.ops.aten._reshape_alias(_reshape_alias_243, [128, 384], [1, 128]); _reshape_alias_243 = None\0A t_314 = torch.ops.aten.t(t_13); t_13 = None\0A mm_120 = torch.ops.aten.mm(_reshape_alias_244, t_314); t_314 = None\0A t_315 = torch.ops.aten.t(_reshape_alias_244)\0A mm_121 = torch.ops.aten.mm(t_315, view_34); t_315 = view_34 = None\0A t_316 = torch.ops.aten.t(mm_121); mm_121 = None\0A sum_142 = torch.ops.aten.sum(_reshape_alias_244, [0], True); _reshape_alias_244 = None\0A view_252 = torch.ops.aten.view(sum_142, [384]); sum_142 = None\0A detach_344 = torch.ops.aten.detach(view_252); view_252 = None\0A detach_345 = torch.ops.aten.detach(detach_344); detach_344 = None\0A _reshape_alias_245 = torch.ops.aten._reshape_alias(mm_120, [1, 128, 384], [49152, 384, 1]); mm_120 = None\0A add_75 = torch.ops.aten.add(add_74, _reshape_alias_245); add_74 = _reshape_alias_245 = None\0A t_317 = torch.ops.aten.t(t_316); t_316 = None\0A detach_346 = torch.ops.aten.detach(t_317); t_317 = None\0A detach_347 = torch.ops.aten.detach(detach_346); detach_346 = None\0A _reshape_alias_246 = torch.ops.aten._reshape_alias(_unsafe_view_42, [128, 384], [384, 1]); _unsafe_view_42 = None\0A t_318 = torch.ops.aten.t(t_12); t_12 = None\0A mm_122 = torch.ops.aten.mm(_reshape_alias_246, t_318); t_318 = None\0A t_319 = torch.ops.aten.t(_reshape_alias_246)\0A mm_123 = torch.ops.aten.mm(t_319, view_32); t_319 = view_32 = None\0A t_320 = torch.ops.aten.t(mm_123); mm_123 = None\0A sum_143 = 
torch.ops.aten.sum(_reshape_alias_246, [0], True); _reshape_alias_246 = None\0A view_253 = torch.ops.aten.view(sum_143, [384]); sum_143 = None\0A detach_348 = torch.ops.aten.detach(view_253); view_253 = None\0A detach_349 = torch.ops.aten.detach(detach_348); detach_348 = None\0A _reshape_alias_247 = torch.ops.aten._reshape_alias(mm_122, [1, 128, 384], [49152, 384, 1]); mm_122 = None\0A add_76 = torch.ops.aten.add(add_75, _reshape_alias_247); add_75 = _reshape_alias_247 = None\0A t_321 = torch.ops.aten.t(t_320); t_320 = None\0A detach_350 = torch.ops.aten.detach(t_321); t_321 = None\0A detach_351 = torch.ops.aten.detach(detach_350); detach_350 = None\0A to_180 = torch.ops.aten.to(add_76, 6)\0A to_181 = torch.ops.aten.to(add_6, 6)\0A to_182 = torch.ops.aten.to(getitem_13, 6)\0A to_183 = torch.ops.aten.to(getitem_14, 6)\0A to_184 = torch.ops.aten.to(params_35, 6)\0A to_185 = torch.ops.aten.to(params_34, 6)\0A sub_60 = torch.ops.aten.sub(add_6, getitem_13); add_6 = getitem_13 = None\0A mul_140 = torch.ops.aten.mul(sub_60, getitem_14); sub_60 = None\0A mul_141 = torch.ops.aten.mul(add_76, params_35)\0A mul_142 = torch.ops.aten.mul(mul_141, 384)\0A sum_144 = torch.ops.aten.sum(mul_141, [2], True)\0A mul_143 = torch.ops.aten.mul(mul_141, mul_140); mul_141 = None\0A sum_145 = torch.ops.aten.sum(mul_143, [2], True); mul_143 = None\0A mul_144 = torch.ops.aten.mul(mul_140, sum_145); sum_145 = None\0A sub_61 = torch.ops.aten.sub(mul_142, sum_144); mul_142 = sum_144 = None\0A sub_62 = torch.ops.aten.sub(sub_61, mul_144); sub_61 = mul_144 = None\0A div_42 = torch.ops.aten.div(getitem_14, 384); getitem_14 = None\0A mul_145 = torch.ops.aten.mul(div_42, sub_62); div_42 = sub_62 = None\0A mul_146 = torch.ops.aten.mul(add_76, mul_140); mul_140 = None\0A sum_146 = torch.ops.aten.sum(mul_146, [0, 1]); mul_146 = None\0A sum_147 = torch.ops.aten.sum(add_76, [0, 1]); add_76 = None\0A to_186 = torch.ops.aten.to(mul_145, 6)\0A to_187 = torch.ops.aten.to(sum_146, 6)\0A to_188 = 
torch.ops.aten.to(sum_147, 6)\0A detach_352 = torch.ops.aten.detach(sum_146); sum_146 = None\0A detach_353 = torch.ops.aten.detach(detach_352); detach_352 = None\0A detach_354 = torch.ops.aten.detach(sum_147); sum_147 = None\0A detach_355 = torch.ops.aten.detach(detach_354); detach_354 = None\0A _reshape_alias_248 = torch.ops.aten._reshape_alias(mul_145, [128, 384], [384, 1])\0A t_322 = torch.ops.aten.t(t_11); t_11 = None\0A mm_124 = torch.ops.aten.mm(_reshape_alias_248, t_322); t_322 = None\0A t_323 = torch.ops.aten.t(_reshape_alias_248)\0A mm_125 = torch.ops.aten.mm(t_323, view_30); t_323 = view_30 = None\0A t_324 = torch.ops.aten.t(mm_125); mm_125 = None\0A sum_148 = torch.ops.aten.sum(_reshape_alias_248, [0], True); _reshape_alias_248 = None\0A view_254 = torch.ops.aten.view(sum_148, [384]); sum_148 = None\0A detach_356 = torch.ops.aten.detach(view_254); view_254 = None\0A detach_357 = torch.ops.aten.detach(detach_356); detach_356 = None\0A _reshape_alias_249 = torch.ops.aten._reshape_alias(mm_124, [1, 128, 1536], [196608, 1536, 1]); mm_124 = None\0A t_325 = torch.ops.aten.t(t_324); t_324 = None\0A detach_358 = torch.ops.aten.detach(t_325); t_325 = None\0A detach_359 = torch.ops.aten.detach(detach_358); detach_358 = None\0A gelu_backward_10 = torch.ops.aten.gelu_backward(_reshape_alias_249, view_29); _reshape_alias_249 = view_29 = None\0A _reshape_alias_250 = torch.ops.aten._reshape_alias(gelu_backward_10, [128, 1536], [1536, 1]); gelu_backward_10 = None\0A t_326 = torch.ops.aten.t(t_10); t_10 = None\0A mm_126 = torch.ops.aten.mm(_reshape_alias_250, t_326); t_326 = None\0A t_327 = torch.ops.aten.t(_reshape_alias_250)\0A mm_127 = torch.ops.aten.mm(t_327, view_28); t_327 = view_28 = None\0A t_328 = torch.ops.aten.t(mm_127); mm_127 = None\0A sum_149 = torch.ops.aten.sum(_reshape_alias_250, [0], True); _reshape_alias_250 = None\0A view_255 = torch.ops.aten.view(sum_149, [1536]); sum_149 = None\0A detach_360 = torch.ops.aten.detach(view_255); view_255 = None\0A 
detach_361 = torch.ops.aten.detach(detach_360); detach_360 = None\0A _reshape_alias_251 = torch.ops.aten._reshape_alias(mm_126, [1, 128, 384], [49152, 384, 1]); mm_126 = None\0A add_77 = torch.ops.aten.add(mul_145, _reshape_alias_251); mul_145 = _reshape_alias_251 = None\0A t_329 = torch.ops.aten.t(t_328); t_328 = None\0A detach_362 = torch.ops.aten.detach(t_329); t_329 = None\0A detach_363 = torch.ops.aten.detach(detach_362); detach_362 = None\0A to_189 = torch.ops.aten.to(add_77, 6)\0A to_190 = torch.ops.aten.to(add_5, 6)\0A to_191 = torch.ops.aten.to(getitem_10, 6)\0A to_192 = torch.ops.aten.to(getitem_11, 6)\0A to_193 = torch.ops.aten.to(params_23, 6)\0A to_194 = torch.ops.aten.to(params_22, 6)\0A sub_63 = torch.ops.aten.sub(add_5, getitem_10); add_5 = getitem_10 = None\0A mul_147 = torch.ops.aten.mul(sub_63, getitem_11); sub_63 = None\0A mul_148 = torch.ops.aten.mul(add_77, params_23)\0A mul_149 = torch.ops.aten.mul(mul_148, 384)\0A sum_150 = torch.ops.aten.sum(mul_148, [2], True)\0A mul_150 = torch.ops.aten.mul(mul_148, mul_147); mul_148 = None\0A sum_151 = torch.ops.aten.sum(mul_150, [2], True); mul_150 = None\0A mul_151 = torch.ops.aten.mul(mul_147, sum_151); sum_151 = None\0A sub_64 = torch.ops.aten.sub(mul_149, sum_150); mul_149 = sum_150 = None\0A sub_65 = torch.ops.aten.sub(sub_64, mul_151); sub_64 = mul_151 = None\0A div_43 = torch.ops.aten.div(getitem_11, 384); getitem_11 = None\0A mul_152 = torch.ops.aten.mul(div_43, sub_65); div_43 = sub_65 = None\0A mul_153 = torch.ops.aten.mul(add_77, mul_147); mul_147 = None\0A sum_152 = torch.ops.aten.sum(mul_153, [0, 1]); mul_153 = None\0A sum_153 = torch.ops.aten.sum(add_77, [0, 1]); add_77 = None\0A to_195 = torch.ops.aten.to(mul_152, 6)\0A to_196 = torch.ops.aten.to(sum_152, 6)\0A to_197 = torch.ops.aten.to(sum_153, 6)\0A detach_364 = torch.ops.aten.detach(sum_152); sum_152 = None\0A detach_365 = torch.ops.aten.detach(detach_364); detach_364 = None\0A detach_366 = torch.ops.aten.detach(sum_153); sum_153 = 
None\0A detach_367 = torch.ops.aten.detach(detach_366); detach_366 = None\0A _reshape_alias_252 = torch.ops.aten._reshape_alias(mul_152, [128, 384], [384, 1])\0A t_330 = torch.ops.aten.t(t_9); t_9 = None\0A mm_128 = torch.ops.aten.mm(_reshape_alias_252, t_330); t_330 = None\0A t_331 = torch.ops.aten.t(_reshape_alias_252)\0A mm_129 = torch.ops.aten.mm(t_331, view_26); t_331 = view_26 = None\0A t_332 = torch.ops.aten.t(mm_129); mm_129 = None\0A sum_154 = torch.ops.aten.sum(_reshape_alias_252, [0], True); _reshape_alias_252 = None\0A view_256 = torch.ops.aten.view(sum_154, [384]); sum_154 = None\0A detach_368 = torch.ops.aten.detach(view_256); view_256 = None\0A detach_369 = torch.ops.aten.detach(detach_368); detach_368 = None\0A _reshape_alias_253 = torch.ops.aten._reshape_alias(mm_128, [1, 128, 384], [49152, 384, 1]); mm_128 = None\0A t_333 = torch.ops.aten.t(t_332); t_332 = None\0A detach_370 = torch.ops.aten.detach(t_333); t_333 = None\0A detach_371 = torch.ops.aten.detach(detach_370); detach_370 = None\0A _reshape_alias_254 = torch.ops.aten._reshape_alias(_reshape_alias_253, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_253 = None\0A permute_88 = torch.ops.aten.permute(_reshape_alias_254, [0, 2, 1, 3]); _reshape_alias_254 = None\0A _reshape_alias_255 = torch.ops.aten._reshape_alias(permute_88, [12, 128, 32], [32, 384, 1]); permute_88 = None\0A transpose_62 = torch.ops.aten.transpose(_reshape_alias_6, 1, 2); _reshape_alias_6 = None\0A bmm_64 = torch.ops.aten.bmm(transpose_62, _reshape_alias_255); transpose_62 = None\0A transpose_63 = torch.ops.aten.transpose(_reshape_alias_7, 1, 2); _reshape_alias_7 = None\0A bmm_65 = torch.ops.aten.bmm(_reshape_alias_255, transpose_63); _reshape_alias_255 = transpose_63 = None\0A _reshape_alias_256 = torch.ops.aten._reshape_alias(bmm_64, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_64 = None\0A _reshape_alias_257 = torch.ops.aten._reshape_alias(bmm_65, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_65 = None\0A 
detach_372 = torch.ops.aten.detach(detach_1); detach_1 = None\0A _softmax_backward_data_10 = torch.ops.aten._softmax_backward_data(_reshape_alias_257, detach_372, -1, 6); _reshape_alias_257 = detach_372 = None\0A div_44 = torch.ops.aten.div(_softmax_backward_data_10, 5.656854249492381); _softmax_backward_data_10 = None\0A _reshape_alias_258 = torch.ops.aten._reshape_alias(div_44, [12, 128, 128], [16384, 128, 1]); div_44 = None\0A transpose_64 = torch.ops.aten.transpose(_reshape_alias_4, 1, 2); _reshape_alias_4 = None\0A bmm_66 = torch.ops.aten.bmm(transpose_64, _reshape_alias_258); transpose_64 = None\0A transpose_65 = torch.ops.aten.transpose(_reshape_alias_5, 1, 2); _reshape_alias_5 = None\0A bmm_67 = torch.ops.aten.bmm(_reshape_alias_258, transpose_65); _reshape_alias_258 = transpose_65 = None\0A _reshape_alias_259 = torch.ops.aten._reshape_alias(bmm_66, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_66 = None\0A _reshape_alias_260 = torch.ops.aten._reshape_alias(bmm_67, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_67 = None\0A transpose_66 = torch.ops.aten.transpose(_reshape_alias_259, -1, -2); _reshape_alias_259 = None\0A permute_89 = torch.ops.aten.permute(_reshape_alias_260, [0, 2, 1, 3]); _reshape_alias_260 = None\0A clone_32 = torch.ops.aten.clone(permute_89, memory_format = 0); permute_89 = None\0A _unsafe_view_44 = torch.ops.aten._unsafe_view(clone_32, [1, 128, 384]); clone_32 = None\0A permute_90 = torch.ops.aten.permute(_reshape_alias_256, [0, 2, 1, 3]); _reshape_alias_256 = None\0A clone_33 = torch.ops.aten.clone(permute_90, memory_format = 0); permute_90 = None\0A _unsafe_view_45 = torch.ops.aten._unsafe_view(clone_33, [1, 128, 384]); clone_33 = None\0A _reshape_alias_261 = torch.ops.aten._reshape_alias(_unsafe_view_45, [128, 384], [384, 1]); _unsafe_view_45 = None\0A t_334 = torch.ops.aten.t(t_8); t_8 = None\0A mm_130 = torch.ops.aten.mm(_reshape_alias_261, t_334); t_334 = None\0A t_335 = torch.ops.aten.t(_reshape_alias_261)\0A mm_131 = 
torch.ops.aten.mm(t_335, view_21); t_335 = view_21 = None\0A t_336 = torch.ops.aten.t(mm_131); mm_131 = None\0A sum_155 = torch.ops.aten.sum(_reshape_alias_261, [0], True); _reshape_alias_261 = None\0A view_257 = torch.ops.aten.view(sum_155, [384]); sum_155 = None\0A detach_373 = torch.ops.aten.detach(view_257); view_257 = None\0A detach_374 = torch.ops.aten.detach(detach_373); detach_373 = None\0A _reshape_alias_262 = torch.ops.aten._reshape_alias(mm_130, [1, 128, 384], [49152, 384, 1]); mm_130 = None\0A add_78 = torch.ops.aten.add(mul_152, _reshape_alias_262); mul_152 = _reshape_alias_262 = None\0A t_337 = torch.ops.aten.t(t_336); t_336 = None\0A detach_375 = torch.ops.aten.detach(t_337); t_337 = None\0A detach_376 = torch.ops.aten.detach(detach_375); detach_375 = None\0A permute_91 = torch.ops.aten.permute(transpose_66, [0, 2, 1, 3]); transpose_66 = None\0A _reshape_alias_263 = torch.ops.aten._reshape_alias(permute_91, [1, 128, 384], [128, 1, 128]); permute_91 = None\0A _reshape_alias_264 = torch.ops.aten._reshape_alias(_reshape_alias_263, [128, 384], [1, 128]); _reshape_alias_263 = None\0A t_338 = torch.ops.aten.t(t_7); t_7 = None\0A mm_132 = torch.ops.aten.mm(_reshape_alias_264, t_338); t_338 = None\0A t_339 = torch.ops.aten.t(_reshape_alias_264)\0A mm_133 = torch.ops.aten.mm(t_339, view_18); t_339 = view_18 = None\0A t_340 = torch.ops.aten.t(mm_133); mm_133 = None\0A sum_156 = torch.ops.aten.sum(_reshape_alias_264, [0], True); _reshape_alias_264 = None\0A view_258 = torch.ops.aten.view(sum_156, [384]); sum_156 = None\0A detach_377 = torch.ops.aten.detach(view_258); view_258 = None\0A detach_378 = torch.ops.aten.detach(detach_377); detach_377 = None\0A _reshape_alias_265 = torch.ops.aten._reshape_alias(mm_132, [1, 128, 384], [49152, 384, 1]); mm_132 = None\0A add_79 = torch.ops.aten.add(add_78, _reshape_alias_265); add_78 = _reshape_alias_265 = None\0A t_341 = torch.ops.aten.t(t_340); t_340 = None\0A detach_379 = torch.ops.aten.detach(t_341); t_341 = None\0A 
detach_380 = torch.ops.aten.detach(detach_379); detach_379 = None\0A _reshape_alias_266 = torch.ops.aten._reshape_alias(_unsafe_view_44, [128, 384], [384, 1]); _unsafe_view_44 = None\0A t_342 = torch.ops.aten.t(t_6); t_6 = None\0A mm_134 = torch.ops.aten.mm(_reshape_alias_266, t_342); t_342 = None\0A t_343 = torch.ops.aten.t(_reshape_alias_266)\0A mm_135 = torch.ops.aten.mm(t_343, view_16); t_343 = view_16 = None\0A t_344 = torch.ops.aten.t(mm_135); mm_135 = None\0A sum_157 = torch.ops.aten.sum(_reshape_alias_266, [0], True); _reshape_alias_266 = None\0A view_259 = torch.ops.aten.view(sum_157, [384]); sum_157 = None\0A detach_381 = torch.ops.aten.detach(view_259); view_259 = None\0A detach_382 = torch.ops.aten.detach(detach_381); detach_381 = None\0A _reshape_alias_267 = torch.ops.aten._reshape_alias(mm_134, [1, 128, 384], [49152, 384, 1]); mm_134 = None\0A add_80 = torch.ops.aten.add(add_79, _reshape_alias_267); add_79 = _reshape_alias_267 = None\0A t_345 = torch.ops.aten.t(t_344); t_344 = None\0A detach_383 = torch.ops.aten.detach(t_345); t_345 = None\0A detach_384 = torch.ops.aten.detach(detach_383); detach_383 = None\0A to_198 = torch.ops.aten.to(add_80, 6)\0A to_199 = torch.ops.aten.to(add_3, 6)\0A to_200 = torch.ops.aten.to(getitem_7, 6)\0A to_201 = torch.ops.aten.to(getitem_8, 6)\0A to_202 = torch.ops.aten.to(params_19, 6)\0A to_203 = torch.ops.aten.to(params_18, 6)\0A sub_66 = torch.ops.aten.sub(add_3, getitem_7); add_3 = getitem_7 = None\0A mul_154 = torch.ops.aten.mul(sub_66, getitem_8); sub_66 = None\0A mul_155 = torch.ops.aten.mul(add_80, params_19)\0A mul_156 = torch.ops.aten.mul(mul_155, 384)\0A sum_158 = torch.ops.aten.sum(mul_155, [2], True)\0A mul_157 = torch.ops.aten.mul(mul_155, mul_154); mul_155 = None\0A sum_159 = torch.ops.aten.sum(mul_157, [2], True); mul_157 = None\0A mul_158 = torch.ops.aten.mul(mul_154, sum_159); sum_159 = None\0A sub_67 = torch.ops.aten.sub(mul_156, sum_158); mul_156 = sum_158 = None\0A sub_68 = torch.ops.aten.sub(sub_67, 
mul_158); sub_67 = mul_158 = None\0A div_45 = torch.ops.aten.div(getitem_8, 384); getitem_8 = None\0A mul_159 = torch.ops.aten.mul(div_45, sub_68); div_45 = sub_68 = None\0A mul_160 = torch.ops.aten.mul(add_80, mul_154); mul_154 = None\0A sum_160 = torch.ops.aten.sum(mul_160, [0, 1]); mul_160 = None\0A sum_161 = torch.ops.aten.sum(add_80, [0, 1]); add_80 = None\0A to_204 = torch.ops.aten.to(mul_159, 6)\0A to_205 = torch.ops.aten.to(sum_160, 6)\0A to_206 = torch.ops.aten.to(sum_161, 6)\0A detach_385 = torch.ops.aten.detach(sum_160); sum_160 = None\0A detach_386 = torch.ops.aten.detach(detach_385); detach_385 = None\0A detach_387 = torch.ops.aten.detach(sum_161); sum_161 = None\0A detach_388 = torch.ops.aten.detach(detach_387); detach_387 = None\0A _reshape_alias_268 = torch.ops.aten._reshape_alias(mul_159, [128, 384], [384, 1])\0A t_346 = torch.ops.aten.t(t_5); t_5 = None\0A mm_136 = torch.ops.aten.mm(_reshape_alias_268, t_346); t_346 = None\0A t_347 = torch.ops.aten.t(_reshape_alias_268)\0A mm_137 = torch.ops.aten.mm(t_347, view_14); t_347 = view_14 = None\0A t_348 = torch.ops.aten.t(mm_137); mm_137 = None\0A sum_162 = torch.ops.aten.sum(_reshape_alias_268, [0], True); _reshape_alias_268 = None\0A view_260 = torch.ops.aten.view(sum_162, [384]); sum_162 = None\0A detach_389 = torch.ops.aten.detach(view_260); view_260 = None\0A detach_390 = torch.ops.aten.detach(detach_389); detach_389 = None\0A _reshape_alias_269 = torch.ops.aten._reshape_alias(mm_136, [1, 128, 1536], [196608, 1536, 1]); mm_136 = None\0A t_349 = torch.ops.aten.t(t_348); t_348 = None\0A detach_391 = torch.ops.aten.detach(t_349); t_349 = None\0A detach_392 = torch.ops.aten.detach(detach_391); detach_391 = None\0A gelu_backward_11 = torch.ops.aten.gelu_backward(_reshape_alias_269, view_13); _reshape_alias_269 = view_13 = None\0A _reshape_alias_270 = torch.ops.aten._reshape_alias(gelu_backward_11, [128, 1536], [1536, 1]); gelu_backward_11 = None\0A t_350 = torch.ops.aten.t(t_4); t_4 = None\0A mm_138 = 
torch.ops.aten.mm(_reshape_alias_270, t_350); t_350 = None\0A t_351 = torch.ops.aten.t(_reshape_alias_270)\0A mm_139 = torch.ops.aten.mm(t_351, view_12); t_351 = view_12 = None\0A t_352 = torch.ops.aten.t(mm_139); mm_139 = None\0A sum_163 = torch.ops.aten.sum(_reshape_alias_270, [0], True); _reshape_alias_270 = None\0A view_261 = torch.ops.aten.view(sum_163, [1536]); sum_163 = None\0A detach_393 = torch.ops.aten.detach(view_261); view_261 = None\0A detach_394 = torch.ops.aten.detach(detach_393); detach_393 = None\0A _reshape_alias_271 = torch.ops.aten._reshape_alias(mm_138, [1, 128, 384], [49152, 384, 1]); mm_138 = None\0A add_81 = torch.ops.aten.add(mul_159, _reshape_alias_271); mul_159 = _reshape_alias_271 = None\0A t_353 = torch.ops.aten.t(t_352); t_352 = None\0A detach_395 = torch.ops.aten.detach(t_353); t_353 = None\0A detach_396 = torch.ops.aten.detach(detach_395); detach_395 = None\0A to_207 = torch.ops.aten.to(add_81, 6)\0A to_208 = torch.ops.aten.to(add_2, 6)\0A to_209 = torch.ops.aten.to(getitem_4, 6)\0A to_210 = torch.ops.aten.to(getitem_5, 6)\0A to_211 = torch.ops.aten.to(params_7, 6)\0A to_212 = torch.ops.aten.to(params_6, 6)\0A sub_69 = torch.ops.aten.sub(add_2, getitem_4); add_2 = getitem_4 = None\0A mul_161 = torch.ops.aten.mul(sub_69, getitem_5); sub_69 = None\0A mul_162 = torch.ops.aten.mul(add_81, params_7)\0A mul_163 = torch.ops.aten.mul(mul_162, 384)\0A sum_164 = torch.ops.aten.sum(mul_162, [2], True)\0A mul_164 = torch.ops.aten.mul(mul_162, mul_161); mul_162 = None\0A sum_165 = torch.ops.aten.sum(mul_164, [2], True); mul_164 = None\0A mul_165 = torch.ops.aten.mul(mul_161, sum_165); sum_165 = None\0A sub_70 = torch.ops.aten.sub(mul_163, sum_164); mul_163 = sum_164 = None\0A sub_71 = torch.ops.aten.sub(sub_70, mul_165); sub_70 = mul_165 = None\0A div_46 = torch.ops.aten.div(getitem_5, 384); getitem_5 = None\0A mul_166 = torch.ops.aten.mul(div_46, sub_71); div_46 = sub_71 = None\0A mul_167 = torch.ops.aten.mul(add_81, mul_161); mul_161 = None\0A 
sum_166 = torch.ops.aten.sum(mul_167, [0, 1]); mul_167 = None\0A sum_167 = torch.ops.aten.sum(add_81, [0, 1]); add_81 = None\0A to_213 = torch.ops.aten.to(mul_166, 6)\0A to_214 = torch.ops.aten.to(sum_166, 6)\0A to_215 = torch.ops.aten.to(sum_167, 6)\0A detach_397 = torch.ops.aten.detach(sum_166); sum_166 = None\0A detach_398 = torch.ops.aten.detach(detach_397); detach_397 = None\0A detach_399 = torch.ops.aten.detach(sum_167); sum_167 = None\0A detach_400 = torch.ops.aten.detach(detach_399); detach_399 = None\0A _reshape_alias_272 = torch.ops.aten._reshape_alias(mul_166, [128, 384], [384, 1])\0A t_354 = torch.ops.aten.t(t_3); t_3 = None\0A mm_140 = torch.ops.aten.mm(_reshape_alias_272, t_354); t_354 = None\0A t_355 = torch.ops.aten.t(_reshape_alias_272)\0A mm_141 = torch.ops.aten.mm(t_355, view_10); t_355 = view_10 = None\0A t_356 = torch.ops.aten.t(mm_141); mm_141 = None\0A sum_168 = torch.ops.aten.sum(_reshape_alias_272, [0], True); _reshape_alias_272 = None\0A view_262 = torch.ops.aten.view(sum_168, [384]); sum_168 = None\0A detach_401 = torch.ops.aten.detach(view_262); view_262 = None\0A detach_402 = torch.ops.aten.detach(detach_401); detach_401 = None\0A _reshape_alias_273 = torch.ops.aten._reshape_alias(mm_140, [1, 128, 384], [49152, 384, 1]); mm_140 = None\0A t_357 = torch.ops.aten.t(t_356); t_356 = None\0A detach_403 = torch.ops.aten.detach(t_357); t_357 = None\0A detach_404 = torch.ops.aten.detach(detach_403); detach_403 = None\0A _reshape_alias_274 = torch.ops.aten._reshape_alias(_reshape_alias_273, [1, 128, 12, 32], [49152, 384, 32, 1]); _reshape_alias_273 = None\0A permute_92 = torch.ops.aten.permute(_reshape_alias_274, [0, 2, 1, 3]); _reshape_alias_274 = None\0A _reshape_alias_275 = torch.ops.aten._reshape_alias(permute_92, [12, 128, 32], [32, 384, 1]); permute_92 = None\0A transpose_67 = torch.ops.aten.transpose(_reshape_alias_2, 1, 2); _reshape_alias_2 = None\0A bmm_68 = torch.ops.aten.bmm(transpose_67, _reshape_alias_275); transpose_67 = None\0A 
transpose_68 = torch.ops.aten.transpose(_reshape_alias_3, 1, 2); _reshape_alias_3 = None\0A bmm_69 = torch.ops.aten.bmm(_reshape_alias_275, transpose_68); _reshape_alias_275 = transpose_68 = None\0A _reshape_alias_276 = torch.ops.aten._reshape_alias(bmm_68, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_68 = None\0A _reshape_alias_277 = torch.ops.aten._reshape_alias(bmm_69, [1, 12, 128, 128], [196608, 16384, 128, 1]); bmm_69 = None\0A detach_405 = torch.ops.aten.detach(detach); detach = None\0A _softmax_backward_data_11 = torch.ops.aten._softmax_backward_data(_reshape_alias_277, detach_405, -1, 6); _reshape_alias_277 = detach_405 = None\0A div_47 = torch.ops.aten.div(_softmax_backward_data_11, 5.656854249492381); _softmax_backward_data_11 = None\0A _reshape_alias_278 = torch.ops.aten._reshape_alias(div_47, [12, 128, 128], [16384, 128, 1]); div_47 = None\0A transpose_69 = torch.ops.aten.transpose(_reshape_alias, 1, 2); _reshape_alias = None\0A bmm_70 = torch.ops.aten.bmm(transpose_69, _reshape_alias_278); transpose_69 = None\0A transpose_70 = torch.ops.aten.transpose(_reshape_alias_1, 1, 2); _reshape_alias_1 = None\0A bmm_71 = torch.ops.aten.bmm(_reshape_alias_278, transpose_70); _reshape_alias_278 = transpose_70 = None\0A _reshape_alias_279 = torch.ops.aten._reshape_alias(bmm_70, [1, 12, 32, 128], [49152, 4096, 128, 1]); bmm_70 = None\0A _reshape_alias_280 = torch.ops.aten._reshape_alias(bmm_71, [1, 12, 128, 32], [49152, 4096, 32, 1]); bmm_71 = None\0A transpose_71 = torch.ops.aten.transpose(_reshape_alias_279, -1, -2); _reshape_alias_279 = None\0A permute_93 = torch.ops.aten.permute(_reshape_alias_280, [0, 2, 1, 3]); _reshape_alias_280 = None\0A clone_34 = torch.ops.aten.clone(permute_93, memory_format = 0); permute_93 = None\0A _unsafe_view_46 = torch.ops.aten._unsafe_view(clone_34, [1, 128, 384]); clone_34 = None\0A permute_94 = torch.ops.aten.permute(_reshape_alias_276, [0, 2, 1, 3]); _reshape_alias_276 = None\0A clone_35 = torch.ops.aten.clone(permute_94, 
memory_format = 0); permute_94 = None\0A _unsafe_view_47 = torch.ops.aten._unsafe_view(clone_35, [1, 128, 384]); clone_35 = None\0A _reshape_alias_281 = torch.ops.aten._reshape_alias(_unsafe_view_47, [128, 384], [384, 1]); _unsafe_view_47 = None\0A t_358 = torch.ops.aten.t(t_2); t_2 = None\0A mm_142 = torch.ops.aten.mm(_reshape_alias_281, t_358); t_358 = None\0A t_359 = torch.ops.aten.t(_reshape_alias_281)\0A mm_143 = torch.ops.aten.mm(t_359, view_5); t_359 = view_5 = None\0A t_360 = torch.ops.aten.t(mm_143); mm_143 = None\0A sum_169 = torch.ops.aten.sum(_reshape_alias_281, [0], True); _reshape_alias_281 = None\0A view_263 = torch.ops.aten.view(sum_169, [384]); sum_169 = None\0A detach_406 = torch.ops.aten.detach(view_263); view_263 = None\0A detach_407 = torch.ops.aten.detach(detach_406); detach_406 = None\0A _reshape_alias_282 = torch.ops.aten._reshape_alias(mm_142, [1, 128, 384], [49152, 384, 1]); mm_142 = None\0A add_82 = torch.ops.aten.add(mul_166, _reshape_alias_282); mul_166 = _reshape_alias_282 = None\0A t_361 = torch.ops.aten.t(t_360); t_360 = None\0A detach_408 = torch.ops.aten.detach(t_361); t_361 = None\0A detach_409 = torch.ops.aten.detach(detach_408); detach_408 = None\0A permute_95 = torch.ops.aten.permute(transpose_71, [0, 2, 1, 3]); transpose_71 = None\0A _reshape_alias_283 = torch.ops.aten._reshape_alias(permute_95, [1, 128, 384], [128, 1, 128]); permute_95 = None\0A _reshape_alias_284 = torch.ops.aten._reshape_alias(_reshape_alias_283, [128, 384], [1, 128]); _reshape_alias_283 = None\0A t_362 = torch.ops.aten.t(t_1); t_1 = None\0A mm_144 = torch.ops.aten.mm(_reshape_alias_284, t_362); t_362 = None\0A t_363 = torch.ops.aten.t(_reshape_alias_284)\0A mm_145 = torch.ops.aten.mm(t_363, view_2); t_363 = view_2 = None\0A t_364 = torch.ops.aten.t(mm_145); mm_145 = None\0A sum_170 = torch.ops.aten.sum(_reshape_alias_284, [0], True); _reshape_alias_284 = None\0A view_264 = torch.ops.aten.view(sum_170, [384]); sum_170 = None\0A detach_410 = 
torch.ops.aten.detach(view_264); view_264 = None\0A detach_411 = torch.ops.aten.detach(detach_410); detach_410 = None\0A _reshape_alias_285 = torch.ops.aten._reshape_alias(mm_144, [1, 128, 384], [49152, 384, 1]); mm_144 = None\0A add_83 = torch.ops.aten.add(add_82, _reshape_alias_285); add_82 = _reshape_alias_285 = None\0A t_365 = torch.ops.aten.t(t_364); t_364 = None\0A detach_412 = torch.ops.aten.detach(t_365); t_365 = None\0A detach_413 = torch.ops.aten.detach(detach_412); detach_412 = None\0A _reshape_alias_286 = torch.ops.aten._reshape_alias(_unsafe_view_46, [128, 384], [384, 1]); _unsafe_view_46 = None\0A t_366 = torch.ops.aten.t(t); t = None\0A mm_146 = torch.ops.aten.mm(_reshape_alias_286, t_366); t_366 = None\0A t_367 = torch.ops.aten.t(_reshape_alias_286)\0A mm_147 = torch.ops.aten.mm(t_367, view); t_367 = view = None\0A t_368 = torch.ops.aten.t(mm_147); mm_147 = None\0A sum_171 = torch.ops.aten.sum(_reshape_alias_286, [0], True); _reshape_alias_286 = None\0A view_265 = torch.ops.aten.view(sum_171, [384]); sum_171 = None\0A detach_414 = torch.ops.aten.detach(view_265); view_265 = None\0A detach_415 = torch.ops.aten.detach(detach_414); detach_414 = None\0A _reshape_alias_287 = torch.ops.aten._reshape_alias(mm_146, [1, 128, 384], [49152, 384, 1]); mm_146 = None\0A add_84 = torch.ops.aten.add(add_83, _reshape_alias_287); add_83 = _reshape_alias_287 = None\0A t_369 = torch.ops.aten.t(t_368); t_368 = None\0A detach_416 = torch.ops.aten.detach(t_369); t_369 = None\0A detach_417 = torch.ops.aten.detach(detach_416); detach_416 = None\0A to_216 = torch.ops.aten.to(add_84, 6)\0A to_217 = torch.ops.aten.to(add_, 6)\0A to_218 = torch.ops.aten.to(getitem_1, 6)\0A to_219 = torch.ops.aten.to(getitem_2, 6)\0A to_220 = torch.ops.aten.to(params_2, 6)\0A to_221 = torch.ops.aten.to(params_1, 6)\0A sub_72 = torch.ops.aten.sub(add_, getitem_1); add_ = getitem_1 = None\0A mul_168 = torch.ops.aten.mul(sub_72, getitem_2); sub_72 = None\0A mul_169 = torch.ops.aten.mul(add_84, 
params_2)\0A mul_170 = torch.ops.aten.mul(mul_169, 384)\0A sum_172 = torch.ops.aten.sum(mul_169, [2], True)\0A mul_171 = torch.ops.aten.mul(mul_169, mul_168); mul_169 = None\0A sum_173 = torch.ops.aten.sum(mul_171, [2], True); mul_171 = None\0A mul_172 = torch.ops.aten.mul(mul_168, sum_173); sum_173 = None\0A sub_73 = torch.ops.aten.sub(mul_170, sum_172); mul_170 = sum_172 = None\0A sub_74 = torch.ops.aten.sub(sub_73, mul_172); sub_73 = mul_172 = None\0A div_48 = torch.ops.aten.div(getitem_2, 384); getitem_2 = None\0A mul_173 = torch.ops.aten.mul(div_48, sub_74); div_48 = sub_74 = None\0A mul_174 = torch.ops.aten.mul(add_84, mul_168); mul_168 = None\0A sum_174 = torch.ops.aten.sum(mul_174, [0, 1]); mul_174 = None\0A sum_175 = torch.ops.aten.sum(add_84, [0, 1]); add_84 = None\0A to_222 = torch.ops.aten.to(mul_173, 6)\0A to_223 = torch.ops.aten.to(sum_174, 6)\0A to_224 = torch.ops.aten.to(sum_175, 6)\0A detach_418 = torch.ops.aten.detach(sum_174); sum_174 = None\0A detach_419 = torch.ops.aten.detach(detach_418); detach_418 = None\0A detach_420 = torch.ops.aten.detach(sum_175); sum_175 = None\0A detach_421 = torch.ops.aten.detach(detach_420); detach_420 = None\0A view_266 = torch.ops.aten.view(mul_173, [128, 384])\0A new_zeros_2 = torch.ops.aten.new_zeros(mul_173, [512, 384], device = device(type='cpu'), dtype = 6, layout = 0, pin_memory = False)\0A view_267 = torch.ops.aten.view(slice_4, [128]); slice_4 = None\0A ne = torch.ops.aten.ne(view_267, -1)\0A unsqueeze = torch.ops.aten.unsqueeze(ne, 1); ne = None\0A expand_as = torch.ops.aten.expand_as(unsqueeze, view_266); unsqueeze = None\0A full_like = torch.ops.aten.full_like(view_266, 0, device = device(type='cpu'), dtype = 6, layout = 0, pin_memory = False)\0A where = torch.ops.aten.where(expand_as, view_266, full_like); expand_as = view_266 = full_like = None\0A index_put = torch.ops.aten.index_put(new_zeros_2, [view_267], where, True); new_zeros_2 = view_267 = where = None\0A detach_422 = 
torch.ops.aten.detach(index_put); index_put = None\0A detach_423 = torch.ops.aten.detach(detach_422); detach_422 = None\0A view_268 = torch.ops.aten.view(mul_173, [128, 384])\0A new_zeros_3 = torch.ops.aten.new_zeros(mul_173, [2, 384], device = device(type='cpu'), dtype = 6, layout = 0, pin_memory = False)\0A view_269 = torch.ops.aten.view(expand, [128]); expand = None\0A ne_1 = torch.ops.aten.ne(view_269, -1)\0A unsqueeze_1 = torch.ops.aten.unsqueeze(ne_1, 1); ne_1 = None\0A expand_as_1 = torch.ops.aten.expand_as(unsqueeze_1, view_268); unsqueeze_1 = None\0A full_like_1 = torch.ops.aten.full_like(view_268, 0, device = device(type='cpu'), dtype = 6, layout = 0, pin_memory = False)\0A where_1 = torch.ops.aten.where(expand_as_1, view_268, full_like_1); expand_as_1 = view_268 = full_like_1 = None\0A index_put_1 = torch.ops.aten.index_put(new_zeros_3, [view_269], where_1, True); new_zeros_3 = view_269 = where_1 = None\0A detach_424 = torch.ops.aten.detach(index_put_1); index_put_1 = None\0A detach_425 = torch.ops.aten.detach(detach_424); detach_424 = None\0A view_270 = torch.ops.aten.view(mul_173, [128, 384])\0A new_zeros_4 = torch.ops.aten.new_zeros(mul_173, [30522, 384], device = device(type='cpu'), dtype = 6, layout = 0, pin_memory = False); mul_173 = None\0A view_271 = torch.ops.aten.view(args_1, [128]); args_1 = None\0A ne_2 = torch.ops.aten.ne(view_271, 0)\0A unsqueeze_2 = torch.ops.aten.unsqueeze(ne_2, 1); ne_2 = None\0A expand_as_2 = torch.ops.aten.expand_as(unsqueeze_2, view_270); unsqueeze_2 = None\0A full_like_2 = torch.ops.aten.full_like(view_270, 0, device = device(type='cpu'), dtype = 6, layout = 0, pin_memory = False)\0A where_2 = torch.ops.aten.where(expand_as_2, view_270, full_like_2); expand_as_2 = view_270 = full_like_2 = None\0A index_put_2 = torch.ops.aten.index_put(new_zeros_4, [view_271], where_2, True); new_zeros_4 = view_271 = where_2 = None\0A detach_426 = torch.ops.aten.detach(index_put_2); index_put_2 = None\0A detach_427 = 
torch.ops.aten.detach(detach_426); detach_426 = None\0A add__1 = torch.ops.aten.add_(params_1, detach_421, alpha = -0.01); params_1 = detach_421 = None\0A add__2 = torch.ops.aten.add_(params_2, detach_419, alpha = -0.01); params_2 = detach_419 = None\0A add__3 = torch.ops.aten.add_(params_3, detach_423, alpha = -0.01); params_3 = detach_423 = None\0A add__4 = torch.ops.aten.add_(params_4, detach_425, alpha = -0.01); params_4 = detach_425 = None\0A add__5 = torch.ops.aten.add_(params_5, detach_427, alpha = -0.01); params_5 = detach_427 = None\0A add__6 = torch.ops.aten.add_(params_6, detach_400, alpha = -0.01); params_6 = detach_400 = None\0A add__7 = torch.ops.aten.add_(params_7, detach_398, alpha = -0.01); params_7 = detach_398 = None\0A add__8 = torch.ops.aten.add_(params_8, detach_402, alpha = -0.01); params_8 = detach_402 = None\0A add__9 = torch.ops.aten.add_(params_9, detach_404, alpha = -0.01); params_9 = detach_404 = None\0A add__10 = torch.ops.aten.add_(params_10, detach_411, alpha = -0.01); params_10 = detach_411 = None\0A add__11 = torch.ops.aten.add_(params_11, detach_413, alpha = -0.01); params_11 = detach_413 = None\0A add__12 = torch.ops.aten.add_(params_12, detach_415, alpha = -0.01); params_12 = detach_415 = None\0A add__13 = torch.ops.aten.add_(params_13, detach_417, alpha = -0.01); params_13 = detach_417 = None\0A add__14 = torch.ops.aten.add_(params_14, detach_407, alpha = -0.01); params_14 = detach_407 = None\0A add__15 = torch.ops.aten.add_(params_15, detach_409, alpha = -0.01); params_15 = detach_409 = None\0A add__16 = torch.ops.aten.add_(params_16, detach_394, alpha = -0.01); params_16 = detach_394 = None\0A add__17 = torch.ops.aten.add_(params_17, detach_396, alpha = -0.01); params_17 = detach_396 = None\0A add__18 = torch.ops.aten.add_(params_18, detach_388, alpha = -0.01); params_18 = detach_388 = None\0A add__19 = torch.ops.aten.add_(params_19, detach_386, alpha = -0.01); params_19 = detach_386 = None\0A add__20 = 
torch.ops.aten.add_(params_20, detach_390, alpha = -0.01); params_20 = detach_390 = None\0A add__21 = torch.ops.aten.add_(params_21, detach_392, alpha = -0.01); params_21 = detach_392 = None\0A add__22 = torch.ops.aten.add_(params_22, detach_367, alpha = -0.01); params_22 = detach_367 = None\0A add__23 = torch.ops.aten.add_(params_23, detach_365, alpha = -0.01); params_23 = detach_365 = None\0A add__24 = torch.ops.aten.add_(params_24, detach_369, alpha = -0.01); params_24 = detach_369 = None\0A add__25 = torch.ops.aten.add_(params_25, detach_371, alpha = -0.01); params_25 = detach_371 = None\0A add__26 = torch.ops.aten.add_(params_26, detach_378, alpha = -0.01); params_26 = detach_378 = None\0A add__27 = torch.ops.aten.add_(params_27, detach_380, alpha = -0.01); params_27 = detach_380 = None\0A add__28 = torch.ops.aten.add_(params_28, detach_382, alpha = -0.01); params_28 = detach_382 = None\0A add__29 = torch.ops.aten.add_(params_29, detach_384, alpha = -0.01); params_29 = detach_384 = None\0A add__30 = torch.ops.aten.add_(params_30, detach_374, alpha = -0.01); params_30 = detach_374 = None\0A add__31 = torch.ops.aten.add_(params_31, detach_376, alpha = -0.01); params_31 = detach_376 = None\0A add__32 = torch.ops.aten.add_(params_32, detach_361, alpha = -0.01); params_32 = detach_361 = None\0A add__33 = torch.ops.aten.add_(params_33, detach_363, alpha = -0.01); params_33 = detach_363 = None\0A add__34 = torch.ops.aten.add_(params_34, detach_355, alpha = -0.01); params_34 = detach_355 = None\0A add__35 = torch.ops.aten.add_(params_35, detach_353, alpha = -0.01); params_35 = detach_353 = None\0A add__36 = torch.ops.aten.add_(params_36, detach_357, alpha = -0.01); params_36 = detach_357 = None\0A add__37 = torch.ops.aten.add_(params_37, detach_359, alpha = -0.01); params_37 = detach_359 = None\0A add__38 = torch.ops.aten.add_(params_38, detach_70, alpha = -0.01); params_38 = detach_70 = None\0A add__39 = torch.ops.aten.add_(params_39, detach_68, alpha = -0.01); 
params_39 = detach_68 = None\0A add__40 = torch.ops.aten.add_(params_40, detach_72, alpha = -0.01); params_40 = detach_72 = None\0A add__41 = torch.ops.aten.add_(params_41, detach_74, alpha = -0.01); params_41 = detach_74 = None\0A add__42 = torch.ops.aten.add_(params_42, detach_81, alpha = -0.01); params_42 = detach_81 = None\0A add__43 = torch.ops.aten.add_(params_43, detach_83, alpha = -0.01); params_43 = detach_83 = None\0A add__44 = torch.ops.aten.add_(params_44, detach_85, alpha = -0.01); params_44 = detach_85 = None\0A add__45 = torch.ops.aten.add_(params_45, detach_87, alpha = -0.01); params_45 = detach_87 = None\0A add__46 = torch.ops.aten.add_(params_46, detach_77, alpha = -0.01); params_46 = detach_77 = None\0A add__47 = torch.ops.aten.add_(params_47, detach_79, alpha = -0.01); params_47 = detach_79 = None\0A add__48 = torch.ops.aten.add_(params_48, detach_64, alpha = -0.01); params_48 = detach_64 = None\0A add__49 = torch.ops.aten.add_(params_49, detach_66, alpha = -0.01); params_49 = detach_66 = None\0A add__50 = torch.ops.aten.add_(params_50, detach_58, alpha = -0.01); params_50 = detach_58 = None\0A add__51 = torch.ops.aten.add_(params_51, detach_56, alpha = -0.01); params_51 = detach_56 = None\0A add__52 = torch.ops.aten.add_(params_52, detach_60, alpha = -0.01); params_52 = detach_60 = None\0A add__53 = torch.ops.aten.add_(params_53, detach_62, alpha = -0.01); params_53 = detach_62 = None\0A add__54 = torch.ops.aten.add_(params_54, detach_37, alpha = -0.01); params_54 = detach_37 = None\0A add__55 = torch.ops.aten.add_(params_55, detach_35, alpha = -0.01); params_55 = detach_35 = None\0A add__56 = torch.ops.aten.add_(params_56, detach_39, alpha = -0.01); params_56 = detach_39 = None\0A add__57 = torch.ops.aten.add_(params_57, detach_41, alpha = -0.01); params_57 = detach_41 = None\0A add__58 = torch.ops.aten.add_(params_58, detach_48, alpha = -0.01); params_58 = detach_48 = None\0A add__59 = torch.ops.aten.add_(params_59, detach_50, alpha = -0.01); 
params_59 = detach_50 = None\0A add__60 = torch.ops.aten.add_(params_60, detach_52, alpha = -0.01); params_60 = detach_52 = None\0A add__61 = torch.ops.aten.add_(params_61, detach_54, alpha = -0.01); params_61 = detach_54 = None\0A add__62 = torch.ops.aten.add_(params_62, detach_44, alpha = -0.01); params_62 = detach_44 = None\0A add__63 = torch.ops.aten.add_(params_63, detach_46, alpha = -0.01); params_63 = detach_46 = None\0A add__64 = torch.ops.aten.add_(params_64, detach_31, alpha = -0.01); params_64 = detach_31 = None\0A add__65 = torch.ops.aten.add_(params_65, detach_33, alpha = -0.01); params_65 = detach_33 = None\0A add__66 = torch.ops.aten.add_(params_66, detach_25, alpha = -0.01); params_66 = detach_25 = None\0A add__67 = torch.ops.aten.add_(params_67, detach_23, alpha = -0.01); params_67 = detach_23 = None\0A add__68 = torch.ops.aten.add_(params_68, detach_27, alpha = -0.01); params_68 = detach_27 = None\0A add__69 = torch.ops.aten.add_(params_69, detach_29, alpha = -0.01); params_69 = detach_29 = None\0A add__70 = torch.ops.aten.add_(params_70, detach_334, alpha = -0.01); params_70 = detach_334 = None\0A add__71 = torch.ops.aten.add_(params_71, detach_332, alpha = -0.01); params_71 = detach_332 = None\0A add__72 = torch.ops.aten.add_(params_72, detach_336, alpha = -0.01); params_72 = detach_336 = None\0A add__73 = torch.ops.aten.add_(params_73, detach_338, alpha = -0.01); params_73 = detach_338 = None\0A add__74 = torch.ops.aten.add_(params_74, detach_345, alpha = -0.01); params_74 = detach_345 = None\0A add__75 = torch.ops.aten.add_(params_75, detach_347, alpha = -0.01); params_75 = detach_347 = None\0A add__76 = torch.ops.aten.add_(params_76, detach_349, alpha = -0.01); params_76 = detach_349 = None\0A add__77 = torch.ops.aten.add_(params_77, detach_351, alpha = -0.01); params_77 = detach_351 = None\0A add__78 = torch.ops.aten.add_(params_78, detach_341, alpha = -0.01); params_78 = detach_341 = None\0A add__79 = torch.ops.aten.add_(params_79, 
detach_343, alpha = -0.01); params_79 = detach_343 = None\0A add__80 = torch.ops.aten.add_(params_80, detach_328, alpha = -0.01); params_80 = detach_328 = None\0A add__81 = torch.ops.aten.add_(params_81, detach_330, alpha = -0.01); params_81 = detach_330 = None\0A add__82 = torch.ops.aten.add_(params_82, detach_322, alpha = -0.01); params_82 = detach_322 = None\0A add__83 = torch.ops.aten.add_(params_83, detach_320, alpha = -0.01); params_83 = detach_320 = None\0A add__84 = torch.ops.aten.add_(params_84, detach_324, alpha = -0.01); params_84 = detach_324 = None\0A add__85 = torch.ops.aten.add_(params_85, detach_326, alpha = -0.01); params_85 = detach_326 = None\0A add__86 = torch.ops.aten.add_(params_86, detach_301, alpha = -0.01); params_86 = detach_301 = None\0A add__87 = torch.ops.aten.add_(params_87, detach_299, alpha = -0.01); params_87 = detach_299 = None\0A add__88 = torch.ops.aten.add_(params_88, detach_303, alpha = -0.01); params_88 = detach_303 = None\0A add__89 = torch.ops.aten.add_(params_89, detach_305, alpha = -0.01); params_89 = detach_305 = None\0A add__90 = torch.ops.aten.add_(params_90, detach_312, alpha = -0.01); params_90 = detach_312 = None\0A add__91 = torch.ops.aten.add_(params_91, detach_314, alpha = -0.01); params_91 = detach_314 = None\0A add__92 = torch.ops.aten.add_(params_92, detach_316, alpha = -0.01); params_92 = detach_316 = None\0A add__93 = torch.ops.aten.add_(params_93, detach_318, alpha = -0.01); params_93 = detach_318 = None\0A add__94 = torch.ops.aten.add_(params_94, detach_308, alpha = -0.01); params_94 = detach_308 = None\0A add__95 = torch.ops.aten.add_(params_95, detach_310, alpha = -0.01); params_95 = detach_310 = None\0A add__96 = torch.ops.aten.add_(params_96, detach_295, alpha = -0.01); params_96 = detach_295 = None\0A add__97 = torch.ops.aten.add_(params_97, detach_297, alpha = -0.01); params_97 = detach_297 = None\0A add__98 = torch.ops.aten.add_(params_98, detach_289, alpha = -0.01); params_98 = detach_289 = None\0A 
add__99 = torch.ops.aten.add_(params_99, detach_287, alpha = -0.01); params_99 = detach_287 = None\0A add__100 = torch.ops.aten.add_(params_100, detach_291, alpha = -0.01); params_100 = detach_291 = None\0A add__101 = torch.ops.aten.add_(params_101, detach_293, alpha = -0.01); params_101 = detach_293 = None\0A add__102 = torch.ops.aten.add_(params_102, detach_268, alpha = -0.01); params_102 = detach_268 = None\0A add__103 = torch.ops.aten.add_(params_103, detach_266, alpha = -0.01); params_103 = detach_266 = None\0A add__104 = torch.ops.aten.add_(params_104, detach_270, alpha = -0.01); params_104 = detach_270 = None\0A add__105 = torch.ops.aten.add_(params_105, detach_272, alpha = -0.01); params_105 = detach_272 = None\0A add__106 = torch.ops.aten.add_(params_106, detach_279, alpha = -0.01); params_106 = detach_279 = None\0A add__107 = torch.ops.aten.add_(params_107, detach_281, alpha = -0.01); params_107 = detach_281 = None\0A add__108 = torch.ops.aten.add_(params_108, detach_283, alpha = -0.01); params_108 = detach_283 = None\0A add__109 = torch.ops.aten.add_(params_109, detach_285, alpha = -0.01); params_109 = detach_285 = None\0A add__110 = torch.ops.aten.add_(params_110, detach_275, alpha = -0.01); params_110 = detach_275 = None\0A add__111 = torch.ops.aten.add_(params_111, detach_277, alpha = -0.01); params_111 = detach_277 = None\0A add__112 = torch.ops.aten.add_(params_112, detach_262, alpha = -0.01); params_112 = detach_262 = None\0A add__113 = torch.ops.aten.add_(params_113, detach_264, alpha = -0.01); params_113 = detach_264 = None\0A add__114 = torch.ops.aten.add_(params_114, detach_256, alpha = -0.01); params_114 = detach_256 = None\0A add__115 = torch.ops.aten.add_(params_115, detach_254, alpha = -0.01); params_115 = detach_254 = None\0A add__116 = torch.ops.aten.add_(params_116, detach_258, alpha = -0.01); params_116 = detach_258 = None\0A add__117 = torch.ops.aten.add_(params_117, detach_260, alpha = -0.01); params_117 = detach_260 = None\0A 
add__118 = torch.ops.aten.add_(params_118, detach_235, alpha = -0.01); params_118 = detach_235 = None\0A add__119 = torch.ops.aten.add_(params_119, detach_233, alpha = -0.01); params_119 = detach_233 = None\0A add__120 = torch.ops.aten.add_(params_120, detach_237, alpha = -0.01); params_120 = detach_237 = None\0A add__121 = torch.ops.aten.add_(params_121, detach_239, alpha = -0.01); params_121 = detach_239 = None\0A add__122 = torch.ops.aten.add_(params_122, detach_246, alpha = -0.01); params_122 = detach_246 = None\0A add__123 = torch.ops.aten.add_(params_123, detach_248, alpha = -0.01); params_123 = detach_248 = None\0A add__124 = torch.ops.aten.add_(params_124, detach_250, alpha = -0.01); params_124 = detach_250 = None\0A add__125 = torch.ops.aten.add_(params_125, detach_252, alpha = -0.01); params_125 = detach_252 = None\0A add__126 = torch.ops.aten.add_(params_126, detach_242, alpha = -0.01); params_126 = detach_242 = None\0A add__127 = torch.ops.aten.add_(params_127, detach_244, alpha = -0.01); params_127 = detach_244 = None\0A add__128 = torch.ops.aten.add_(params_128, detach_229, alpha = -0.01); params_128 = detach_229 = None\0A add__129 = torch.ops.aten.add_(params_129, detach_231, alpha = -0.01); params_129 = detach_231 = None\0A add__130 = torch.ops.aten.add_(params_130, detach_223, alpha = -0.01); params_130 = detach_223 = None\0A add__131 = torch.ops.aten.add_(params_131, detach_221, alpha = -0.01); params_131 = detach_221 = None\0A add__132 = torch.ops.aten.add_(params_132, detach_225, alpha = -0.01); params_132 = detach_225 = None\0A add__133 = torch.ops.aten.add_(params_133, detach_227, alpha = -0.01); params_133 = detach_227 = None\0A add__134 = torch.ops.aten.add_(params_134, detach_202, alpha = -0.01); params_134 = detach_202 = None\0A add__135 = torch.ops.aten.add_(params_135, detach_200, alpha = -0.01); params_135 = detach_200 = None\0A add__136 = torch.ops.aten.add_(params_136, detach_204, alpha = -0.01); params_136 = detach_204 = None\0A 
add__137 = torch.ops.aten.add_(params_137, detach_206, alpha = -0.01); params_137 = detach_206 = None\0A add__138 = torch.ops.aten.add_(params_138, detach_213, alpha = -0.01); params_138 = detach_213 = None\0A add__139 = torch.ops.aten.add_(params_139, detach_215, alpha = -0.01); params_139 = detach_215 = None\0A add__140 = torch.ops.aten.add_(params_140, detach_217, alpha = -0.01); params_140 = detach_217 = None\0A add__141 = torch.ops.aten.add_(params_141, detach_219, alpha = -0.01); params_141 = detach_219 = None\0A add__142 = torch.ops.aten.add_(params_142, detach_209, alpha = -0.01); params_142 = detach_209 = None\0A add__143 = torch.ops.aten.add_(params_143, detach_211, alpha = -0.01); params_143 = detach_211 = None\0A add__144 = torch.ops.aten.add_(params_144, detach_196, alpha = -0.01); params_144 = detach_196 = None\0A add__145 = torch.ops.aten.add_(params_145, detach_198, alpha = -0.01); params_145 = detach_198 = None\0A add__146 = torch.ops.aten.add_(params_146, detach_190, alpha = -0.01); params_146 = detach_190 = None\0A add__147 = torch.ops.aten.add_(params_147, detach_188, alpha = -0.01); params_147 = detach_188 = None\0A add__148 = torch.ops.aten.add_(params_148, detach_192, alpha = -0.01); params_148 = detach_192 = None\0A add__149 = torch.ops.aten.add_(params_149, detach_194, alpha = -0.01); params_149 = detach_194 = None\0A add__150 = torch.ops.aten.add_(params_150, detach_169, alpha = -0.01); params_150 = detach_169 = None\0A add__151 = torch.ops.aten.add_(params_151, detach_167, alpha = -0.01); params_151 = detach_167 = None\0A add__152 = torch.ops.aten.add_(params_152, detach_171, alpha = -0.01); params_152 = detach_171 = None\0A add__153 = torch.ops.aten.add_(params_153, detach_173, alpha = -0.01); params_153 = detach_173 = None\0A add__154 = torch.ops.aten.add_(params_154, detach_180, alpha = -0.01); params_154 = detach_180 = None\0A add__155 = torch.ops.aten.add_(params_155, detach_182, alpha = -0.01); params_155 = detach_182 = None\0A 
add__156 = torch.ops.aten.add_(params_156, detach_184, alpha = -0.01); params_156 = detach_184 = None\0A add__157 = torch.ops.aten.add_(params_157, detach_186, alpha = -0.01); params_157 = detach_186 = None\0A add__158 = torch.ops.aten.add_(params_158, detach_176, alpha = -0.01); params_158 = detach_176 = None\0A add__159 = torch.ops.aten.add_(params_159, detach_178, alpha = -0.01); params_159 = detach_178 = None\0A add__160 = torch.ops.aten.add_(params_160, detach_163, alpha = -0.01); params_160 = detach_163 = None\0A add__161 = torch.ops.aten.add_(params_161, detach_165, alpha = -0.01); params_161 = detach_165 = None\0A add__162 = torch.ops.aten.add_(params_162, detach_157, alpha = -0.01); params_162 = detach_157 = None\0A add__163 = torch.ops.aten.add_(params_163, detach_155, alpha = -0.01); params_163 = detach_155 = None\0A add__164 = torch.ops.aten.add_(params_164, detach_159, alpha = -0.01); params_164 = detach_159 = None\0A add__165 = torch.ops.aten.add_(params_165, detach_161, alpha = -0.01); params_165 = detach_161 = None\0A add__166 = torch.ops.aten.add_(params_166, detach_136, alpha = -0.01); params_166 = detach_136 = None\0A add__167 = torch.ops.aten.add_(params_167, detach_134, alpha = -0.01); params_167 = detach_134 = None\0A add__168 = torch.ops.aten.add_(params_168, detach_138, alpha = -0.01); params_168 = detach_138 = None\0A add__169 = torch.ops.aten.add_(params_169, detach_140, alpha = -0.01); params_169 = detach_140 = None\0A add__170 = torch.ops.aten.add_(params_170, detach_147, alpha = -0.01); params_170 = detach_147 = None\0A add__171 = torch.ops.aten.add_(params_171, detach_149, alpha = -0.01); params_171 = detach_149 = None\0A add__172 = torch.ops.aten.add_(params_172, detach_151, alpha = -0.01); params_172 = detach_151 = None\0A add__173 = torch.ops.aten.add_(params_173, detach_153, alpha = -0.01); params_173 = detach_153 = None\0A add__174 = torch.ops.aten.add_(params_174, detach_143, alpha = -0.01); params_174 = detach_143 = None\0A 
add__175 = torch.ops.aten.add_(params_175, detach_145, alpha = -0.01); params_175 = detach_145 = None\0A add__176 = torch.ops.aten.add_(params_176, detach_130, alpha = -0.01); params_176 = detach_130 = None\0A add__177 = torch.ops.aten.add_(params_177, detach_132, alpha = -0.01); params_177 = detach_132 = None\0A add__178 = torch.ops.aten.add_(params_178, detach_124, alpha = -0.01); params_178 = detach_124 = None\0A add__179 = torch.ops.aten.add_(params_179, detach_122, alpha = -0.01); params_179 = detach_122 = None\0A add__180 = torch.ops.aten.add_(params_180, detach_126, alpha = -0.01); params_180 = detach_126 = None\0A add__181 = torch.ops.aten.add_(params_181, detach_128, alpha = -0.01); params_181 = detach_128 = None\0A add__182 = torch.ops.aten.add_(params_182, detach_103, alpha = -0.01); params_182 = detach_103 = None\0A add__183 = torch.ops.aten.add_(params_183, detach_101, alpha = -0.01); params_183 = detach_101 = None\0A add__184 = torch.ops.aten.add_(params_184, detach_105, alpha = -0.01); params_184 = detach_105 = None\0A add__185 = torch.ops.aten.add_(params_185, detach_107, alpha = -0.01); params_185 = detach_107 = None\0A add__186 = torch.ops.aten.add_(params_186, detach_114, alpha = -0.01); params_186 = detach_114 = None\0A add__187 = torch.ops.aten.add_(params_187, detach_116, alpha = -0.01); params_187 = detach_116 = None\0A add__188 = torch.ops.aten.add_(params_188, detach_118, alpha = -0.01); params_188 = detach_118 = None\0A add__189 = torch.ops.aten.add_(params_189, detach_120, alpha = -0.01); params_189 = detach_120 = None\0A add__190 = torch.ops.aten.add_(params_190, detach_110, alpha = -0.01); params_190 = detach_110 = None\0A add__191 = torch.ops.aten.add_(params_191, detach_112, alpha = -0.01); params_191 = detach_112 = None\0A add__192 = torch.ops.aten.add_(params_192, detach_97, alpha = -0.01); params_192 = detach_97 = None\0A add__193 = torch.ops.aten.add_(params_193, detach_99, alpha = -0.01); params_193 = detach_99 = None\0A add__194 
= torch.ops.aten.add_(params_194, detach_91, alpha = -0.01); params_194 = detach_91 = None\0A add__195 = torch.ops.aten.add_(params_195, detach_89, alpha = -0.01); params_195 = detach_89 = None\0A add__196 = torch.ops.aten.add_(params_196, detach_93, alpha = -0.01); params_196 = detach_93 = None\0A add__197 = torch.ops.aten.add_(params_197, detach_95, alpha = -0.01); params_197 = detach_95 = None\0A add__198 = torch.ops.aten.add_(params_198, detach_19, alpha = -0.01); params_198 = detach_19 = None\0A add__199 = torch.ops.aten.add_(params_199, detach_21, alpha = -0.01); params_199 = detach_21 = None\0A add__200 = torch.ops.aten.add_(params_200, detach_14, alpha = -0.01); params_200 = detach_14 = None\0A add__201 = torch.ops.aten.add_(params_201, detach_16, alpha = -0.01); params_201 = detach_16 = None\0A return (add__1, add__2, add__3, add__4, add__5, add__6, add__7, add__8, add__9, add__10, add__11, add__12, add__13, add__14, add__15, add__16, add__17, add__18, add__19, add__20, add__21, add__22, add__23, add__24, add__25, add__26, add__27, add__28, add__29, add__30, add__31, add__32, add__33, add__34, add__35, add__36, add__37, add__38, add__39, add__40, add__41, add__42, add__43, add__44, add__45, add__46, add__47, add__48, add__49, add__50, add__51, add__52, add__53, add__54, add__55, add__56, add__57, add__58, add__59, add__60, add__61, add__62, add__63, add__64, add__65, add__66, add__67, add__68, add__69, add__70, add__71, add__72, add__73, add__74, add__75, add__76, add__77, add__78, add__79, add__80, add__81, add__82, add__83, add__84, add__85, add__86, add__87, add__88, add__89, add__90, add__91, add__92, add__93, add__94, add__95, add__96, add__97, add__98, add__99, add__100, add__101, add__102, add__103, add__104, add__105, add__106, add__107, add__108, add__109, add__110, add__111, add__112, add__113, add__114, add__115, add__116, add__117, add__118, add__119, add__120, add__121, add__122, add__123, add__124, add__125, add__126, add__127, add__128, 
add__129, add__130, add__131, add__132, add__133, add__134, add__135, add__136, add__137, add__138, add__139, add__140, add__141, add__142, add__143, add__144, add__145, add__146, add__147, add__148, add__149, add__150, add__151, add__152, add__153, add__154, add__155, add__156, add__157, add__158, add__159, add__160, add__161, add__162, add__163, add__164, add__165, add__166, add__167, add__168, add__169, add__170, add__171, add__172, add__173, add__174, add__175, add__176, add__177, add__178, add__179, add__180, add__181, add__182, add__183, add__184, add__185, add__186, add__187, add__188, add__189, add__190, add__191, add__192, add__193, add__194, add__195, add__196, add__197, add__198, add__199, add__200, add__201, buffers_1, buffers_2)\0A " | |
// Instantiates the module object %1 of class type
// "__torch__.torch.fx.graph_module.forward" and binds its four slots.
// The bound values (%0, %true, %none, %str) are defined earlier in the file.
%1 = torch.nn_module { | |
// Tensor slot typed as a 1x1x1x128 f32 tensor.
torch.slot "_tensor_constant0", %0 : !torch.tensor<[1,1,1,128],f32> | |
// Boolean slot bound to %true.
// NOTE(review): presumably the nn.Module training-mode flag; confirm.
torch.slot "training", %true : !torch.bool | |
// Slot bound to %none (typed !torch.none).
torch.slot "_is_full_backward_hook", %none : !torch.none | |
// String slot; this is the "_code" attribute that the __code_getter function
// at the top of the file reads via torch.prim.GetAttr.
// NOTE(review): %str is presumably the generated Python source literal that
// ends just above this op; confirm against its definition.
torch.slot "_code", %str : !torch.str | |
} : !torch.nn.Module<"__torch__.torch.fx.graph_module.forward"> | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment