Created
February 9, 2022 18:30
-
-
Save pashu123/5aff2b56a54ae00a9c177331c3eaa392 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module attributes {torch.debug_module_name = "GraphModule"} { | |
func private @__torch__.torch.fx.graph_module.___torch_mangle_2.GraphModule.forward(%arg0: !torch.nn.Module<"__torch__.torch.fx.graph_module.___torch_mangle_2.GraphModule">, %arg1: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg2: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg3: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg4: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg5: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg6: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg7: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg8: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg9: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg10: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg11: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg12: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg13: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg14: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg15: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg16: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg17: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg18: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg19: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg20: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg21: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg22: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg23: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg24: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg25: !torch.tensor {torch.type_bound = 
!torch.vtensor<[768,768],f32>}, %arg26: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg27: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg28: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,30522],f32>}, %arg29: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg30: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg31: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg32: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg33: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg34: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg35: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg36: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg37: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg38: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg39: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg40: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg41: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg42: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg43: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg44: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg45: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg46: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg47: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg48: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg49: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg50: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}, %arg51: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg52: !torch.tensor {torch.type_bound = 
!torch.vtensor<[4,512,768],f32>}, %arg53: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg54: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg55: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg56: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg57: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg58: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg59: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg60: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg61: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg62: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg63: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg64: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg65: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg66: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg67: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg68: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg69: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg70: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg71: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg72: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg73: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg74: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg75: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg76: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg77: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg78: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg79: !torch.tensor {torch.type_bound = 
!torch.vtensor<[4,512,768],f32>}, %arg80: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg81: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg82: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg83: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg84: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg85: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg86: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg87: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg88: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg89: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg90: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg91: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg92: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg93: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg94: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg95: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg96: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg97: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg98: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg99: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg100: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg101: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg102: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg103: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg104: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg105: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, 
%arg106: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg107: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg108: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg109: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg110: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg111: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg112: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg113: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg114: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg115: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg116: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg117: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg118: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg119: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg120: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg121: !torch.tensor {torch.type_bound = !torch.vtensor<[1,512],si64>}, %arg122: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg123: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg124: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg125: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg126: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg127: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg128: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg129: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg130: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg131: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg132: !torch.tensor 
{torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg133: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg134: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg135: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg136: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg137: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg138: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg139: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg140: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg141: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg142: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg143: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg144: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg145: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg146: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg147: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg148: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg149: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg150: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg151: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg152: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg153: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg154: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg155: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg156: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg157: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg158: !torch.tensor {torch.type_bound = 
!torch.vtensor<[48,64,512],f32>}, %arg159: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg160: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg161: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg162: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg163: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg164: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg165: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg166: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg167: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg168: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg169: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg170: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg171: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg172: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg173: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg174: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg175: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg176: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg177: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg178: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg179: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg180: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg181: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg182: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg183: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg184: !torch.tensor {torch.type_bound = 
!torch.vtensor<[4,512,1],f32>}, %arg185: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg186: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg187: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg188: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg189: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg190: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg191: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg192: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg193: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg194: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg195: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg196: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg197: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg198: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg199: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg200: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg201: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg202: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg203: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg204: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg205: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg206: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg207: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg208: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg209: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg210: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, 
%arg211: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg212: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg213: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg214: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg215: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg216: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg217: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg218: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg219: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg220: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg221: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg222: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg223: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg224: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg225: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg226: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg227: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg228: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg229: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg230: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg231: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg232: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg233: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg234: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg235: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg236: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg237: 
!torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg238: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg239: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg240: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg241: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg242: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg243: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg244: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg245: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg246: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg247: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg248: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg249: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg250: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg251: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg252: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg253: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg254: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg255: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg256: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg257: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg258: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg259: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg260: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg261: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg262: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg263: !torch.tensor 
{torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg264: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg265: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg266: !torch.tensor {torch.type_bound = !torch.vtensor<[],f32>}, %arg267: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg268: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg269: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg270: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg271: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg272: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg273: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg274: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg275: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg276: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg277: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg278: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg279: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg280: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg281: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg282: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg283: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg284: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg285: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg286: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg287: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg288: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg289: !torch.tensor {torch.type_bound = 
!torch.vtensor<[4,512,1],f32>}, %arg290: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg291: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg292: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg293: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg294: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg295: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg296: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg297: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg298: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg299: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg300: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg301: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg302: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg303: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg304: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg305: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512],si64>}, %arg306: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg307: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg308: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg309: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg310: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg311: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg312: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg313: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg314: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg315: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, 
%arg316: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg317: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg318: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg319: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg320: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg321: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg322: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg323: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg324: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg325: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg326: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg327: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg328: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg329: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg330: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg331: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg332: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg333: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg334: !torch.tensor {torch.type_bound = !torch.vtensor<[30522,768],f32>}, %arg335: !torch.tensor {torch.type_bound = !torch.vtensor<[3072,768],f32>}, %arg336: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg337: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg338: !torch.tensor {torch.type_bound = !torch.vtensor<[2048],si64>}, %arg339: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg340: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg341: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg342: !torch.tensor 
{torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg343: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,30522],f32>}, %arg344: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg345: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg346: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg347: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg348: !torch.tensor {torch.type_bound = !torch.vtensor<[4,12,512,512],f32>}, %arg349: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg350: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg351: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg352: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg353: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg354: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg355: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg356: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg357: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg358: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,512],f32>}, %arg359: !torch.tensor {torch.type_bound = !torch.vtensor<[768,3072],f32>}, %arg360: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg361: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg362: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,1],f32>}, %arg363: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg364: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg365: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg366: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg367: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg368: !torch.tensor {torch.type_bound = 
!torch.vtensor<[3072,768],f32>}, %arg369: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg370: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg371: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg372: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg373: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg374: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg375: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg376: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg377: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg378: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg379: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg380: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg381: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,768],f32>}, %arg382: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg383: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg384: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg385: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg386: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg387: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg388: !torch.tensor {torch.type_bound = !torch.vtensor<[768],f32>}, %arg389: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,3072],f32>}, %arg390: !torch.tensor {torch.type_bound = !torch.vtensor<[48,512,64],f32>}, %arg391: !torch.tensor {torch.type_bound = !torch.vtensor<[48,64,512],f32>}, %arg392: !torch.tensor {torch.type_bound = !torch.vtensor<[768,768],f32>}, %arg393: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,3072],f32>}, %arg394: !torch.tensor {torch.type_bound = !torch.vtensor<[2048,768],f32>}, %arg395: 
!torch.tensor {torch.type_bound = !torch.vtensor<[],f32>}, %arg396: !torch.tensor {torch.type_bound = !torch.vtensor<[4,512,30522],f32>}) -> !torch.list<!torch.optional<!torch.tensor>> { | |
%false = torch.constant.bool false | |
%1 = torch.tensor.literal(dense<8.000000e+00> : tensor<f64>) : !torch.tensor<[],f64> | |
%true = torch.constant.bool true | |
%none = torch.constant.none | |
%int1 = torch.constant.int 1 | |
%int-100 = torch.constant.int -100 | |
%int6 = torch.constant.int 6 | |
%int4 = torch.constant.int 4 | |
%int512 = torch.constant.int 512 | |
%int30522 = torch.constant.int 30522 | |
%int0 = torch.constant.int 0 | |
%int2048 = torch.constant.int 2048 | |
%int768 = torch.constant.int 768 | |
%int3072 = torch.constant.int 3072 | |
%int12 = torch.constant.int 12 | |
%int64 = torch.constant.int 64 | |
%int2 = torch.constant.int 2 | |
%int3 = torch.constant.int 3 | |
%int48 = torch.constant.int 48 | |
%int-1 = torch.constant.int -1 | |
%int-2 = torch.constant.int -2 | |
%2 = torch.aten.gelu %arg351 : !torch.tensor -> !torch.tensor | |
%3 = torch.aten.nll_loss_backward %arg395, %arg343, %arg338, %none, %int1, %int-100, %arg266 : !torch.tensor, !torch.tensor, !torch.tensor, !torch.none, !torch.int, !torch.int, !torch.tensor -> !torch.tensor | |
%4 = torch.aten._log_softmax_backward_data %3, %arg28, %int1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%5 = torch.prim.ListConstruct %int4, %int512, %int30522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%6 = torch.aten.view %4, %5 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%7 = torch.aten.add.Tensor %arg396, %6, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%8 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%9 = torch.aten.sum.dim_IntList %7, %8, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%10 = torch.prim.ListConstruct %int30522 : (!torch.int) -> !torch.list<!torch.int> | |
%11 = torch.aten.view %9, %10 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%12 = torch.prim.ListConstruct %int2048, %int30522 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%13 = torch.aten.view %7, %12 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%14 = torch.aten.t %13 : !torch.tensor -> !torch.tensor | |
%15 = torch.aten.mm %14, %arg238 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%16 = torch.aten.mm %13, %arg334 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%17 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%18 = torch.aten.view %16, %17 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%19 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%20 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%21:3 = torch.operator "aten.native_layer_norm_backward"(%18, %2, %19, %arg151, %arg349, %arg304, %arg55, %20) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%22 = torch.aten.gelu_backward %21#0, %arg351 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%23 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%24 = torch.aten.sum.dim_IntList %22, %23, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%25 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%26 = torch.aten.view %24, %25 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%27 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%28 = torch.aten.view %22, %27 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%29 = torch.aten.t %28 : !torch.tensor -> !torch.tensor | |
%30 = torch.aten.mm %29, %arg212 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%31 = torch.aten.mm %28, %arg143 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%32 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%33 = torch.aten.view %31, %32 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%34 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%35 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%36:3 = torch.operator "aten.native_layer_norm_backward"(%33, %arg6, %34, %arg176, %arg256, %arg71, %arg54, %35) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%37 = torch.aten.mul.Tensor %36#0, %arg225 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%38 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%39 = torch.aten.sum.dim_IntList %37, %38, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%40 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%41 = torch.aten.view %39, %40 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%42 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%43 = torch.aten.view %37, %42 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%44 = torch.aten.t %43 : !torch.tensor -> !torch.tensor | |
%45 = torch.aten.mm %44, %arg167 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%46 = torch.aten.mm %43, %arg206 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%47 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%48 = torch.aten.view %46, %47 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%49 = torch.aten.gelu_backward %48, %arg243 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%50 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%51 = torch.aten.sum.dim_IntList %49, %50, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%52 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%53 = torch.aten.view %51, %52 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%54 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%55 = torch.aten.view %49, %54 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%56 = torch.aten.t %55 : !torch.tensor -> !torch.tensor | |
%57 = torch.aten.mm %56, %arg301 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%58 = torch.aten.mm %55, %arg191 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%59 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%60 = torch.aten.view %58, %59 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%61 = torch.aten.add.Tensor %36#0, %60, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%62 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%63 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%64:3 = torch.operator "aten.native_layer_norm_backward"(%61, %arg295, %62, %arg269, %arg200, %arg264, %arg49, %63) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%65 = torch.aten.mul.Tensor %64#0, %arg79 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%66 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%67 = torch.aten.sum.dim_IntList %65, %66, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%68 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%69 = torch.aten.view %67, %68 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%70 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%71 = torch.aten.view %65, %70 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%72 = torch.aten.t %71 : !torch.tensor -> !torch.tensor | |
%73 = torch.aten.mm %72, %arg19 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%74 = torch.aten.mm %71, %arg310 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%75 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%76 = torch.aten.view %74, %75 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%77 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%78 = torch.aten.view %76, %77 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%79 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%80 = torch.aten.permute %78, %79 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%81 = torch.operator "aten.clone"(%80, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%82 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%83 = torch.operator "aten._unsafe_view"(%81, %82) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%84 = torch.aten.bmm %arg358, %83 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%85 = torch.aten.bmm %83, %arg227 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%86 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%87 = torch.aten.view %84, %86 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%88 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%89 = torch.aten.view %85, %88 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%90 = torch.aten.mul.Tensor %89, %arg23 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%91 = torch.aten._softmax_backward_data %90, %arg76, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%92 = torch.aten.div.Tensor %91, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%93 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%94 = torch.aten.view %92, %93 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%95 = torch.aten.bmm %arg158, %94 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%96 = torch.aten.bmm %94, %arg84 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%97 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%98 = torch.aten.view %95, %97 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%99 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%100 = torch.aten.view %96, %99 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%101 = torch.aten.transpose.int %98, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%102 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%103 = torch.aten.permute %100, %102 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%104 = torch.operator "aten.clone"(%103, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%105 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%106 = torch.operator "aten._unsafe_view"(%104, %105) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%107 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%108 = torch.aten.permute %87, %107 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%109 = torch.operator "aten.clone"(%108, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%110 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%111 = torch.operator "aten._unsafe_view"(%109, %110) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%112 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%113 = torch.aten.sum.dim_IntList %111, %112, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%114 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%115 = torch.aten.view %113, %114 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%116 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%117 = torch.aten.view %111, %116 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%118 = torch.aten.t %117 : !torch.tensor -> !torch.tensor | |
%119 = torch.aten.mm %118, %arg377 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%120 = torch.aten.mm %117, %arg222 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%121 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%122 = torch.aten.view %120, %121 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%123 = torch.aten.add.Tensor %64#0, %122, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%124 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%125 = torch.aten.permute %101, %124 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%126 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%127 = torch.aten.view %125, %126 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%128 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%129 = torch.aten.sum.dim_IntList %127, %128, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%130 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%131 = torch.aten.view %129, %130 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%132 = torch.operator "aten.clone"(%127, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%133 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%134 = torch.operator "aten._unsafe_view"(%132, %133) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%135 = torch.aten.t %134 : !torch.tensor -> !torch.tensor | |
%136 = torch.aten.mm %135, %arg354 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%137 = torch.aten.mm %134, %arg265 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%138 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%139 = torch.aten.view %137, %138 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%140 = torch.aten.add.Tensor %123, %139, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%141 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%142 = torch.aten.sum.dim_IntList %106, %141, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%143 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%144 = torch.aten.view %142, %143 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%145 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%146 = torch.aten.view %106, %145 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%147 = torch.aten.t %146 : !torch.tensor -> !torch.tensor | |
%148 = torch.aten.mm %147, %arg226 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%149 = torch.aten.mm %146, %arg48 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%150 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%151 = torch.aten.view %149, %150 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%152 = torch.aten.add.Tensor %140, %151, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%153 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%154 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%155:3 = torch.operator "aten.native_layer_norm_backward"(%152, %arg360, %153, %arg277, %arg261, %arg363, %arg39, %154) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%156 = torch.aten.mul.Tensor %155#0, %arg272 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%157 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%158 = torch.aten.sum.dim_IntList %156, %157, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%159 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%160 = torch.aten.view %158, %159 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%161 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%162 = torch.aten.view %156, %161 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%163 = torch.aten.t %162 : !torch.tensor -> !torch.tensor | |
%164 = torch.aten.mm %163, %arg224 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%165 = torch.aten.mm %162, %arg359 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%166 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%167 = torch.aten.view %165, %166 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%168 = torch.aten.gelu_backward %167, %arg152 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%169 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%170 = torch.aten.sum.dim_IntList %168, %169, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%171 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%172 = torch.aten.view %170, %171 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%173 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%174 = torch.aten.view %168, %173 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%175 = torch.aten.t %174 : !torch.tensor -> !torch.tensor | |
%176 = torch.aten.mm %175, %arg63 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%177 = torch.aten.mm %174, %arg236 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%178 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%179 = torch.aten.view %177, %178 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%180 = torch.aten.add.Tensor %155#0, %179, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%181 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%182 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%183:3 = torch.operator "aten.native_layer_norm_backward"(%180, %arg290, %181, %arg362, %arg346, %arg73, %arg13, %182) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%184 = torch.aten.mul.Tensor %183#0, %arg194 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%185 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%186 = torch.aten.sum.dim_IntList %184, %185, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%187 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%188 = torch.aten.view %186, %187 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%189 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%190 = torch.aten.view %184, %189 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%191 = torch.aten.t %190 : !torch.tensor -> !torch.tensor | |
%192 = torch.aten.mm %191, %arg168 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%193 = torch.aten.mm %190, %arg44 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%194 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%195 = torch.aten.view %193, %194 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%196 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%197 = torch.aten.view %195, %196 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%198 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%199 = torch.aten.permute %197, %198 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%200 = torch.operator "aten.clone"(%199, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%201 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%202 = torch.operator "aten._unsafe_view"(%200, %201) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%203 = torch.aten.bmm %arg316, %202 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%204 = torch.aten.bmm %202, %arg92 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%205 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%206 = torch.aten.view %203, %205 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%207 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%208 = torch.aten.view %204, %207 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%209 = torch.aten.mul.Tensor %208, %arg348 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%210 = torch.aten._softmax_backward_data %209, %arg51, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%211 = torch.aten.div.Tensor %210, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%212 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%213 = torch.aten.view %211, %212 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%214 = torch.aten.bmm %arg186, %213 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%215 = torch.aten.bmm %213, %arg244 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%216 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%217 = torch.aten.view %214, %216 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%218 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%219 = torch.aten.view %215, %218 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%220 = torch.aten.transpose.int %217, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%221 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%222 = torch.aten.permute %219, %221 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%223 = torch.operator "aten.clone"(%222, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%224 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%225 = torch.operator "aten._unsafe_view"(%223, %224) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%226 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%227 = torch.aten.permute %206, %226 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%228 = torch.operator "aten.clone"(%227, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%229 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%230 = torch.operator "aten._unsafe_view"(%228, %229) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%231 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%232 = torch.aten.sum.dim_IntList %230, %231, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%233 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%234 = torch.aten.view %232, %233 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%235 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%236 = torch.aten.view %230, %235 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%237 = torch.aten.t %236 : !torch.tensor -> !torch.tensor | |
%238 = torch.aten.mm %237, %arg113 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%239 = torch.aten.mm %236, %arg146 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%240 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%241 = torch.aten.view %239, %240 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%242 = torch.aten.add.Tensor %183#0, %241, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%243 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%244 = torch.aten.permute %220, %243 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%245 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%246 = torch.aten.view %244, %245 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%247 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%248 = torch.aten.sum.dim_IntList %246, %247, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%249 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%250 = torch.aten.view %248, %249 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%251 = torch.operator "aten.clone"(%246, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%252 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%253 = torch.operator "aten._unsafe_view"(%251, %252) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%254 = torch.aten.t %253 : !torch.tensor -> !torch.tensor | |
%255 = torch.aten.mm %254, %arg11 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%256 = torch.aten.mm %253, %arg130 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%257 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%258 = torch.aten.view %256, %257 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%259 = torch.aten.add.Tensor %242, %258, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%260 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%261 = torch.aten.sum.dim_IntList %225, %260, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%262 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%263 = torch.aten.view %261, %262 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%264 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%265 = torch.aten.view %225, %264 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%266 = torch.aten.t %265 : !torch.tensor -> !torch.tensor | |
%267 = torch.aten.mm %266, %arg188 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%268 = torch.aten.mm %265, %arg95 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%269 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%270 = torch.aten.view %268, %269 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%271 = torch.aten.add.Tensor %259, %270, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%272 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%273 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%274:3 = torch.operator "aten.native_layer_norm_backward"(%271, %arg371, %272, %arg288, %arg361, %arg74, %arg141, %273) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%275 = torch.aten.mul.Tensor %274#0, %arg337 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%276 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%277 = torch.aten.sum.dim_IntList %275, %276, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%278 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%279 = torch.aten.view %277, %278 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%280 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%281 = torch.aten.view %275, %280 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%282 = torch.aten.t %281 : !torch.tensor -> !torch.tensor | |
%283 = torch.aten.mm %282, %arg148 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%284 = torch.aten.mm %281, %arg324 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%285 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%286 = torch.aten.view %284, %285 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%287 = torch.aten.gelu_backward %286, %arg166 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%288 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%289 = torch.aten.sum.dim_IntList %287, %288, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%290 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%291 = torch.aten.view %289, %290 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%292 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%293 = torch.aten.view %287, %292 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%294 = torch.aten.t %293 : !torch.tensor -> !torch.tensor | |
%295 = torch.aten.mm %294, %arg242 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%296 = torch.aten.mm %293, %arg123 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%297 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%298 = torch.aten.view %296, %297 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%299 = torch.aten.add.Tensor %274#0, %298, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%300 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%301 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%302:3 = torch.operator "aten.native_layer_norm_backward"(%299, %arg241, %300, %arg229, %arg90, %arg154, %arg110, %301) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%303 = torch.aten.mul.Tensor %302#0, %arg375 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%304 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%305 = torch.aten.sum.dim_IntList %303, %304, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%306 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%307 = torch.aten.view %305, %306 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%308 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%309 = torch.aten.view %303, %308 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%310 = torch.aten.t %309 : !torch.tensor -> !torch.tensor | |
%311 = torch.aten.mm %310, %arg383 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%312 = torch.aten.mm %309, %arg232 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%313 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%314 = torch.aten.view %312, %313 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%315 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%316 = torch.aten.view %314, %315 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%317 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%318 = torch.aten.permute %316, %317 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%319 = torch.operator "aten.clone"(%318, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%320 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%321 = torch.operator "aten._unsafe_view"(%319, %320) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%322 = torch.aten.bmm %arg248, %321 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%323 = torch.aten.bmm %321, %arg391 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%324 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%325 = torch.aten.view %322, %324 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%326 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%327 = torch.aten.view %323, %326 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%328 = torch.aten.mul.Tensor %327, %arg5 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%329 = torch.aten._softmax_backward_data %328, %arg223, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%330 = torch.aten.div.Tensor %329, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%331 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%332 = torch.aten.view %330, %331 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%333 = torch.aten.bmm %arg120, %332 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%334 = torch.aten.bmm %332, %arg240 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%335 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%336 = torch.aten.view %333, %335 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%337 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%338 = torch.aten.view %334, %337 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%339 = torch.aten.transpose.int %336, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%340 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%341 = torch.aten.permute %338, %340 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%342 = torch.operator "aten.clone"(%341, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%343 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%344 = torch.operator "aten._unsafe_view"(%342, %343) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%345 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%346 = torch.aten.permute %325, %345 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%347 = torch.operator "aten.clone"(%346, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%348 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%349 = torch.operator "aten._unsafe_view"(%347, %348) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%350 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%351 = torch.aten.sum.dim_IntList %349, %350, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%352 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%353 = torch.aten.view %351, %352 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%354 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%355 = torch.aten.view %349, %354 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%356 = torch.aten.t %355 : !torch.tensor -> !torch.tensor | |
%357 = torch.aten.mm %356, %arg136 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%358 = torch.aten.mm %355, %arg192 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%359 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%360 = torch.aten.view %358, %359 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%361 = torch.aten.add.Tensor %302#0, %360, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%362 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%363 = torch.aten.permute %339, %362 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%364 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%365 = torch.aten.view %363, %364 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%366 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%367 = torch.aten.sum.dim_IntList %365, %366, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%368 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%369 = torch.aten.view %367, %368 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%370 = torch.operator "aten.clone"(%365, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%371 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%372 = torch.operator "aten._unsafe_view"(%370, %371) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%373 = torch.aten.t %372 : !torch.tensor -> !torch.tensor | |
%374 = torch.aten.mm %373, %arg205 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%375 = torch.aten.mm %372, %arg131 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%376 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%377 = torch.aten.view %375, %376 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%378 = torch.aten.add.Tensor %361, %377, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%379 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%380 = torch.aten.sum.dim_IntList %344, %379, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%381 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%382 = torch.aten.view %380, %381 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%383 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%384 = torch.aten.view %344, %383 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%385 = torch.aten.t %384 : !torch.tensor -> !torch.tensor | |
%386 = torch.aten.mm %385, %arg40 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%387 = torch.aten.mm %384, %arg88 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%388 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%389 = torch.aten.view %387, %388 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%390 = torch.aten.add.Tensor %378, %389, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%391 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%392 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%393:3 = torch.operator "aten.native_layer_norm_backward"(%390, %arg345, %391, %arg41, %arg132, %arg234, %arg35, %392) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%394 = torch.aten.mul.Tensor %393#0, %arg182 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%395 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%396 = torch.aten.sum.dim_IntList %394, %395, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%397 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%398 = torch.aten.view %396, %397 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%399 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%400 = torch.aten.view %394, %399 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%401 = torch.aten.t %400 : !torch.tensor -> !torch.tensor | |
%402 = torch.aten.mm %401, %arg342 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%403 = torch.aten.mm %400, %arg155 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%404 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%405 = torch.aten.view %403, %404 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%406 = torch.aten.gelu_backward %405, %arg124 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%407 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%408 = torch.aten.sum.dim_IntList %406, %407, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%409 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%410 = torch.aten.view %408, %409 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%411 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%412 = torch.aten.view %406, %411 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%413 = torch.aten.t %412 : !torch.tensor -> !torch.tensor | |
%414 = torch.aten.mm %413, %arg211 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%415 = torch.aten.mm %412, %arg335 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%416 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%417 = torch.aten.view %415, %416 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%418 = torch.aten.add.Tensor %393#0, %417, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%419 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%420 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%421:3 = torch.operator "aten.native_layer_norm_backward"(%418, %arg181, %419, %arg115, %arg4, %arg75, %arg385, %420) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%422 = torch.aten.mul.Tensor %421#0, %arg250 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%423 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%424 = torch.aten.sum.dim_IntList %422, %423, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%425 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%426 = torch.aten.view %424, %425 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%427 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%428 = torch.aten.view %422, %427 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%429 = torch.aten.t %428 : !torch.tensor -> !torch.tensor | |
%430 = torch.aten.mm %429, %arg233 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%431 = torch.aten.mm %428, %arg336 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%432 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%433 = torch.aten.view %431, %432 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%434 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%435 = torch.aten.view %433, %434 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%436 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%437 = torch.aten.permute %435, %436 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%438 = torch.operator "aten.clone"(%437, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%439 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%440 = torch.operator "aten._unsafe_view"(%438, %439) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%441 = torch.aten.bmm %arg149, %440 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%442 = torch.aten.bmm %440, %arg319 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%443 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%444 = torch.aten.view %441, %443 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%445 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%446 = torch.aten.view %442, %445 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%447 = torch.aten.mul.Tensor %446, %arg260 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%448 = torch.aten._softmax_backward_data %447, %arg94, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%449 = torch.aten.div.Tensor %448, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%450 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%451 = torch.aten.view %449, %450 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%452 = torch.aten.bmm %arg328, %451 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%453 = torch.aten.bmm %451, %arg217 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%454 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%455 = torch.aten.view %452, %454 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%456 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%457 = torch.aten.view %453, %456 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%458 = torch.aten.transpose.int %455, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%459 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%460 = torch.aten.permute %457, %459 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%461 = torch.operator "aten.clone"(%460, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%462 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%463 = torch.operator "aten._unsafe_view"(%461, %462) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%464 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%465 = torch.aten.permute %444, %464 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%466 = torch.operator "aten.clone"(%465, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%467 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%468 = torch.operator "aten._unsafe_view"(%466, %467) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%469 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%470 = torch.aten.sum.dim_IntList %468, %469, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%471 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%472 = torch.aten.view %470, %471 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%473 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%474 = torch.aten.view %468, %473 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%475 = torch.aten.t %474 : !torch.tensor -> !torch.tensor | |
%476 = torch.aten.mm %475, %arg106 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%477 = torch.aten.mm %474, %arg46 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%478 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%479 = torch.aten.view %477, %478 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%480 = torch.aten.add.Tensor %421#0, %479, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%481 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%482 = torch.aten.permute %458, %481 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%483 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%484 = torch.aten.view %482, %483 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%485 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%486 = torch.aten.sum.dim_IntList %484, %485, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%487 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%488 = torch.aten.view %486, %487 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%489 = torch.operator "aten.clone"(%484, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%490 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%491 = torch.operator "aten._unsafe_view"(%489, %490) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%492 = torch.aten.t %491 : !torch.tensor -> !torch.tensor | |
%493 = torch.aten.mm %492, %arg367 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%494 = torch.aten.mm %491, %arg382 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%495 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%496 = torch.aten.view %494, %495 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%497 = torch.aten.add.Tensor %480, %496, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%498 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%499 = torch.aten.sum.dim_IntList %463, %498, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%500 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%501 = torch.aten.view %499, %500 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%502 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%503 = torch.aten.view %463, %502 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%504 = torch.aten.t %503 : !torch.tensor -> !torch.tensor | |
%505 = torch.aten.mm %504, %arg104 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%506 = torch.aten.mm %503, %arg364 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%507 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%508 = torch.aten.view %506, %507 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%509 = torch.aten.add.Tensor %497, %508, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%510 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%511 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%512:3 = torch.operator "aten.native_layer_norm_backward"(%509, %arg101, %510, %arg62, %arg122, %arg341, %arg16, %511) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%513 = torch.aten.mul.Tensor %512#0, %arg87 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%514 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%515 = torch.aten.sum.dim_IntList %513, %514, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%516 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%517 = torch.aten.view %515, %516 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%518 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%519 = torch.aten.view %513, %518 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%520 = torch.aten.t %519 : !torch.tensor -> !torch.tensor | |
%521 = torch.aten.mm %520, %arg311 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%522 = torch.aten.mm %519, %arg201 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%523 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%524 = torch.aten.view %522, %523 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%525 = torch.aten.gelu_backward %524, %arg281 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%526 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%527 = torch.aten.sum.dim_IntList %525, %526, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%528 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%529 = torch.aten.view %527, %528 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%530 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%531 = torch.aten.view %525, %530 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%532 = torch.aten.t %531 : !torch.tensor -> !torch.tensor | |
%533 = torch.aten.mm %532, %arg380 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%534 = torch.aten.mm %531, %arg196 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%535 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%536 = torch.aten.view %534, %535 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%537 = torch.aten.add.Tensor %512#0, %536, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%538 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%539 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%540:3 = torch.operator "aten.native_layer_norm_backward"(%537, %arg308, %538, %arg145, %arg109, %arg114, %arg81, %539) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%541 = torch.aten.mul.Tensor %540#0, %arg177 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%542 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%543 = torch.aten.sum.dim_IntList %541, %542, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%544 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%545 = torch.aten.view %543, %544 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%546 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%547 = torch.aten.view %541, %546 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%548 = torch.aten.t %547 : !torch.tensor -> !torch.tensor | |
%549 = torch.aten.mm %548, %arg9 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%550 = torch.aten.mm %547, %arg102 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%551 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%552 = torch.aten.view %550, %551 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%553 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%554 = torch.aten.view %552, %553 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%555 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%556 = torch.aten.permute %554, %555 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%557 = torch.operator "aten.clone"(%556, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%558 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%559 = torch.operator "aten._unsafe_view"(%557, %558) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%560 = torch.aten.bmm %arg64, %559 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%561 = torch.aten.bmm %559, %arg219 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%562 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%563 = torch.aten.view %560, %562 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%564 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%565 = torch.aten.view %561, %564 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%566 = torch.aten.mul.Tensor %565, %arg161 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%567 = torch.aten._softmax_backward_data %566, %arg231, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%568 = torch.aten.div.Tensor %567, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%569 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%570 = torch.aten.view %568, %569 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%571 = torch.aten.bmm %arg118, %570 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%572 = torch.aten.bmm %570, %arg390 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%573 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%574 = torch.aten.view %571, %573 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%575 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%576 = torch.aten.view %572, %575 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%577 = torch.aten.transpose.int %574, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%578 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%579 = torch.aten.permute %576, %578 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%580 = torch.operator "aten.clone"(%579, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%581 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%582 = torch.operator "aten._unsafe_view"(%580, %581) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%583 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%584 = torch.aten.permute %563, %583 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%585 = torch.operator "aten.clone"(%584, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%586 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%587 = torch.operator "aten._unsafe_view"(%585, %586) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%588 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%589 = torch.aten.sum.dim_IntList %587, %588, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%590 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%591 = torch.aten.view %589, %590 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%592 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%593 = torch.aten.view %587, %592 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%594 = torch.aten.t %593 : !torch.tensor -> !torch.tensor | |
%595 = torch.aten.mm %594, %arg267 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%596 = torch.aten.mm %593, %arg93 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%597 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%598 = torch.aten.view %596, %597 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%599 = torch.aten.add.Tensor %540#0, %598, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%600 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%601 = torch.aten.permute %577, %600 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%602 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%603 = torch.aten.view %601, %602 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%604 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%605 = torch.aten.sum.dim_IntList %603, %604, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%606 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%607 = torch.aten.view %605, %606 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%608 = torch.operator "aten.clone"(%603, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%609 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%610 = torch.operator "aten._unsafe_view"(%608, %609) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%611 = torch.aten.t %610 : !torch.tensor -> !torch.tensor | |
%612 = torch.aten.mm %611, %arg254 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%613 = torch.aten.mm %610, %arg142 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%614 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%615 = torch.aten.view %613, %614 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%616 = torch.aten.add.Tensor %599, %615, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%617 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%618 = torch.aten.sum.dim_IntList %582, %617, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%619 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%620 = torch.aten.view %618, %619 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%621 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%622 = torch.aten.view %582, %621 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%623 = torch.aten.t %622 : !torch.tensor -> !torch.tensor | |
%624 = torch.aten.mm %623, %arg275 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%625 = torch.aten.mm %622, %arg392 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%626 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%627 = torch.aten.view %625, %626 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%628 = torch.aten.add.Tensor %616, %627, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%629 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%630 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%631:3 = torch.operator "aten.native_layer_norm_backward"(%628, %arg221, %629, %arg280, %arg99, %arg150, %arg117, %630) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%632 = torch.aten.mul.Tensor %631#0, %arg373 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%633 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%634 = torch.aten.sum.dim_IntList %632, %633, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%635 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%636 = torch.aten.view %634, %635 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%637 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%638 = torch.aten.view %632, %637 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%639 = torch.aten.t %638 : !torch.tensor -> !torch.tensor | |
%640 = torch.aten.mm %639, %arg157 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%641 = torch.aten.mm %638, %arg333 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%642 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%643 = torch.aten.view %641, %642 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%644 = torch.aten.gelu_backward %643, %arg286 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%645 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%646 = torch.aten.sum.dim_IntList %644, %645, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%647 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%648 = torch.aten.view %646, %647 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%649 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%650 = torch.aten.view %644, %649 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%651 = torch.aten.t %650 : !torch.tensor -> !torch.tensor | |
%652 = torch.aten.mm %651, %arg34 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%653 = torch.aten.mm %650, %arg368 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%654 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%655 = torch.aten.view %653, %654 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%656 = torch.aten.add.Tensor %631#0, %655, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%657 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%658 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%659:3 = torch.operator "aten.native_layer_norm_backward"(%656, %arg344, %657, %arg56, %arg170, %arg258, %arg38, %658) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%660 = torch.aten.mul.Tensor %659#0, %arg332 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%661 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%662 = torch.aten.sum.dim_IntList %660, %661, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%663 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%664 = torch.aten.view %662, %663 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%665 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%666 = torch.aten.view %660, %665 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%667 = torch.aten.t %666 : !torch.tensor -> !torch.tensor | |
%668 = torch.aten.mm %667, %arg43 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%669 = torch.aten.mm %666, %arg25 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%670 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%671 = torch.aten.view %669, %670 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%672 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%673 = torch.aten.view %671, %672 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%674 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%675 = torch.aten.permute %673, %674 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%676 = torch.operator "aten.clone"(%675, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%677 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%678 = torch.operator "aten._unsafe_view"(%676, %677) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%679 = torch.aten.bmm %arg270, %678 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%680 = torch.aten.bmm %678, %arg156 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%681 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%682 = torch.aten.view %679, %681 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%683 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%684 = torch.aten.view %680, %683 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%685 = torch.aten.mul.Tensor %684, %arg237 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%686 = torch.aten._softmax_backward_data %685, %arg251, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%687 = torch.aten.div.Tensor %686, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%688 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%689 = torch.aten.view %687, %688 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%690 = torch.aten.bmm %arg339, %689 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%691 = torch.aten.bmm %689, %arg284 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%692 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%693 = torch.aten.view %690, %692 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%694 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%695 = torch.aten.view %691, %694 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%696 = torch.aten.transpose.int %693, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%697 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%698 = torch.aten.permute %695, %697 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%699 = torch.operator "aten.clone"(%698, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%700 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%701 = torch.operator "aten._unsafe_view"(%699, %700) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%702 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%703 = torch.aten.permute %682, %702 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%704 = torch.operator "aten.clone"(%703, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%705 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%706 = torch.operator "aten._unsafe_view"(%704, %705) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%707 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%708 = torch.aten.sum.dim_IntList %706, %707, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%709 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%710 = torch.aten.view %708, %709 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%711 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%712 = torch.aten.view %706, %711 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%713 = torch.aten.t %712 : !torch.tensor -> !torch.tensor | |
%714 = torch.aten.mm %713, %arg147 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%715 = torch.aten.mm %712, %arg314 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%716 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%717 = torch.aten.view %715, %716 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%718 = torch.aten.add.Tensor %659#0, %717, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%719 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%720 = torch.aten.permute %696, %719 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%721 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%722 = torch.aten.view %720, %721 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%723 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%724 = torch.aten.sum.dim_IntList %722, %723, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%725 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%726 = torch.aten.view %724, %725 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%727 = torch.operator "aten.clone"(%722, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%728 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%729 = torch.operator "aten._unsafe_view"(%727, %728) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%730 = torch.aten.t %729 : !torch.tensor -> !torch.tensor | |
%731 = torch.aten.mm %730, %arg69 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%732 = torch.aten.mm %729, %arg140 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%733 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%734 = torch.aten.view %732, %733 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%735 = torch.aten.add.Tensor %718, %734, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%736 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%737 = torch.aten.sum.dim_IntList %701, %736, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%738 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%739 = torch.aten.view %737, %738 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%740 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%741 = torch.aten.view %701, %740 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%742 = torch.aten.t %741 : !torch.tensor -> !torch.tensor | |
%743 = torch.aten.mm %742, %arg329 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%744 = torch.aten.mm %741, %arg119 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%745 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%746 = torch.aten.view %744, %745 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%747 = torch.aten.add.Tensor %735, %746, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%748 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%749 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%750:3 = torch.operator "aten.native_layer_norm_backward"(%747, %arg174, %748, %arg82, %arg70, %arg72, %arg271, %749) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%751 = torch.aten.mul.Tensor %750#0, %arg259 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%752 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%753 = torch.aten.sum.dim_IntList %751, %752, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%754 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%755 = torch.aten.view %753, %754 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%756 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%757 = torch.aten.view %751, %756 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%758 = torch.aten.t %757 : !torch.tensor -> !torch.tensor | |
%759 = torch.aten.mm %758, %arg372 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%760 = torch.aten.mm %757, %arg273 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%761 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%762 = torch.aten.view %760, %761 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%763 = torch.aten.gelu_backward %762, %arg366 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%764 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%765 = torch.aten.sum.dim_IntList %763, %764, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%766 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%767 = torch.aten.view %765, %766 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%768 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%769 = torch.aten.view %763, %768 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%770 = torch.aten.t %769 : !torch.tensor -> !torch.tensor | |
%771 = torch.aten.mm %770, %arg85 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%772 = torch.aten.mm %769, %arg218 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%773 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%774 = torch.aten.view %772, %773 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%775 = torch.aten.add.Tensor %750#0, %774, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%776 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%777 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%778:3 = torch.operator "aten.native_layer_norm_backward"(%775, %arg312, %776, %arg239, %arg327, %arg53, %arg350, %777) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%779 = torch.aten.mul.Tensor %778#0, %arg127 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%780 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%781 = torch.aten.sum.dim_IntList %779, %780, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%782 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%783 = torch.aten.view %781, %782 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%784 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%785 = torch.aten.view %779, %784 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%786 = torch.aten.t %785 : !torch.tensor -> !torch.tensor | |
%787 = torch.aten.mm %786, %arg133 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%788 = torch.aten.mm %785, %arg12 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%789 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%790 = torch.aten.view %788, %789 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%791 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%792 = torch.aten.view %790, %791 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%793 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%794 = torch.aten.permute %792, %793 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%795 = torch.operator "aten.clone"(%794, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%796 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%797 = torch.operator "aten._unsafe_view"(%795, %796) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%798 = torch.aten.bmm %arg59, %797 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%799 = torch.aten.bmm %797, %arg163 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%800 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%801 = torch.aten.view %798, %800 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%802 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%803 = torch.aten.view %799, %802 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%804 = torch.aten.mul.Tensor %803, %arg33 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%805 = torch.aten._softmax_backward_data %804, %arg10, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%806 = torch.aten.div.Tensor %805, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%807 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%808 = torch.aten.view %806, %807 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%809 = torch.aten.bmm %arg125, %808 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%810 = torch.aten.bmm %808, %arg100 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%811 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%812 = torch.aten.view %809, %811 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%813 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%814 = torch.aten.view %810, %813 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%815 = torch.aten.transpose.int %812, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%816 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%817 = torch.aten.permute %814, %816 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%818 = torch.operator "aten.clone"(%817, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%819 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%820 = torch.operator "aten._unsafe_view"(%818, %819) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%821 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%822 = torch.aten.permute %801, %821 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%823 = torch.operator "aten.clone"(%822, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%824 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%825 = torch.operator "aten._unsafe_view"(%823, %824) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%826 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%827 = torch.aten.sum.dim_IntList %825, %826, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%828 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%829 = torch.aten.view %827, %828 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%830 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%831 = torch.aten.view %825, %830 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%832 = torch.aten.t %831 : !torch.tensor -> !torch.tensor | |
%833 = torch.aten.mm %832, %arg185 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%834 = torch.aten.mm %831, %arg307 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%835 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%836 = torch.aten.view %834, %835 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%837 = torch.aten.add.Tensor %778#0, %836, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%838 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%839 = torch.aten.permute %815, %838 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%840 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%841 = torch.aten.view %839, %840 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%842 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%843 = torch.aten.sum.dim_IntList %841, %842, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%844 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%845 = torch.aten.view %843, %844 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%846 = torch.operator "aten.clone"(%841, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%847 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%848 = torch.operator "aten._unsafe_view"(%846, %847) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%849 = torch.aten.t %848 : !torch.tensor -> !torch.tensor | |
%850 = torch.aten.mm %849, %arg298 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%851 = torch.aten.mm %848, %arg263 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%852 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%853 = torch.aten.view %851, %852 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%854 = torch.aten.add.Tensor %837, %853, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%855 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%856 = torch.aten.sum.dim_IntList %820, %855, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%857 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%858 = torch.aten.view %856, %857 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%859 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%860 = torch.aten.view %820, %859 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%861 = torch.aten.t %860 : !torch.tensor -> !torch.tensor | |
%862 = torch.aten.mm %861, %arg230 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%863 = torch.aten.mm %860, %arg197 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%864 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%865 = torch.aten.view %863, %864 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%866 = torch.aten.add.Tensor %854, %865, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%867 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%868 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%869:3 = torch.operator "aten.native_layer_norm_backward"(%866, %arg37, %867, %arg138, %arg289, %arg353, %arg235, %868) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%870 = torch.aten.mul.Tensor %869#0, %arg313 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%871 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%872 = torch.aten.sum.dim_IntList %870, %871, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%873 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%874 = torch.aten.view %872, %873 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%875 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%876 = torch.aten.view %870, %875 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%877 = torch.aten.t %876 : !torch.tensor -> !torch.tensor | |
%878 = torch.aten.mm %877, %arg384 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%879 = torch.aten.mm %876, %arg31 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%880 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%881 = torch.aten.view %879, %880 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%882 = torch.aten.gelu_backward %881, %arg378 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%883 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%884 = torch.aten.sum.dim_IntList %882, %883, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%885 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%886 = torch.aten.view %884, %885 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%887 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%888 = torch.aten.view %882, %887 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%889 = torch.aten.t %888 : !torch.tensor -> !torch.tensor | |
%890 = torch.aten.mm %889, %arg394 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%891 = torch.aten.mm %888, %arg68 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%892 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%893 = torch.aten.view %891, %892 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%894 = torch.aten.add.Tensor %869#0, %893, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%895 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%896 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%897:3 = torch.operator "aten.native_layer_norm_backward"(%894, %arg86, %895, %arg283, %arg246, %arg387, %arg208, %896) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%898 = torch.aten.mul.Tensor %897#0, %arg83 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%899 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%900 = torch.aten.sum.dim_IntList %898, %899, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%901 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%902 = torch.aten.view %900, %901 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%903 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%904 = torch.aten.view %898, %903 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%905 = torch.aten.t %904 : !torch.tensor -> !torch.tensor | |
%906 = torch.aten.mm %905, %arg29 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%907 = torch.aten.mm %904, %arg162 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%908 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%909 = torch.aten.view %907, %908 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%910 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%911 = torch.aten.view %909, %910 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%912 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%913 = torch.aten.permute %911, %912 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%914 = torch.operator "aten.clone"(%913, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%915 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%916 = torch.operator "aten._unsafe_view"(%914, %915) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%917 = torch.aten.bmm %arg129, %916 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%918 = torch.aten.bmm %916, %arg321 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%919 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%920 = torch.aten.view %917, %919 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%921 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%922 = torch.aten.view %918, %921 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%923 = torch.aten.mul.Tensor %922, %arg107 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%924 = torch.aten._softmax_backward_data %923, %arg190, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%925 = torch.aten.div.Tensor %924, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%926 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%927 = torch.aten.view %925, %926 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%928 = torch.aten.bmm %arg160, %927 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%929 = torch.aten.bmm %927, %arg153 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%930 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%931 = torch.aten.view %928, %930 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%932 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%933 = torch.aten.view %929, %932 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%934 = torch.aten.transpose.int %931, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%935 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%936 = torch.aten.permute %933, %935 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%937 = torch.operator "aten.clone"(%936, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%938 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%939 = torch.operator "aten._unsafe_view"(%937, %938) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%940 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%941 = torch.aten.permute %920, %940 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%942 = torch.operator "aten.clone"(%941, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%943 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%944 = torch.operator "aten._unsafe_view"(%942, %943) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%945 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%946 = torch.aten.sum.dim_IntList %944, %945, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%947 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%948 = torch.aten.view %946, %947 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%949 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%950 = torch.aten.view %944, %949 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%951 = torch.aten.t %950 : !torch.tensor -> !torch.tensor | |
%952 = torch.aten.mm %951, %arg60 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%953 = torch.aten.mm %950, %arg228 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%954 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%955 = torch.aten.view %953, %954 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%956 = torch.aten.add.Tensor %897#0, %955, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%957 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%958 = torch.aten.permute %934, %957 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%959 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%960 = torch.aten.view %958, %959 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%961 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%962 = torch.aten.sum.dim_IntList %960, %961, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%963 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%964 = torch.aten.view %962, %963 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%965 = torch.operator "aten.clone"(%960, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%966 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%967 = torch.operator "aten._unsafe_view"(%965, %966) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%968 = torch.aten.t %967 : !torch.tensor -> !torch.tensor | |
%969 = torch.aten.mm %968, %arg14 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%970 = torch.aten.mm %967, %arg173 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%971 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%972 = torch.aten.view %970, %971 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%973 = torch.aten.add.Tensor %956, %972, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%974 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%975 = torch.aten.sum.dim_IntList %939, %974, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%976 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%977 = torch.aten.view %975, %976 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%978 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%979 = torch.aten.view %939, %978 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%980 = torch.aten.t %979 : !torch.tensor -> !torch.tensor | |
%981 = torch.aten.mm %980, %arg97 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%982 = torch.aten.mm %979, %arg299 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%983 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%984 = torch.aten.view %982, %983 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%985 = torch.aten.add.Tensor %973, %984, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%986 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%987 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%988:3 = torch.operator "aten.native_layer_norm_backward"(%985, %arg216, %986, %arg112, %arg137, %arg3, %arg247, %987) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%989 = torch.aten.mul.Tensor %988#0, %arg178 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%990 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%991 = torch.aten.sum.dim_IntList %989, %990, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%992 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%993 = torch.aten.view %991, %992 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%994 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%995 = torch.aten.view %989, %994 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%996 = torch.aten.t %995 : !torch.tensor -> !torch.tensor | |
%997 = torch.aten.mm %996, %arg203 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%998 = torch.aten.mm %995, %arg22 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%999 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1000 = torch.aten.view %998, %999 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1001 = torch.aten.gelu_backward %1000, %arg330 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1002 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1003 = torch.aten.sum.dim_IntList %1001, %1002, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1004 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%1005 = torch.aten.view %1003, %1004 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1006 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1007 = torch.aten.view %1001, %1006 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1008 = torch.aten.t %1007 : !torch.tensor -> !torch.tensor | |
%1009 = torch.aten.mm %1008, %arg193 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1010 = torch.aten.mm %1007, %arg276 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1011 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1012 = torch.aten.view %1010, %1011 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1013 = torch.aten.add.Tensor %988#0, %1012, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1014 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1015 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%1016:3 = torch.operator "aten.native_layer_norm_backward"(%1013, %arg306, %1014, %arg315, %arg2, %arg257, %arg180, %1015) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%1017 = torch.aten.mul.Tensor %1016#0, %arg215 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1018 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1019 = torch.aten.sum.dim_IntList %1017, %1018, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1020 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1021 = torch.aten.view %1019, %1020 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1022 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1023 = torch.aten.view %1017, %1022 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1024 = torch.aten.t %1023 : !torch.tensor -> !torch.tensor | |
%1025 = torch.aten.mm %1024, %arg374 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1026 = torch.aten.mm %1023, %arg213 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1027 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1028 = torch.aten.view %1026, %1027 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1029 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1030 = torch.aten.view %1028, %1029 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1031 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1032 = torch.aten.permute %1030, %1031 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1033 = torch.operator "aten.clone"(%1032, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1034 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1035 = torch.operator "aten._unsafe_view"(%1033, %1034) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1036 = torch.aten.bmm %arg61, %1035 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1037 = torch.aten.bmm %1035, %arg279 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1038 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1039 = torch.aten.view %1036, %1038 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1040 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1041 = torch.aten.view %1037, %1040 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1042 = torch.aten.mul.Tensor %1041, %arg159 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1043 = torch.aten._softmax_backward_data %1042, %arg116, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1044 = torch.aten.div.Tensor %1043, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%1045 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1046 = torch.aten.view %1044, %1045 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1047 = torch.aten.bmm %arg67, %1046 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1048 = torch.aten.bmm %1046, %arg268 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1049 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1050 = torch.aten.view %1047, %1049 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1051 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1052 = torch.aten.view %1048, %1051 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1053 = torch.aten.transpose.int %1050, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1054 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1055 = torch.aten.permute %1052, %1054 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1056 = torch.operator "aten.clone"(%1055, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1057 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1058 = torch.operator "aten._unsafe_view"(%1056, %1057) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1059 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1060 = torch.aten.permute %1039, %1059 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1061 = torch.operator "aten.clone"(%1060, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1062 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1063 = torch.operator "aten._unsafe_view"(%1061, %1062) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1064 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1065 = torch.aten.sum.dim_IntList %1063, %1064, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1066 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1067 = torch.aten.view %1065, %1066 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1068 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1069 = torch.aten.view %1063, %1068 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1070 = torch.aten.t %1069 : !torch.tensor -> !torch.tensor | |
%1071 = torch.aten.mm %1070, %arg292 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1072 = torch.aten.mm %1069, %arg27 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1073 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1074 = torch.aten.view %1072, %1073 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1075 = torch.aten.add.Tensor %1016#0, %1074, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1076 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1077 = torch.aten.permute %1053, %1076 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1078 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1079 = torch.aten.view %1077, %1078 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1080 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1081 = torch.aten.sum.dim_IntList %1079, %1080, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1082 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1083 = torch.aten.view %1081, %1082 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1084 = torch.operator "aten.clone"(%1079, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1085 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1086 = torch.operator "aten._unsafe_view"(%1084, %1085) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1087 = torch.aten.t %1086 : !torch.tensor -> !torch.tensor | |
%1088 = torch.aten.mm %1087, %arg347 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1089 = torch.aten.mm %1086, %arg58 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1090 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1091 = torch.aten.view %1089, %1090 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1092 = torch.aten.add.Tensor %1075, %1091, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1093 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1094 = torch.aten.sum.dim_IntList %1058, %1093, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1095 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1096 = torch.aten.view %1094, %1095 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1097 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1098 = torch.aten.view %1058, %1097 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1099 = torch.aten.t %1098 : !torch.tensor -> !torch.tensor | |
%1100 = torch.aten.mm %1099, %arg77 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1101 = torch.aten.mm %1098, %arg126 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1102 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1103 = torch.aten.view %1101, %1102 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1104 = torch.aten.add.Tensor %1092, %1103, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1105 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1106 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%1107:3 = torch.operator "aten.native_layer_norm_backward"(%1104, %arg285, %1105, %arg8, %arg209, %arg376, %arg105, %1106) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%1108 = torch.aten.mul.Tensor %1107#0, %arg80 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1109 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1110 = torch.aten.sum.dim_IntList %1108, %1109, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1111 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1112 = torch.aten.view %1110, %1111 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1113 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1114 = torch.aten.view %1108, %1113 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1115 = torch.aten.t %1114 : !torch.tensor -> !torch.tensor | |
%1116 = torch.aten.mm %1115, %arg32 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1117 = torch.aten.mm %1114, %arg252 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1118 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1119 = torch.aten.view %1117, %1118 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1120 = torch.aten.gelu_backward %1119, %arg393 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1121 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1122 = torch.aten.sum.dim_IntList %1120, %1121, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1123 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%1124 = torch.aten.view %1122, %1123 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1125 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1126 = torch.aten.view %1120, %1125 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1127 = torch.aten.t %1126 : !torch.tensor -> !torch.tensor | |
%1128 = torch.aten.mm %1127, %arg195 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1129 = torch.aten.mm %1126, %arg220 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1130 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1131 = torch.aten.view %1129, %1130 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1132 = torch.aten.add.Tensor %1107#0, %1131, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1133 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1134 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%1135:3 = torch.operator "aten.native_layer_norm_backward"(%1132, %arg296, %1133, %arg199, %arg287, %arg42, %arg303, %1134) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%1136 = torch.aten.mul.Tensor %1135#0, %arg1 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1137 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1138 = torch.aten.sum.dim_IntList %1136, %1137, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1139 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1140 = torch.aten.view %1138, %1139 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1141 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1142 = torch.aten.view %1136, %1141 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1143 = torch.aten.t %1142 : !torch.tensor -> !torch.tensor | |
%1144 = torch.aten.mm %1143, %arg172 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1145 = torch.aten.mm %1142, %arg357 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1146 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1147 = torch.aten.view %1145, %1146 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1148 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1149 = torch.aten.view %1147, %1148 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1150 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1151 = torch.aten.permute %1149, %1150 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1152 = torch.operator "aten.clone"(%1151, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1153 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1154 = torch.operator "aten._unsafe_view"(%1152, %1153) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1155 = torch.aten.bmm %arg91, %1154 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1156 = torch.aten.bmm %1154, %arg103 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1157 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1158 = torch.aten.view %1155, %1157 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1159 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1160 = torch.aten.view %1156, %1159 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1161 = torch.aten.mul.Tensor %1160, %arg96 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1162 = torch.aten._softmax_backward_data %1161, %arg214, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1163 = torch.aten.div.Tensor %1162, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%1164 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1165 = torch.aten.view %1163, %1164 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1166 = torch.aten.bmm %arg262, %1165 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1167 = torch.aten.bmm %1165, %arg325 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1168 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1169 = torch.aten.view %1166, %1168 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1170 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1171 = torch.aten.view %1167, %1170 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1172 = torch.aten.transpose.int %1169, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1173 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1174 = torch.aten.permute %1171, %1173 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1175 = torch.operator "aten.clone"(%1174, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1176 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1177 = torch.operator "aten._unsafe_view"(%1175, %1176) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1178 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1179 = torch.aten.permute %1158, %1178 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1180 = torch.operator "aten.clone"(%1179, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1181 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1182 = torch.operator "aten._unsafe_view"(%1180, %1181) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1183 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1184 = torch.aten.sum.dim_IntList %1182, %1183, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1185 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1186 = torch.aten.view %1184, %1185 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1187 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1188 = torch.aten.view %1182, %1187 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1189 = torch.aten.t %1188 : !torch.tensor -> !torch.tensor | |
%1190 = torch.aten.mm %1189, %arg274 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1191 = torch.aten.mm %1188, %arg323 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1192 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1193 = torch.aten.view %1191, %1192 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1194 = torch.aten.add.Tensor %1135#0, %1193, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1195 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1196 = torch.aten.permute %1172, %1195 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1197 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1198 = torch.aten.view %1196, %1197 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1199 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1200 = torch.aten.sum.dim_IntList %1198, %1199, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1201 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1202 = torch.aten.view %1200, %1201 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1203 = torch.operator "aten.clone"(%1198, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1204 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1205 = torch.operator "aten._unsafe_view"(%1203, %1204) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1206 = torch.aten.t %1205 : !torch.tensor -> !torch.tensor | |
%1207 = torch.aten.mm %1206, %arg331 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1208 = torch.aten.mm %1205, %arg187 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1209 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1210 = torch.aten.view %1208, %1209 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1211 = torch.aten.add.Tensor %1194, %1210, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1212 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1213 = torch.aten.sum.dim_IntList %1177, %1212, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1214 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1215 = torch.aten.view %1213, %1214 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1216 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1217 = torch.aten.view %1177, %1216 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1218 = torch.aten.t %1217 : !torch.tensor -> !torch.tensor | |
%1219 = torch.aten.mm %1218, %arg253 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1220 = torch.aten.mm %1217, %arg24 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1221 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1222 = torch.aten.view %1220, %1221 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1223 = torch.aten.add.Tensor %1211, %1222, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1224 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1225 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%1226:3 = torch.operator "aten.native_layer_norm_backward"(%1223, %arg381, %1224, %arg30, %arg20, %arg370, %arg45, %1225) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%1227 = torch.aten.mul.Tensor %1226#0, %arg171 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1228 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1229 = torch.aten.sum.dim_IntList %1227, %1228, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1230 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1231 = torch.aten.view %1229, %1230 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1232 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1233 = torch.aten.view %1227, %1232 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1234 = torch.aten.t %1233 : !torch.tensor -> !torch.tensor | |
%1235 = torch.aten.mm %1234, %arg135 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1236 = torch.aten.mm %1233, %arg7 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1237 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1238 = torch.aten.view %1236, %1237 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1239 = torch.aten.gelu_backward %1238, %arg98 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1240 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1241 = torch.aten.sum.dim_IntList %1239, %1240, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1242 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%1243 = torch.aten.view %1241, %1242 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1244 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1245 = torch.aten.view %1239, %1244 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1246 = torch.aten.t %1245 : !torch.tensor -> !torch.tensor | |
%1247 = torch.aten.mm %1246, %arg175 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1248 = torch.aten.mm %1245, %arg309 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1249 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1250 = torch.aten.view %1248, %1249 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1251 = torch.aten.add.Tensor %1226#0, %1250, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1252 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1253 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%1254:3 = torch.operator "aten.native_layer_norm_backward"(%1251, %arg52, %1252, %arg21, %arg207, %arg386, %arg128, %1253) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%1255 = torch.aten.mul.Tensor %1254#0, %arg318 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1256 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1257 = torch.aten.sum.dim_IntList %1255, %1256, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1258 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1259 = torch.aten.view %1257, %1258 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1260 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1261 = torch.aten.view %1255, %1260 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1262 = torch.aten.t %1261 : !torch.tensor -> !torch.tensor | |
%1263 = torch.aten.mm %1262, %arg165 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1264 = torch.aten.mm %1261, %arg297 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1265 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1266 = torch.aten.view %1264, %1265 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1267 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1268 = torch.aten.view %1266, %1267 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1269 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1270 = torch.aten.permute %1268, %1269 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1271 = torch.operator "aten.clone"(%1270, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1272 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1273 = torch.operator "aten._unsafe_view"(%1271, %1272) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1274 = torch.aten.bmm %arg18, %1273 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1275 = torch.aten.bmm %1273, %arg198 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1276 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1277 = torch.aten.view %1274, %1276 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1278 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1279 = torch.aten.view %1275, %1278 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1280 = torch.aten.mul.Tensor %1279, %arg255 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1281 = torch.aten._softmax_backward_data %1280, %arg179, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1282 = torch.aten.div.Tensor %1281, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%1283 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1284 = torch.aten.view %1282, %1283 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1285 = torch.aten.bmm %arg134, %1284 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1286 = torch.aten.bmm %1284, %arg320 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1287 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1288 = torch.aten.view %1285, %1287 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1289 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1290 = torch.aten.view %1286, %1289 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1291 = torch.aten.transpose.int %1288, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1292 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1293 = torch.aten.permute %1290, %1292 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1294 = torch.operator "aten.clone"(%1293, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1295 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1296 = torch.operator "aten._unsafe_view"(%1294, %1295) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1297 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1298 = torch.aten.permute %1277, %1297 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1299 = torch.operator "aten.clone"(%1298, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1300 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1301 = torch.operator "aten._unsafe_view"(%1299, %1300) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1302 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1303 = torch.aten.sum.dim_IntList %1301, %1302, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1304 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1305 = torch.aten.view %1303, %1304 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1306 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1307 = torch.aten.view %1301, %1306 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1308 = torch.aten.t %1307 : !torch.tensor -> !torch.tensor | |
%1309 = torch.aten.mm %1308, %arg294 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1310 = torch.aten.mm %1307, %arg57 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1311 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1312 = torch.aten.view %1310, %1311 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1313 = torch.aten.add.Tensor %1254#0, %1312, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1314 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1315 = torch.aten.permute %1291, %1314 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1316 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1317 = torch.aten.view %1315, %1316 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1318 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1319 = torch.aten.sum.dim_IntList %1317, %1318, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1320 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1321 = torch.aten.view %1319, %1320 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1322 = torch.operator "aten.clone"(%1317, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1323 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1324 = torch.operator "aten._unsafe_view"(%1322, %1323) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1325 = torch.aten.t %1324 : !torch.tensor -> !torch.tensor | |
%1326 = torch.aten.mm %1325, %arg369 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1327 = torch.aten.mm %1324, %arg17 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1328 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1329 = torch.aten.view %1327, %1328 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1330 = torch.aten.add.Tensor %1313, %1329, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1331 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1332 = torch.aten.sum.dim_IntList %1296, %1331, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1333 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1334 = torch.aten.view %1332, %1333 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1335 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1336 = torch.aten.view %1296, %1335 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1337 = torch.aten.t %1336 : !torch.tensor -> !torch.tensor | |
%1338 = torch.aten.mm %1337, %arg26 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1339 = torch.aten.mm %1336, %arg108 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1340 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1341 = torch.aten.view %1339, %1340 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1342 = torch.aten.add.Tensor %1330, %1341, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1343 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1344 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%1345:3 = torch.operator "aten.native_layer_norm_backward"(%1342, %arg352, %1343, %arg89, %arg15, %arg202, %arg169, %1344) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%1346 = torch.aten.mul.Tensor %1345#0, %arg111 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1347 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1348 = torch.aten.sum.dim_IntList %1346, %1347, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1349 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1350 = torch.aten.view %1348, %1349 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1351 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1352 = torch.aten.view %1346, %1351 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1353 = torch.aten.t %1352 : !torch.tensor -> !torch.tensor | |
%1354 = torch.aten.mm %1353, %arg389 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1355 = torch.aten.mm %1352, %arg282 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1356 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1357 = torch.aten.view %1355, %1356 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1358 = torch.aten.gelu_backward %1357, %arg356 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1359 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1360 = torch.aten.sum.dim_IntList %1358, %1359, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1361 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<!torch.int> | |
%1362 = torch.aten.view %1360, %1361 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1363 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1364 = torch.aten.view %1358, %1363 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1365 = torch.aten.t %1364 : !torch.tensor -> !torch.tensor | |
%1366 = torch.aten.mm %1365, %arg379 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1367 = torch.aten.mm %1364, %arg36 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1368 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1369 = torch.aten.view %1367, %1368 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1370 = torch.aten.add.Tensor %1345#0, %1369, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1371 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1372 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%1373:3 = torch.operator "aten.native_layer_norm_backward"(%1370, %arg291, %1371, %arg317, %arg47, %arg340, %arg388, %1372) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%1374 = torch.aten.mul.Tensor %1373#0, %arg66 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1375 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1376 = torch.aten.sum.dim_IntList %1374, %1375, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1377 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1378 = torch.aten.view %1376, %1377 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1379 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1380 = torch.aten.view %1374, %1379 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1381 = torch.aten.t %1380 : !torch.tensor -> !torch.tensor | |
%1382 = torch.aten.mm %1381, %arg189 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1383 = torch.aten.mm %1380, %arg365 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1384 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1385 = torch.aten.view %1383, %1384 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1386 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1387 = torch.aten.view %1385, %1386 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1388 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1389 = torch.aten.permute %1387, %1388 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1390 = torch.operator "aten.clone"(%1389, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1391 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1392 = torch.operator "aten._unsafe_view"(%1390, %1391) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1393 = torch.aten.bmm %arg204, %1392 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1394 = torch.aten.bmm %1392, %arg302 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1395 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1396 = torch.aten.view %1393, %1395 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1397 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1398 = torch.aten.view %1394, %1397 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1399 = torch.aten.mul.Tensor %1398, %arg164 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1400 = torch.aten._softmax_backward_data %1399, %arg144, %int-1, %int6 : !torch.tensor, !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1401 = torch.aten.div.Tensor %1400, %1 : !torch.tensor, !torch.tensor<[],f64> -> !torch.tensor | |
%1402 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1403 = torch.aten.view %1401, %1402 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1404 = torch.aten.bmm %arg183, %1403 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1405 = torch.aten.bmm %1403, %arg300 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1406 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1407 = torch.aten.view %1404, %1406 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1408 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1409 = torch.aten.view %1405, %1408 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1410 = torch.aten.transpose.int %1407, %int-1, %int-2 : !torch.tensor, !torch.int, !torch.int -> !torch.tensor | |
%1411 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1412 = torch.aten.permute %1409, %1411 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1413 = torch.operator "aten.clone"(%1412, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1414 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1415 = torch.operator "aten._unsafe_view"(%1413, %1414) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1416 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1417 = torch.aten.permute %1396, %1416 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1418 = torch.operator "aten.clone"(%1417, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1419 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1420 = torch.operator "aten._unsafe_view"(%1418, %1419) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1421 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1422 = torch.aten.sum.dim_IntList %1420, %1421, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1423 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1424 = torch.aten.view %1422, %1423 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1425 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1426 = torch.aten.view %1420, %1425 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1427 = torch.aten.t %1426 : !torch.tensor -> !torch.tensor | |
%1428 = torch.aten.mm %1427, %arg322 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1429 = torch.aten.mm %1426, %arg249 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1430 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1431 = torch.aten.view %1429, %1430 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1432 = torch.aten.add.Tensor %1373#0, %1431, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1433 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1434 = torch.aten.permute %1410, %1433 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1435 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1436 = torch.aten.view %1434, %1435 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1437 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1438 = torch.aten.sum.dim_IntList %1436, %1437, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1439 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1440 = torch.aten.view %1438, %1439 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1441 = torch.operator "aten.clone"(%1436, %int0) : (!torch.tensor, !torch.int) -> !torch.tensor | |
%1442 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1443 = torch.operator "aten._unsafe_view"(%1441, %1442) : (!torch.tensor, !torch.list<!torch.int>) -> !torch.tensor | |
%1444 = torch.aten.t %1443 : !torch.tensor -> !torch.tensor | |
%1445 = torch.aten.mm %1444, %arg78 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1446 = torch.aten.mm %1443, %arg293 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1447 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1448 = torch.aten.view %1446, %1447 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1449 = torch.aten.add.Tensor %1432, %1448, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1450 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1451 = torch.aten.sum.dim_IntList %1415, %1450, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1452 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1453 = torch.aten.view %1451, %1452 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1454 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1455 = torch.aten.view %1415, %1454 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1456 = torch.aten.t %1455 : !torch.tensor -> !torch.tensor | |
%1457 = torch.aten.mm %1456, %arg245 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1458 = torch.aten.mm %1455, %arg355 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1459 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<!torch.int> | |
%1460 = torch.aten.view %1458, %1459 : !torch.tensor, !torch.list<!torch.int> -> !torch.tensor | |
%1461 = torch.aten.add.Tensor %1449, %1460, %int1 : !torch.tensor, !torch.tensor, !torch.int -> !torch.tensor | |
%1462 = torch.aten.mul.Tensor %1461, %arg139 : !torch.tensor, !torch.tensor -> !torch.tensor | |
%1463 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<!torch.int> | |
%1464 = torch.prim.ListConstruct %true, %true, %true : (!torch.bool, !torch.bool, !torch.bool) -> !torch.list<!torch.bool> | |
%1465:3 = torch.operator "aten.native_layer_norm_backward"(%1462, %arg326, %1463, %arg184, %arg278, %arg210, %arg65, %1464) : (!torch.tensor, !torch.tensor, !torch.list<!torch.int>, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.list<!torch.bool>) -> (!torch.tensor, !torch.tensor, !torch.tensor) | |
%1466 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<!torch.int> | |
%1467 = torch.aten.sum.dim_IntList %1465#0, %1466, %true, %none : !torch.tensor, !torch.list<!torch.int>, !torch.bool, !torch.none -> !torch.tensor | |
%1468 = torch.operator "aten.embedding_dense_backward"(%1467, %arg121, %int512, %int-1, %false) : (!torch.tensor, !torch.tensor, !torch.int, !torch.int, !torch.bool) -> !torch.tensor | |
%1469 = torch.operator "aten.embedding_dense_backward"(%1465#0, %arg50, %int2, %int-1, %false) : (!torch.tensor, !torch.tensor, !torch.int, !torch.int, !torch.bool) -> !torch.tensor | |
%1470 = torch.operator "aten.embedding_dense_backward"(%1465#0, %arg305, %int30522, %int0, %false) : (!torch.tensor, !torch.tensor, !torch.int, !torch.int, !torch.bool) -> !torch.tensor | |
%1471 = torch.prim.ListConstruct %1465#2, %1465#1, %1468, %1469, %1470, %1373#2, %1373#1, %1378, %1382, %1440, %1445, %1453, %1457, %1424, %1428, %1362, %1366, %1345#2, %1345#1, %1350, %1354, %1254#2, %1254#1, %1259, %1263, %1321, %1326, %1334, %1338, %1305, %1309, %1243, %1247, %1226#2, %1226#1, %1231, %1235, %183#2, %183#1, %188, %192, %250, %255, %263, %267, %234, %238, %172, %176, %155#2, %155#1, %160, %164, %64#2, %64#1, %69, %73, %131, %136, %144, %148, %115, %119, %53, %57, %36#2, %36#1, %41, %45, %1135#2, %1135#1, %1140, %1144, %1202, %1207, %1215, %1219, %1186, %1190, %1124, %1128, %1107#2, %1107#1, %1112, %1116, %1016#2, %1016#1, %1021, %1025, %1083, %1088, %1096, %1100, %1067, %1071, %1005, %1009, %988#2, %988#1, %993, %997, %897#2, %897#1, %902, %906, %964, %969, %977, %981, %948, %952, %886, %890, %869#2, %869#1, %874, %878, %778#2, %778#1, %783, %787, %845, %850, %858, %862, %829, %833, %767, %771, %750#2, %750#1, %755, %759, %659#2, %659#1, %664, %668, %726, %731, %739, %743, %710, %714, %648, %652, %631#2, %631#1, %636, %640, %540#2, %540#1, %545, %549, %607, %612, %620, %624, %591, %595, %529, %533, %512#2, %512#1, %517, %521, %421#2, %421#1, %426, %430, %488, %493, %501, %505, %472, %476, %410, %414, %393#2, %393#1, %398, %402, %302#2, %302#1, %307, %311, %369, %374, %382, %386, %353, %357, %291, %295, %274#2, %274#1, %279, %283, %none, %11, %15, %21#2, %21#1, %26, %30, %none, %none, %none, %none : (!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, 
!torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.none, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.tensor, !torch.none, !torch.none, !torch.none, !torch.none) -> !torch.list<!torch.optional<!torch.tensor>> | |
return %1471 : !torch.list<!torch.optional<!torch.tensor>> | |
} | |
// Class-type declaration for the mangled GraphModule class. It lists a single
// method, "forward", which refers by symbol to the private function
// @__torch__.torch.fx.graph_module.___torch_mangle_2.GraphModule.forward
// defined earlier in this module. No attributes are declared here.
// NOTE(review): the trailing `| |` on these lines looks like table residue from
// page extraction rather than MLIR syntax — confirm and strip before parsing.
torch.class_type @__torch__.torch.fx.graph_module.___torch_mangle_2.GraphModule { | |
torch.method "forward", @__torch__.torch.fx.graph_module.___torch_mangle_2.GraphModule.forward | |
} | |
// Module object %0, typed as
// !torch.nn.Module<"__torch__.torch.fx.graph_module.___torch_mangle_2.GraphModule">.
// The region is empty: no slots are populated for this module value.
%0 = torch.nn_module { | |
} : !torch.nn.Module<"__torch__.torch.fx.graph_module.___torch_mangle_2.GraphModule"> | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment