Created
April 6, 2022 12:00
-
-
Save pashu123/4d6c2895926c886e64293be5195784ec to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module attributes {torch.debug_module_name = "GraphModule"} { | |
func @forward(%arg0: !torch.vtensor<[768],f32>, %arg1: !torch.vtensor<[768],f32>, %arg2: !torch.vtensor<[768],f32>, %arg3: !torch.vtensor<[768],f32>, %arg4: !torch.vtensor<[768],f32>, %arg5: !torch.vtensor<[768],f32>, %arg6: !torch.vtensor<[768],f32>, %arg7: !torch.vtensor<[768],f32>, %arg8: !torch.vtensor<[768],f32>, %arg9: !torch.vtensor<[768],f32>, %arg10: !torch.vtensor<[768],f32>, %arg11: !torch.vtensor<[768],f32>, %arg12: !torch.vtensor<[768],f32>, %arg13: !torch.vtensor<[768],f32>, %arg14: !torch.vtensor<[768],f32>, %arg15: !torch.vtensor<[768],f32>, %arg16: !torch.vtensor<[768],f32>, %arg17: !torch.vtensor<[768],f32>, %arg18: !torch.vtensor<[768],f32>, %arg19: !torch.vtensor<[768],f32>, %arg20: !torch.vtensor<[768],f32>, %arg21: !torch.vtensor<[768],f32>, %arg22: !torch.vtensor<[768],f32>, %arg23: !torch.vtensor<[768],f32>, %arg24: !torch.vtensor<[768],f32>, %arg25: !torch.vtensor<[768],f32>, %arg26: !torch.vtensor<[768],f32>, %arg27: !torch.vtensor<[768],f32>, %arg28: !torch.vtensor<[768],f32>, %arg29: !torch.vtensor<[768],f32>, %arg30: !torch.vtensor<[768],f32>, %arg31: !torch.vtensor<[768],f32>, %arg32: !torch.vtensor<[768],f32>, %arg33: !torch.vtensor<[768],f32>, %arg34: !torch.vtensor<[768],f32>, %arg35: !torch.vtensor<[768],f32>, %arg36: !torch.vtensor<[768],f32>, %arg37: !torch.vtensor<[768],f32>, %arg38: !torch.vtensor<[768],f32>, %arg39: !torch.vtensor<[768],f32>, %arg40: !torch.vtensor<[768],f32>, %arg41: !torch.vtensor<[768],f32>, %arg42: !torch.vtensor<[768],f32>, %arg43: !torch.vtensor<[768],f32>, %arg44: !torch.vtensor<[768],f32>, %arg45: !torch.vtensor<[768],f32>, %arg46: !torch.vtensor<[768],f32>, %arg47: !torch.vtensor<[768],f32>, %arg48: !torch.vtensor<[768],f32>, %arg49: !torch.vtensor<[768],f32>, %arg50: !torch.vtensor<[768],f32>, %arg51: !torch.vtensor<[768],f32>, %arg52: !torch.vtensor<[4,512],si64>, %arg53: !torch.vtensor<[4,512],si64>, %arg54: !torch.vtensor<[1,512],si64>, %arg55: !torch.vtensor<[4,512,768],f32>, %arg56: !torch.vtensor<[4,512,768],f32>, %arg57: !torch.vtensor<[768,768],f32>, %arg58: !torch.vtensor<[2048,768],f32>, %arg59: !torch.vtensor<[768,768],f32>, %arg60: !torch.vtensor<[2048,768],f32>, %arg61: !torch.vtensor<[768,768],f32>, %arg62: !torch.vtensor<[2048,768],f32>, %arg63: !torch.vtensor<[48,512,64],f32>, %arg64: !torch.vtensor<[48,64,512],f32>, %arg65: !torch.vtensor<[4,12,512,512],f32>, %arg66: !torch.vtensor<[4,12,512,512],f32>, %arg67: !torch.vtensor<[48,512,512],f32>, %arg68: !torch.vtensor<[48,512,64],f32>, %arg69: !torch.vtensor<[768,768],f32>, %arg70: !torch.vtensor<[2048,768],f32>, %arg71: !torch.vtensor<[4,512,768],f32>, %arg72: !torch.vtensor<[4,512,768],f32>, %arg73: !torch.vtensor<[768,3072],f32>, %arg74: !torch.vtensor<[2048,768],f32>, %arg75: !torch.vtensor<[4,512,3072],f32>, %arg76: !torch.vtensor<[3072,768],f32>, %arg77: !torch.vtensor<[2048,3072],f32>, %arg78: !torch.vtensor<[4,512,768],f32>, %arg79: !torch.vtensor<[4,512,768],f32>, %arg80: !torch.vtensor<[768,768],f32>, %arg81: !torch.vtensor<[2048,768],f32>, %arg82: !torch.vtensor<[768,768],f32>, %arg83: !torch.vtensor<[2048,768],f32>, %arg84: !torch.vtensor<[768,768],f32>, %arg85: !torch.vtensor<[2048,768],f32>, %arg86: !torch.vtensor<[48,512,64],f32>, %arg87: !torch.vtensor<[48,64,512],f32>, %arg88: !torch.vtensor<[4,12,512,512],f32>, %arg89: !torch.vtensor<[4,12,512,512],f32>, %arg90: !torch.vtensor<[48,512,512],f32>, %arg91: !torch.vtensor<[48,512,64],f32>, %arg92: !torch.vtensor<[768,768],f32>, %arg93: !torch.vtensor<[2048,768],f32>, %arg94: !torch.vtensor<[4,512,768],f32>, %arg95: !torch.vtensor<[4,512,768],f32>, %arg96: !torch.vtensor<[768,3072],f32>, %arg97: !torch.vtensor<[2048,768],f32>, %arg98: !torch.vtensor<[4,512,3072],f32>, %arg99: !torch.vtensor<[3072,768],f32>, %arg100: !torch.vtensor<[2048,3072],f32>, %arg101: !torch.vtensor<[4,512,768],f32>, %arg102: !torch.vtensor<[4,512,768],f32>, %arg103: !torch.vtensor<[768,768],f32>, %arg104: !torch.vtensor<[2048,768],f32>, %arg105: !torch.vtensor<[768,768],f32>, %arg106: !torch.vtensor<[2048,768],f32>, %arg107: !torch.vtensor<[768,768],f32>, %arg108: !torch.vtensor<[2048,768],f32>, %arg109: !torch.vtensor<[48,512,64],f32>, %arg110: !torch.vtensor<[48,64,512],f32>, %arg111: !torch.vtensor<[4,12,512,512],f32>, %arg112: !torch.vtensor<[4,12,512,512],f32>, %arg113: !torch.vtensor<[48,512,512],f32>, %arg114: !torch.vtensor<[48,512,64],f32>, %arg115: !torch.vtensor<[768,768],f32>, %arg116: !torch.vtensor<[2048,768],f32>, %arg117: !torch.vtensor<[4,512,768],f32>, %arg118: !torch.vtensor<[4,512,768],f32>, %arg119: !torch.vtensor<[768,3072],f32>, %arg120: !torch.vtensor<[2048,768],f32>, %arg121: !torch.vtensor<[4,512,3072],f32>, %arg122: !torch.vtensor<[3072,768],f32>, %arg123: !torch.vtensor<[2048,3072],f32>, %arg124: !torch.vtensor<[4,512,768],f32>, %arg125: !torch.vtensor<[4,512,768],f32>, %arg126: !torch.vtensor<[768,768],f32>, %arg127: !torch.vtensor<[2048,768],f32>, %arg128: !torch.vtensor<[768,768],f32>, %arg129: !torch.vtensor<[2048,768],f32>, %arg130: !torch.vtensor<[768,768],f32>, %arg131: !torch.vtensor<[2048,768],f32>, %arg132: !torch.vtensor<[48,512,64],f32>, %arg133: !torch.vtensor<[48,64,512],f32>, %arg134: !torch.vtensor<[4,12,512,512],f32>, %arg135: !torch.vtensor<[4,12,512,512],f32>, %arg136: !torch.vtensor<[48,512,512],f32>, %arg137: !torch.vtensor<[48,512,64],f32>, %arg138: !torch.vtensor<[768,768],f32>, %arg139: !torch.vtensor<[2048,768],f32>, %arg140: !torch.vtensor<[4,512,768],f32>, %arg141: !torch.vtensor<[4,512,768],f32>, %arg142: !torch.vtensor<[768,3072],f32>, %arg143: !torch.vtensor<[2048,768],f32>, %arg144: !torch.vtensor<[4,512,3072],f32>, %arg145: !torch.vtensor<[3072,768],f32>, %arg146: !torch.vtensor<[2048,3072],f32>, %arg147: !torch.vtensor<[4,512,768],f32>, %arg148: !torch.vtensor<[4,512,768],f32>, %arg149: !torch.vtensor<[768,768],f32>, %arg150: !torch.vtensor<[2048,768],f32>, %arg151: !torch.vtensor<[768,768],f32>, %arg152: !torch.vtensor<[2048,768],f32>, %arg153: !torch.vtensor<[768,768],f32>, %arg154: !torch.vtensor<[2048,768],f32>, %arg155: !torch.vtensor<[48,512,64],f32>, %arg156: !torch.vtensor<[48,64,512],f32>, %arg157: !torch.vtensor<[4,12,512,512],f32>, %arg158: !torch.vtensor<[4,12,512,512],f32>, %arg159: !torch.vtensor<[48,512,512],f32>, %arg160: !torch.vtensor<[48,512,64],f32>, %arg161: !torch.vtensor<[768,768],f32>, %arg162: !torch.vtensor<[2048,768],f32>, %arg163: !torch.vtensor<[4,512,768],f32>, %arg164: !torch.vtensor<[4,512,768],f32>, %arg165: !torch.vtensor<[768,3072],f32>, %arg166: !torch.vtensor<[2048,768],f32>, %arg167: !torch.vtensor<[4,512,3072],f32>, %arg168: !torch.vtensor<[3072,768],f32>, %arg169: !torch.vtensor<[2048,3072],f32>, %arg170: !torch.vtensor<[4,512,768],f32>, %arg171: !torch.vtensor<[4,512,768],f32>, %arg172: !torch.vtensor<[768,768],f32>, %arg173: !torch.vtensor<[2048,768],f32>, %arg174: !torch.vtensor<[768,768],f32>, %arg175: !torch.vtensor<[2048,768],f32>, %arg176: !torch.vtensor<[768,768],f32>, %arg177: !torch.vtensor<[2048,768],f32>, %arg178: !torch.vtensor<[48,512,64],f32>, %arg179: !torch.vtensor<[48,64,512],f32>, %arg180: !torch.vtensor<[4,12,512,512],f32>, %arg181: !torch.vtensor<[4,12,512,512],f32>, %arg182: !torch.vtensor<[48,512,512],f32>, %arg183: !torch.vtensor<[48,512,64],f32>, %arg184: !torch.vtensor<[768,768],f32>, %arg185: !torch.vtensor<[2048,768],f32>, %arg186: !torch.vtensor<[4,512,768],f32>, %arg187: !torch.vtensor<[4,512,768],f32>, %arg188: !torch.vtensor<[768,3072],f32>, %arg189: !torch.vtensor<[2048,768],f32>, %arg190: !torch.vtensor<[4,512,3072],f32>, %arg191: !torch.vtensor<[3072,768],f32>, %arg192: !torch.vtensor<[2048,3072],f32>, %arg193: !torch.vtensor<[4,512,768],f32>, %arg194: !torch.vtensor<[4,512,768],f32>, %arg195: !torch.vtensor<[768,768],f32>, %arg196: !torch.vtensor<[2048,768],f32>, %arg197: !torch.vtensor<[768,768],f32>, %arg198: !torch.vtensor<[2048,768],f32>, %arg199: !torch.vtensor<[768,768],f32>, %arg200: !torch.vtensor<[2048,768],f32>, %arg201: !torch.vtensor<[48,512,64],f32>, %arg202: !torch.vtensor<[48,64,512],f32>, %arg203: !torch.vtensor<[4,12,512,512],f32>, %arg204: !torch.vtensor<[4,12,512,512],f32>, %arg205: !torch.vtensor<[48,512,512],f32>, %arg206: !torch.vtensor<[48,512,64],f32>, %arg207: !torch.vtensor<[768,768],f32>, %arg208: !torch.vtensor<[2048,768],f32>, %arg209: !torch.vtensor<[4,512,768],f32>, %arg210: !torch.vtensor<[4,512,768],f32>, %arg211: !torch.vtensor<[768,3072],f32>, %arg212: !torch.vtensor<[2048,768],f32>, %arg213: !torch.vtensor<[4,512,3072],f32>, %arg214: !torch.vtensor<[3072,768],f32>, %arg215: !torch.vtensor<[2048,3072],f32>, %arg216: !torch.vtensor<[4,512,768],f32>, %arg217: !torch.vtensor<[4,512,768],f32>, %arg218: !torch.vtensor<[768,768],f32>, %arg219: !torch.vtensor<[2048,768],f32>, %arg220: !torch.vtensor<[768,768],f32>, %arg221: !torch.vtensor<[2048,768],f32>, %arg222: !torch.vtensor<[768,768],f32>, %arg223: !torch.vtensor<[2048,768],f32>, %arg224: !torch.vtensor<[48,512,64],f32>, %arg225: !torch.vtensor<[48,64,512],f32>, %arg226: !torch.vtensor<[4,12,512,512],f32>, %arg227: !torch.vtensor<[4,12,512,512],f32>, %arg228: !torch.vtensor<[48,512,512],f32>, %arg229: !torch.vtensor<[48,512,64],f32>, %arg230: !torch.vtensor<[768,768],f32>, %arg231: !torch.vtensor<[2048,768],f32>, %arg232: !torch.vtensor<[4,512,768],f32>, %arg233: !torch.vtensor<[4,512,768],f32>, %arg234: !torch.vtensor<[768,3072],f32>, %arg235: !torch.vtensor<[2048,768],f32>, %arg236: !torch.vtensor<[4,512,3072],f32>, %arg237: !torch.vtensor<[3072,768],f32>, %arg238: !torch.vtensor<[2048,3072],f32>, %arg239: !torch.vtensor<[4,512,768],f32>, %arg240: !torch.vtensor<[4,512,768],f32>, %arg241: !torch.vtensor<[768,768],f32>, %arg242: !torch.vtensor<[2048,768],f32>, %arg243: !torch.vtensor<[768,768],f32>, %arg244: !torch.vtensor<[2048,768],f32>, %arg245: !torch.vtensor<[768,768],f32>, %arg246: !torch.vtensor<[2048,768],f32>, %arg247: !torch.vtensor<[48,512,64],f32>, %arg248: !torch.vtensor<[48,64,512],f32>, %arg249: !torch.vtensor<[4,12,512,512],f32>, %arg250: !torch.vtensor<[4,12,512,512],f32>, %arg251: !torch.vtensor<[48,512,512],f32>, %arg252: !torch.vtensor<[48,512,64],f32>, %arg253: !torch.vtensor<[768,768],f32>, %arg254: !torch.vtensor<[2048,768],f32>, %arg255: !torch.vtensor<[4,512,768],f32>, %arg256: !torch.vtensor<[4,512,768],f32>, %arg257: !torch.vtensor<[768,3072],f32>, %arg258: !torch.vtensor<[2048,768],f32>, %arg259: !torch.vtensor<[4,512,3072],f32>, %arg260: !torch.vtensor<[3072,768],f32>, %arg261: !torch.vtensor<[2048,3072],f32>, %arg262: !torch.vtensor<[4,512,768],f32>, %arg263: !torch.vtensor<[4,512,768],f32>, %arg264: !torch.vtensor<[768,768],f32>, %arg265: !torch.vtensor<[2048,768],f32>, %arg266: !torch.vtensor<[768,768],f32>, %arg267: !torch.vtensor<[2048,768],f32>, %arg268: !torch.vtensor<[768,768],f32>, %arg269: !torch.vtensor<[2048,768],f32>, %arg270: !torch.vtensor<[48,512,64],f32>, %arg271: !torch.vtensor<[48,64,512],f32>, %arg272: !torch.vtensor<[4,12,512,512],f32>, %arg273: !torch.vtensor<[4,12,512,512],f32>, %arg274: !torch.vtensor<[48,512,512],f32>, %arg275: !torch.vtensor<[48,512,64],f32>, %arg276: !torch.vtensor<[768,768],f32>, %arg277: !torch.vtensor<[2048,768],f32>, %arg278: !torch.vtensor<[4,512,768],f32>, %arg279: !torch.vtensor<[4,512,768],f32>, %arg280: !torch.vtensor<[768,3072],f32>, %arg281: !torch.vtensor<[2048,768],f32>, %arg282: !torch.vtensor<[4,512,3072],f32>, %arg283: !torch.vtensor<[3072,768],f32>, %arg284: !torch.vtensor<[2048,3072],f32>, %arg285: !torch.vtensor<[4,512,768],f32>, %arg286: !torch.vtensor<[4,512,768],f32>, %arg287: !torch.vtensor<[768,768],f32>, %arg288: !torch.vtensor<[2048,768],f32>, %arg289: !torch.vtensor<[768,768],f32>, %arg290: !torch.vtensor<[2048,768],f32>, %arg291: !torch.vtensor<[768,768],f32>, %arg292: !torch.vtensor<[2048,768],f32>, %arg293: !torch.vtensor<[48,512,64],f32>, %arg294: !torch.vtensor<[48,64,512],f32>, %arg295: !torch.vtensor<[4,12,512,512],f32>, %arg296: !torch.vtensor<[4,12,512,512],f32>, %arg297: !torch.vtensor<[48,512,512],f32>, %arg298: !torch.vtensor<[48,512,64],f32>, %arg299: !torch.vtensor<[768,768],f32>, %arg300: !torch.vtensor<[2048,768],f32>, %arg301: !torch.vtensor<[4,512,768],f32>, %arg302: !torch.vtensor<[4,512,768],f32>, %arg303: !torch.vtensor<[768,3072],f32>, %arg304: !torch.vtensor<[2048,768],f32>, %arg305: !torch.vtensor<[4,512,3072],f32>, %arg306: !torch.vtensor<[3072,768],f32>, %arg307: !torch.vtensor<[2048,3072],f32>, %arg308: !torch.vtensor<[4,512,768],f32>, %arg309: !torch.vtensor<[4,512,768],f32>, %arg310: !torch.vtensor<[768,768],f32>, %arg311: !torch.vtensor<[2048,768],f32>, %arg312: !torch.vtensor<[768,768],f32>, %arg313: !torch.vtensor<[2048,768],f32>, %arg314: !torch.vtensor<[768,768],f32>, %arg315: !torch.vtensor<[2048,768],f32>, %arg316: !torch.vtensor<[48,512,64],f32>, %arg317: !torch.vtensor<[48,64,512],f32>, %arg318: !torch.vtensor<[4,12,512,512],f32>, %arg319: !torch.vtensor<[4,12,512,512],f32>, %arg320: !torch.vtensor<[48,512,512],f32>, %arg321: !torch.vtensor<[48,512,64],f32>, %arg322: !torch.vtensor<[768,768],f32>, %arg323: !torch.vtensor<[2048,768],f32>, %arg324: !torch.vtensor<[4,512,768],f32>, %arg325: !torch.vtensor<[4,512,768],f32>, %arg326: !torch.vtensor<[768,3072],f32>, %arg327: !torch.vtensor<[2048,768],f32>, %arg328: !torch.vtensor<[4,512,3072],f32>, %arg329: !torch.vtensor<[3072,768],f32>, %arg330: !torch.vtensor<[2048,3072],f32>, %arg331: !torch.vtensor<[4,512,768],f32>, %arg332: !torch.vtensor<[4,512,768],f32>, %arg333: !torch.vtensor<[768,768],f32>, %arg334: !torch.vtensor<[2048,768],f32>, %arg335: !torch.vtensor<[4,512,768],f32>, %arg336: !torch.vtensor<[4,512,768],f32>, %arg337: !torch.vtensor<[768,30522],f32>, %arg338: !torch.vtensor<[2048,768],f32>, %arg339: !torch.vtensor<[2048],si64>, %arg340: !torch.vtensor<[2048,30522],f32>, %arg341: !torch.vtensor<[],f32>, %arg342: !torch.vtensor<[4,512,30522],f32>) -> (!torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[512,768],f32>, !torch.vtensor<[2,768],f32>, !torch.vtensor<[30522,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1],f32>, !torch.vtensor<[30522],f32>, !torch.vtensor<[30522,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) { | |
%0 = torch.vtensor.literal(dense<768> : tensor<si64>) : !torch.vtensor<[],si64> | |
%1 = torch.vtensor.literal(dense<8.000000e+00> : tensor<f64>) : !torch.vtensor<[],f64> | |
%2 = torch.vtensor.literal(dense<0.000000e+00> : tensor<1xf32>) : !torch.vtensor<[1],f32> | |
%int-2 = torch.constant.int -2 | |
%int-1 = torch.constant.int -1 | |
%int3072 = torch.constant.int 3072 | |
%int2048 = torch.constant.int 2048 | |
%int30522 = torch.constant.int 30522 | |
%int512 = torch.constant.int 512 | |
%int-100 = torch.constant.int -100 | |
%float9.999990e-13 = torch.constant.float 9.9999999999999998E-13 | |
%int768 = torch.constant.int 768 | |
%none = torch.constant.none | |
%true = torch.constant.bool true | |
%false = torch.constant.bool false | |
%str = torch.constant.str "none" | |
%int0 = torch.constant.int 0 | |
%int1 = torch.constant.int 1 | |
%int2 = torch.constant.int 2 | |
%int3 = torch.constant.int 3 | |
%int4 = torch.constant.int 4 | |
%int6 = torch.constant.int 6 | |
%int12 = torch.constant.int 12 | |
%int48 = torch.constant.int 48 | |
%int64 = torch.constant.int 64 | |
%cpu = torch.constant.device "cpu" | |
%3 = torch.prim.ListConstruct %int768 : (!torch.int) -> !torch.list<int> | |
%result0, %result1, %result2 = torch.aten.native_layer_norm %arg55, %3, %arg1, %arg0, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_0, %result1_1, %result2_2 = torch.aten.native_layer_norm %arg72, %3, %arg3, %arg2, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_3, %result1_4, %result2_5 = torch.aten.native_layer_norm %arg79, %3, %arg5, %arg4, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_6, %result1_7, %result2_8 = torch.aten.native_layer_norm %arg95, %3, %arg7, %arg6, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_9, %result1_10, %result2_11 = torch.aten.native_layer_norm %arg102, %3, %arg9, %arg8, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_12, %result1_13, %result2_14 = torch.aten.native_layer_norm %arg118, %3, %arg19, %arg18, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_15, %result1_16, %result2_17 = torch.aten.native_layer_norm %arg125, %3, %arg21, %arg20, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_18, %result1_19, %result2_20 = torch.aten.native_layer_norm %arg141, %3, %arg23, %arg22, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_21, %result1_22, %result2_23 = torch.aten.native_layer_norm %arg148, %3, %arg25, %arg24, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_24, %result1_25, %result2_26 = torch.aten.native_layer_norm %arg164, %3, %arg27, %arg26, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_27, %result1_28, %result2_29 = torch.aten.native_layer_norm %arg171, %3, %arg29, %arg28, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_30, %result1_31, %result2_32 = torch.aten.native_layer_norm %arg187, %3, %arg31, %arg30, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_33, %result1_34, %result2_35 = torch.aten.native_layer_norm %arg194, %3, %arg33, %arg32, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_36, %result1_37, %result2_38 = torch.aten.native_layer_norm %arg210, %3, %arg35, %arg34, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_39, %result1_40, %result2_41 = torch.aten.native_layer_norm %arg217, %3, %arg37, %arg36, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_42, %result1_43, %result2_44 = torch.aten.native_layer_norm %arg233, %3, %arg39, %arg38, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_45, %result1_46, %result2_47 = torch.aten.native_layer_norm %arg240, %3, %arg41, %arg40, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_48, %result1_49, %result2_50 = torch.aten.native_layer_norm %arg256, %3, %arg43, %arg42, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_51, %result1_52, %result2_53 = torch.aten.native_layer_norm %arg263, %3, %arg45, %arg44, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_54, %result1_55, %result2_56 = torch.aten.native_layer_norm %arg279, %3, %arg47, %arg46, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_57, %result1_58, %result2_59 = torch.aten.native_layer_norm %arg286, %3, %arg49, %arg48, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_60, %result1_61, %result2_62 = torch.aten.native_layer_norm %arg302, %3, %arg11, %arg10, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_63, %result1_64, %result2_65 = torch.aten.native_layer_norm %arg309, %3, %arg13, %arg12, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_66, %result1_67, %result2_68 = torch.aten.native_layer_norm %arg325, %3, %arg15, %arg14, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_69, %result1_70, %result2_71 = torch.aten.native_layer_norm %arg332, %3, %arg17, %arg16, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%result0_72, %result1_73, %result2_74 = torch.aten.native_layer_norm %arg336, %3, %arg51, %arg50, %float9.999990e-13 : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.float -> !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,1],f32> | |
%output, %total_weight = torch.aten.nll_loss_forward %arg340, %arg339, %none, %int1, %int-100 : !torch.vtensor<[2048,30522],f32>, !torch.vtensor<[2048],si64>, !torch.none, !torch.int, !torch.int -> !torch.vtensor<[],f32>, !torch.vtensor<[],f32> | |
%4 = torch.aten.nll_loss_backward %arg341, %arg340, %arg339, %none, %int1, %int-100, %total_weight : !torch.vtensor<[],f32>, !torch.vtensor<[2048,30522],f32>, !torch.vtensor<[2048],si64>, !torch.none, !torch.int, !torch.int, !torch.vtensor<[],f32> -> !torch.vtensor<[2048,30522],f32> | |
%5 = torch.aten.exp %arg340 : !torch.vtensor<[2048,30522],f32> -> !torch.vtensor<[2048,30522],f32> | |
%6 = torch.prim.ListConstruct %int1 : (!torch.int) -> !torch.list<int> | |
%true_75 = torch.constant.bool true | |
%none_76 = torch.constant.none | |
%7 = torch.aten.sum.dim_IntList %4, %6, %true_75, %none_76 : !torch.vtensor<[2048,30522],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2048,1],f32> | |
%int0_77 = torch.constant.int 0 | |
%8 = torch.aten.size.int %4, %int0_77 : !torch.vtensor<[2048,30522],f32>, !torch.int -> !torch.int | |
%int1_78 = torch.constant.int 1 | |
%9 = torch.aten.size.int %4, %int1_78 : !torch.vtensor<[2048,30522],f32>, !torch.int -> !torch.int | |
%10 = torch.prim.ListConstruct %8, %9 : (!torch.int, !torch.int) -> !torch.list<int> | |
%11 = torch.aten.broadcast_to %7, %10 : !torch.vtensor<[2048,1],f32>, !torch.list<int> -> !torch.vtensor<[2048,30522],f32> | |
%12 = torch.aten.mul.Tensor %5, %11 : !torch.vtensor<[2048,30522],f32>, !torch.vtensor<[2048,30522],f32> -> !torch.vtensor<[2048,30522],f32> | |
%float1.000000e00 = torch.constant.float 1.000000e+00 | |
%13 = torch.aten.sub.Tensor %4, %12, %float1.000000e00 : !torch.vtensor<[2048,30522],f32>, !torch.vtensor<[2048,30522],f32>, !torch.float -> !torch.vtensor<[2048,30522],f32> | |
%14 = torch.prim.ListConstruct %int4, %int512, %int30522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%15 = torch.aten.view %13, %14 : !torch.vtensor<[2048,30522],f32>, !torch.list<int> -> !torch.vtensor<[4,512,30522],f32> | |
%16 = torch.aten.add.Tensor %arg342, %15, %int1 : !torch.vtensor<[4,512,30522],f32>, !torch.vtensor<[4,512,30522],f32>, !torch.int -> !torch.vtensor<[4,512,30522],f32> | |
%17 = torch.prim.ListConstruct %int2048, %int30522 : (!torch.int, !torch.int) -> !torch.list<int> | |
%18 = torch.aten.view %16, %17 : !torch.vtensor<[4,512,30522],f32>, !torch.list<int> -> !torch.vtensor<[2048,30522],f32> | |
%int0_79 = torch.constant.int 0 | |
%int1_80 = torch.constant.int 1 | |
%19 = torch.aten.transpose.int %arg337, %int0_79, %int1_80 : !torch.vtensor<[768,30522],f32>, !torch.int, !torch.int -> !torch.vtensor<[30522,768],f32> | |
%20 = torch.aten.mm %18, %19 : !torch.vtensor<[2048,30522],f32>, !torch.vtensor<[30522,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_81 = torch.constant.int 0 | |
%int1_82 = torch.constant.int 1 | |
%21 = torch.aten.transpose.int %18, %int0_81, %int1_82 : !torch.vtensor<[2048,30522],f32>, !torch.int, !torch.int -> !torch.vtensor<[30522,2048],f32> | |
%22 = torch.aten.mm %21, %arg338 : !torch.vtensor<[30522,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[30522,768],f32> | |
%int0_83 = torch.constant.int 0 | |
%int1_84 = torch.constant.int 1 | |
%23 = torch.aten.transpose.int %22, %int0_83, %int1_84 : !torch.vtensor<[30522,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,30522],f32> | |
%24 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%25 = torch.aten.sum.dim_IntList %18, %24, %true, %none : !torch.vtensor<[2048,30522],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,30522],f32> | |
%26 = torch.prim.ListConstruct %int30522 : (!torch.int) -> !torch.list<int> | |
%27 = torch.aten.view %25, %26 : !torch.vtensor<[1,30522],f32>, !torch.list<int> -> !torch.vtensor<[30522],f32> | |
%28 = torch.prim.ListConstruct %int4, %int512, %int768 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%29 = torch.aten.view %20, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_85 = torch.constant.int 0 | |
%int1_86 = torch.constant.int 1 | |
%30 = torch.aten.transpose.int %23, %int0_85, %int1_86 : !torch.vtensor<[768,30522],f32>, !torch.int, !torch.int -> !torch.vtensor<[30522,768],f32> | |
%31 = torch.aten.sub.Tensor %arg336, %result1_73, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%32 = torch.aten.mul.Tensor %31, %result2_74 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%33 = torch.aten.mul.Tensor %29, %arg51 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%34 = torch.aten.mul.Tensor %33, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%35 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%36 = torch.aten.sum.dim_IntList %33, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%37 = torch.aten.mul.Tensor %33, %32 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%38 = torch.aten.sum.dim_IntList %37, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%39 = torch.aten.mul.Tensor %32, %38 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%40 = torch.aten.sub.Tensor %34, %36, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%41 = torch.aten.sub.Tensor %40, %39, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%42 = torch.aten.div.Tensor %result2_74, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%43 = torch.aten.mul.Tensor %42, %41 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%44 = torch.aten.mul.Tensor %29, %32 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%45 = torch.prim.ListConstruct %int0, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%46 = torch.aten.sum.dim_IntList %44, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%47 = torch.aten.sum.dim_IntList %29, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%48 = torch.aten.gelu_backward %43, %arg335, %str : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.str -> !torch.vtensor<[4,512,768],f32> | |
%49 = torch.prim.ListConstruct %int2048, %int768 : (!torch.int, !torch.int) -> !torch.list<int> | |
%50 = torch.aten.view %48, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_87 = torch.constant.int 0 | |
%int1_88 = torch.constant.int 1 | |
%51 = torch.aten.transpose.int %arg333, %int0_87, %int1_88 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%52 = torch.aten.mm %50, %51 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_89 = torch.constant.int 0 | |
%int1_90 = torch.constant.int 1 | |
%53 = torch.aten.transpose.int %50, %int0_89, %int1_90 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%54 = torch.aten.mm %53, %arg334 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_91 = torch.constant.int 0 | |
%int1_92 = torch.constant.int 1 | |
%55 = torch.aten.transpose.int %54, %int0_91, %int1_92 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%56 = torch.aten.sum.dim_IntList %50, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%57 = torch.aten.view %56, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%58 = torch.aten.view %52, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_93 = torch.constant.int 0 | |
%int1_94 = torch.constant.int 1 | |
%59 = torch.aten.transpose.int %55, %int0_93, %int1_94 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%60 = torch.aten.sub.Tensor %arg332, %result1_70, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%61 = torch.aten.mul.Tensor %60, %result2_71 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%62 = torch.aten.mul.Tensor %58, %arg17 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%63 = torch.aten.mul.Tensor %62, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%64 = torch.aten.sum.dim_IntList %62, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%65 = torch.aten.mul.Tensor %62, %61 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%66 = torch.aten.sum.dim_IntList %65, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%67 = torch.aten.mul.Tensor %61, %66 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%68 = torch.aten.sub.Tensor %63, %64, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%69 = torch.aten.sub.Tensor %68, %67, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%70 = torch.aten.div.Tensor %result2_71, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%71 = torch.aten.mul.Tensor %70, %69 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%72 = torch.aten.mul.Tensor %58, %61 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%73 = torch.aten.sum.dim_IntList %72, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%74 = torch.aten.sum.dim_IntList %58, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%75 = torch.aten.mul.Tensor %71, %arg331 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%76 = torch.aten.view %75, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_95 = torch.constant.int 0 | |
%int1_96 = torch.constant.int 1 | |
%77 = torch.aten.transpose.int %arg329, %int0_95, %int1_96 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%78 = torch.aten.mm %76, %77 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_97 = torch.constant.int 0 | |
%int1_98 = torch.constant.int 1 | |
%79 = torch.aten.transpose.int %76, %int0_97, %int1_98 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%80 = torch.aten.mm %79, %arg330 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_99 = torch.constant.int 0 | |
%int1_100 = torch.constant.int 1 | |
%81 = torch.aten.transpose.int %80, %int0_99, %int1_100 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%82 = torch.aten.sum.dim_IntList %76, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%83 = torch.aten.view %82, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%84 = torch.prim.ListConstruct %int4, %int512, %int3072 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%85 = torch.aten.view %78, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_101 = torch.constant.int 0 | |
%int1_102 = torch.constant.int 1 | |
%86 = torch.aten.transpose.int %81, %int0_101, %int1_102 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%87 = torch.aten.gelu_backward %85, %arg328, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%88 = torch.prim.ListConstruct %int2048, %int3072 : (!torch.int, !torch.int) -> !torch.list<int> | |
%89 = torch.aten.view %87, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_103 = torch.constant.int 0 | |
%int1_104 = torch.constant.int 1 | |
%90 = torch.aten.transpose.int %arg326, %int0_103, %int1_104 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%91 = torch.aten.mm %89, %90 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_105 = torch.constant.int 0 | |
%int1_106 = torch.constant.int 1 | |
%92 = torch.aten.transpose.int %89, %int0_105, %int1_106 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%93 = torch.aten.mm %92, %arg327 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_107 = torch.constant.int 0 | |
%int1_108 = torch.constant.int 1 | |
%94 = torch.aten.transpose.int %93, %int0_107, %int1_108 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%95 = torch.aten.sum.dim_IntList %89, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%96 = torch.prim.ListConstruct %int3072 : (!torch.int) -> !torch.list<int> | |
%97 = torch.aten.view %95, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%98 = torch.aten.view %91, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%99 = torch.aten.add.Tensor %71, %98, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_109 = torch.constant.int 0 | |
%int1_110 = torch.constant.int 1 | |
%100 = torch.aten.transpose.int %94, %int0_109, %int1_110 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%101 = torch.aten.sub.Tensor %arg325, %result1_67, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%102 = torch.aten.mul.Tensor %101, %result2_68 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%103 = torch.aten.mul.Tensor %99, %arg15 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%104 = torch.aten.mul.Tensor %103, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%105 = torch.aten.sum.dim_IntList %103, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%106 = torch.aten.mul.Tensor %103, %102 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%107 = torch.aten.sum.dim_IntList %106, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%108 = torch.aten.mul.Tensor %102, %107 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%109 = torch.aten.sub.Tensor %104, %105, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%110 = torch.aten.sub.Tensor %109, %108, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%111 = torch.aten.div.Tensor %result2_68, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%112 = torch.aten.mul.Tensor %111, %110 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%113 = torch.aten.mul.Tensor %99, %102 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%114 = torch.aten.sum.dim_IntList %113, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%115 = torch.aten.sum.dim_IntList %99, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%116 = torch.aten.mul.Tensor %112, %arg324 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%117 = torch.aten.view %116, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_111 = torch.constant.int 0 | |
%int1_112 = torch.constant.int 1 | |
%118 = torch.aten.transpose.int %arg322, %int0_111, %int1_112 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%119 = torch.aten.mm %117, %118 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_113 = torch.constant.int 0 | |
%int1_114 = torch.constant.int 1 | |
%120 = torch.aten.transpose.int %117, %int0_113, %int1_114 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%121 = torch.aten.mm %120, %arg323 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_115 = torch.constant.int 0 | |
%int1_116 = torch.constant.int 1 | |
%122 = torch.aten.transpose.int %121, %int0_115, %int1_116 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%123 = torch.aten.sum.dim_IntList %117, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%124 = torch.aten.view %123, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%125 = torch.aten.view %119, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_117 = torch.constant.int 0 | |
%int1_118 = torch.constant.int 1 | |
%126 = torch.aten.transpose.int %122, %int0_117, %int1_118 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%127 = torch.prim.ListConstruct %int4, %int512, %int12, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%128 = torch.aten.view %125, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%129 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%130 = torch.aten.permute %128, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%131 = torch.aten.clone %130, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%132 = torch.prim.ListConstruct %int48, %int512, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%133 = torch.aten.view %131, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%134 = torch.aten.transpose.int %arg320, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%135 = torch.aten.bmm %134, %133 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%136 = torch.aten.transpose.int %arg321, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%137 = torch.aten.bmm %133, %136 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%138 = torch.prim.ListConstruct %int4, %int12, %int512, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%139 = torch.aten.view %135, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%140 = torch.prim.ListConstruct %int4, %int12, %int512, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%141 = torch.aten.view %137, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%142 = torch.aten.mul.Tensor %141, %arg319 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%143 = torch.aten.mul.Tensor %142, %arg318 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%144 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_119 = torch.constant.bool true | |
%none_120 = torch.constant.none | |
%145 = torch.aten.sum.dim_IntList %143, %144, %true_119, %none_120 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_121 = torch.constant.int 0 | |
%146 = torch.aten.size.int %143, %int0_121 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_122 = torch.constant.int 1 | |
%147 = torch.aten.size.int %143, %int1_122 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_123 = torch.constant.int 2 | |
%148 = torch.aten.size.int %143, %int2_123 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_124 = torch.constant.int 3 | |
%149 = torch.aten.size.int %143, %int3_124 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%150 = torch.prim.ListConstruct %146, %147, %148, %149 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%151 = torch.aten.broadcast_to %145, %150 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%152 = torch.aten.mul.Tensor %arg318, %151 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_125 = torch.constant.float 1.000000e+00 | |
%153 = torch.aten.sub.Tensor %143, %152, %float1.000000e00_125 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%154 = torch.aten.div.Tensor %153, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%155 = torch.prim.ListConstruct %int48, %int512, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%156 = torch.aten.view %154, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%157 = torch.aten.transpose.int %arg316, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%158 = torch.aten.bmm %157, %156 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%159 = torch.aten.transpose.int %arg317, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%160 = torch.aten.bmm %156, %159 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%161 = torch.prim.ListConstruct %int4, %int12, %int64, %int512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%162 = torch.aten.view %158, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%163 = torch.aten.view %160, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%164 = torch.aten.transpose.int %162, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%165 = torch.aten.permute %163, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%166 = torch.aten.clone %165, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%167 = torch.aten.view %166, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%168 = torch.aten.permute %139, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%169 = torch.aten.clone %168, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%170 = torch.aten.view %169, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%171 = torch.aten.view %170, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_126 = torch.constant.int 0 | |
%int1_127 = torch.constant.int 1 | |
%172 = torch.aten.transpose.int %arg314, %int0_126, %int1_127 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%173 = torch.aten.mm %171, %172 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_128 = torch.constant.int 0 | |
%int1_129 = torch.constant.int 1 | |
%174 = torch.aten.transpose.int %171, %int0_128, %int1_129 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%175 = torch.aten.mm %174, %arg315 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_130 = torch.constant.int 0 | |
%int1_131 = torch.constant.int 1 | |
%176 = torch.aten.transpose.int %175, %int0_130, %int1_131 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%177 = torch.aten.sum.dim_IntList %171, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%178 = torch.aten.view %177, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%179 = torch.aten.view %173, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%180 = torch.aten.add.Tensor %112, %179, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_132 = torch.constant.int 0 | |
%int1_133 = torch.constant.int 1 | |
%181 = torch.aten.transpose.int %176, %int0_132, %int1_133 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%182 = torch.aten.permute %164, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%183 = torch.aten.view %182, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%184 = torch.aten.clone %183, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%185 = torch.aten.view %184, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_134 = torch.constant.int 0 | |
%int1_135 = torch.constant.int 1 | |
%186 = torch.aten.transpose.int %arg312, %int0_134, %int1_135 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%187 = torch.aten.mm %185, %186 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_136 = torch.constant.int 0 | |
%int1_137 = torch.constant.int 1 | |
%188 = torch.aten.transpose.int %185, %int0_136, %int1_137 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%189 = torch.aten.mm %188, %arg313 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_138 = torch.constant.int 0 | |
%int1_139 = torch.constant.int 1 | |
%190 = torch.aten.transpose.int %189, %int0_138, %int1_139 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%191 = torch.aten.sum.dim_IntList %185, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%192 = torch.aten.view %191, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%193 = torch.aten.view %187, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%194 = torch.aten.add.Tensor %180, %193, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_140 = torch.constant.int 0 | |
%int1_141 = torch.constant.int 1 | |
%195 = torch.aten.transpose.int %190, %int0_140, %int1_141 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%196 = torch.aten.view %167, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_142 = torch.constant.int 0 | |
%int1_143 = torch.constant.int 1 | |
%197 = torch.aten.transpose.int %arg310, %int0_142, %int1_143 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%198 = torch.aten.mm %196, %197 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_144 = torch.constant.int 0 | |
%int1_145 = torch.constant.int 1 | |
%199 = torch.aten.transpose.int %196, %int0_144, %int1_145 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%200 = torch.aten.mm %199, %arg311 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_146 = torch.constant.int 0 | |
%int1_147 = torch.constant.int 1 | |
%201 = torch.aten.transpose.int %200, %int0_146, %int1_147 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%202 = torch.aten.sum.dim_IntList %196, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%203 = torch.aten.view %202, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%204 = torch.aten.view %198, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%205 = torch.aten.add.Tensor %194, %204, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_148 = torch.constant.int 0 | |
%int1_149 = torch.constant.int 1 | |
%206 = torch.aten.transpose.int %201, %int0_148, %int1_149 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%207 = torch.aten.sub.Tensor %arg309, %result1_64, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%208 = torch.aten.mul.Tensor %207, %result2_65 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%209 = torch.aten.mul.Tensor %205, %arg13 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%210 = torch.aten.mul.Tensor %209, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%211 = torch.aten.sum.dim_IntList %209, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%212 = torch.aten.mul.Tensor %209, %208 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%213 = torch.aten.sum.dim_IntList %212, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%214 = torch.aten.mul.Tensor %208, %213 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%215 = torch.aten.sub.Tensor %210, %211, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%216 = torch.aten.sub.Tensor %215, %214, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%217 = torch.aten.div.Tensor %result2_65, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%218 = torch.aten.mul.Tensor %217, %216 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%219 = torch.aten.mul.Tensor %205, %208 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%220 = torch.aten.sum.dim_IntList %219, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%221 = torch.aten.sum.dim_IntList %205, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%222 = torch.aten.mul.Tensor %218, %arg308 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%223 = torch.aten.view %222, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_150 = torch.constant.int 0 | |
%int1_151 = torch.constant.int 1 | |
%224 = torch.aten.transpose.int %arg306, %int0_150, %int1_151 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%225 = torch.aten.mm %223, %224 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_152 = torch.constant.int 0 | |
%int1_153 = torch.constant.int 1 | |
%226 = torch.aten.transpose.int %223, %int0_152, %int1_153 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%227 = torch.aten.mm %226, %arg307 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_154 = torch.constant.int 0 | |
%int1_155 = torch.constant.int 1 | |
%228 = torch.aten.transpose.int %227, %int0_154, %int1_155 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%229 = torch.aten.sum.dim_IntList %223, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%230 = torch.aten.view %229, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%231 = torch.aten.view %225, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_156 = torch.constant.int 0 | |
%int1_157 = torch.constant.int 1 | |
%232 = torch.aten.transpose.int %228, %int0_156, %int1_157 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%233 = torch.aten.gelu_backward %231, %arg305, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%234 = torch.aten.view %233, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_158 = torch.constant.int 0 | |
%int1_159 = torch.constant.int 1 | |
%235 = torch.aten.transpose.int %arg303, %int0_158, %int1_159 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%236 = torch.aten.mm %234, %235 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_160 = torch.constant.int 0 | |
%int1_161 = torch.constant.int 1 | |
%237 = torch.aten.transpose.int %234, %int0_160, %int1_161 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%238 = torch.aten.mm %237, %arg304 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_162 = torch.constant.int 0 | |
%int1_163 = torch.constant.int 1 | |
%239 = torch.aten.transpose.int %238, %int0_162, %int1_163 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%240 = torch.aten.sum.dim_IntList %234, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%241 = torch.aten.view %240, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%242 = torch.aten.view %236, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%243 = torch.aten.add.Tensor %218, %242, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_164 = torch.constant.int 0 | |
%int1_165 = torch.constant.int 1 | |
%244 = torch.aten.transpose.int %239, %int0_164, %int1_165 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%245 = torch.aten.sub.Tensor %arg302, %result1_61, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%246 = torch.aten.mul.Tensor %245, %result2_62 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%247 = torch.aten.mul.Tensor %243, %arg11 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%248 = torch.aten.mul.Tensor %247, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%249 = torch.aten.sum.dim_IntList %247, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%250 = torch.aten.mul.Tensor %247, %246 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%251 = torch.aten.sum.dim_IntList %250, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%252 = torch.aten.mul.Tensor %246, %251 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%253 = torch.aten.sub.Tensor %248, %249, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%254 = torch.aten.sub.Tensor %253, %252, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%255 = torch.aten.div.Tensor %result2_62, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%256 = torch.aten.mul.Tensor %255, %254 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%257 = torch.aten.mul.Tensor %243, %246 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%258 = torch.aten.sum.dim_IntList %257, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%259 = torch.aten.sum.dim_IntList %243, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%260 = torch.aten.mul.Tensor %256, %arg301 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%261 = torch.aten.view %260, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_166 = torch.constant.int 0 | |
%int1_167 = torch.constant.int 1 | |
%262 = torch.aten.transpose.int %arg299, %int0_166, %int1_167 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%263 = torch.aten.mm %261, %262 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_168 = torch.constant.int 0 | |
%int1_169 = torch.constant.int 1 | |
%264 = torch.aten.transpose.int %261, %int0_168, %int1_169 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%265 = torch.aten.mm %264, %arg300 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_170 = torch.constant.int 0 | |
%int1_171 = torch.constant.int 1 | |
%266 = torch.aten.transpose.int %265, %int0_170, %int1_171 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%267 = torch.aten.sum.dim_IntList %261, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%268 = torch.aten.view %267, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%269 = torch.aten.view %263, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_172 = torch.constant.int 0 | |
%int1_173 = torch.constant.int 1 | |
%270 = torch.aten.transpose.int %266, %int0_172, %int1_173 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%271 = torch.aten.view %269, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%272 = torch.aten.permute %271, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%273 = torch.aten.clone %272, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%274 = torch.aten.view %273, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%275 = torch.aten.transpose.int %arg297, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%276 = torch.aten.bmm %275, %274 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%277 = torch.aten.transpose.int %arg298, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%278 = torch.aten.bmm %274, %277 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%279 = torch.aten.view %276, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%280 = torch.aten.view %278, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%281 = torch.aten.mul.Tensor %280, %arg296 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%282 = torch.aten.mul.Tensor %281, %arg295 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%283 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_174 = torch.constant.bool true | |
%none_175 = torch.constant.none | |
%284 = torch.aten.sum.dim_IntList %282, %283, %true_174, %none_175 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_176 = torch.constant.int 0 | |
%285 = torch.aten.size.int %282, %int0_176 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_177 = torch.constant.int 1 | |
%286 = torch.aten.size.int %282, %int1_177 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_178 = torch.constant.int 2 | |
%287 = torch.aten.size.int %282, %int2_178 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_179 = torch.constant.int 3 | |
%288 = torch.aten.size.int %282, %int3_179 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%289 = torch.prim.ListConstruct %285, %286, %287, %288 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%290 = torch.aten.broadcast_to %284, %289 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%291 = torch.aten.mul.Tensor %arg295, %290 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_180 = torch.constant.float 1.000000e+00 | |
%292 = torch.aten.sub.Tensor %282, %291, %float1.000000e00_180 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%293 = torch.aten.div.Tensor %292, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%294 = torch.aten.view %293, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%295 = torch.aten.transpose.int %arg293, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%296 = torch.aten.bmm %295, %294 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%297 = torch.aten.transpose.int %arg294, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%298 = torch.aten.bmm %294, %297 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%299 = torch.aten.view %296, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%300 = torch.aten.view %298, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%301 = torch.aten.transpose.int %299, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%302 = torch.aten.permute %300, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%303 = torch.aten.clone %302, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%304 = torch.aten.view %303, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%305 = torch.aten.permute %279, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%306 = torch.aten.clone %305, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%307 = torch.aten.view %306, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%308 = torch.aten.view %307, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_181 = torch.constant.int 0 | |
%int1_182 = torch.constant.int 1 | |
%309 = torch.aten.transpose.int %arg291, %int0_181, %int1_182 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%310 = torch.aten.mm %308, %309 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_183 = torch.constant.int 0 | |
%int1_184 = torch.constant.int 1 | |
%311 = torch.aten.transpose.int %308, %int0_183, %int1_184 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%312 = torch.aten.mm %311, %arg292 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_185 = torch.constant.int 0 | |
%int1_186 = torch.constant.int 1 | |
%313 = torch.aten.transpose.int %312, %int0_185, %int1_186 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%314 = torch.aten.sum.dim_IntList %308, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%315 = torch.aten.view %314, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%316 = torch.aten.view %310, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%317 = torch.aten.add.Tensor %256, %316, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_187 = torch.constant.int 0 | |
%int1_188 = torch.constant.int 1 | |
%318 = torch.aten.transpose.int %313, %int0_187, %int1_188 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%319 = torch.aten.permute %301, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%320 = torch.aten.view %319, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%321 = torch.aten.clone %320, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%322 = torch.aten.view %321, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_189 = torch.constant.int 0 | |
%int1_190 = torch.constant.int 1 | |
%323 = torch.aten.transpose.int %arg289, %int0_189, %int1_190 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%324 = torch.aten.mm %322, %323 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_191 = torch.constant.int 0 | |
%int1_192 = torch.constant.int 1 | |
%325 = torch.aten.transpose.int %322, %int0_191, %int1_192 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%326 = torch.aten.mm %325, %arg290 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_193 = torch.constant.int 0 | |
%int1_194 = torch.constant.int 1 | |
%327 = torch.aten.transpose.int %326, %int0_193, %int1_194 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%328 = torch.aten.sum.dim_IntList %322, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%329 = torch.aten.view %328, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%330 = torch.aten.view %324, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%331 = torch.aten.add.Tensor %317, %330, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_195 = torch.constant.int 0 | |
%int1_196 = torch.constant.int 1 | |
%332 = torch.aten.transpose.int %327, %int0_195, %int1_196 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%333 = torch.aten.view %304, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_197 = torch.constant.int 0 | |
%int1_198 = torch.constant.int 1 | |
%334 = torch.aten.transpose.int %arg287, %int0_197, %int1_198 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%335 = torch.aten.mm %333, %334 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_199 = torch.constant.int 0 | |
%int1_200 = torch.constant.int 1 | |
%336 = torch.aten.transpose.int %333, %int0_199, %int1_200 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%337 = torch.aten.mm %336, %arg288 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_201 = torch.constant.int 0 | |
%int1_202 = torch.constant.int 1 | |
%338 = torch.aten.transpose.int %337, %int0_201, %int1_202 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%339 = torch.aten.sum.dim_IntList %333, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%340 = torch.aten.view %339, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%341 = torch.aten.view %335, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%342 = torch.aten.add.Tensor %331, %341, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_203 = torch.constant.int 0 | |
%int1_204 = torch.constant.int 1 | |
%343 = torch.aten.transpose.int %338, %int0_203, %int1_204 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%344 = torch.aten.sub.Tensor %arg286, %result1_58, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%345 = torch.aten.mul.Tensor %344, %result2_59 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%346 = torch.aten.mul.Tensor %342, %arg49 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%347 = torch.aten.mul.Tensor %346, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%348 = torch.aten.sum.dim_IntList %346, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%349 = torch.aten.mul.Tensor %346, %345 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%350 = torch.aten.sum.dim_IntList %349, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%351 = torch.aten.mul.Tensor %345, %350 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%352 = torch.aten.sub.Tensor %347, %348, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%353 = torch.aten.sub.Tensor %352, %351, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%354 = torch.aten.div.Tensor %result2_59, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%355 = torch.aten.mul.Tensor %354, %353 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%356 = torch.aten.mul.Tensor %342, %345 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%357 = torch.aten.sum.dim_IntList %356, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%358 = torch.aten.sum.dim_IntList %342, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%359 = torch.aten.mul.Tensor %355, %arg285 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%360 = torch.aten.view %359, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_205 = torch.constant.int 0 | |
%int1_206 = torch.constant.int 1 | |
%361 = torch.aten.transpose.int %arg283, %int0_205, %int1_206 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%362 = torch.aten.mm %360, %361 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_207 = torch.constant.int 0 | |
%int1_208 = torch.constant.int 1 | |
%363 = torch.aten.transpose.int %360, %int0_207, %int1_208 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%364 = torch.aten.mm %363, %arg284 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_209 = torch.constant.int 0 | |
%int1_210 = torch.constant.int 1 | |
%365 = torch.aten.transpose.int %364, %int0_209, %int1_210 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%366 = torch.aten.sum.dim_IntList %360, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%367 = torch.aten.view %366, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%368 = torch.aten.view %362, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_211 = torch.constant.int 0 | |
%int1_212 = torch.constant.int 1 | |
%369 = torch.aten.transpose.int %365, %int0_211, %int1_212 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%370 = torch.aten.gelu_backward %368, %arg282, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%371 = torch.aten.view %370, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_213 = torch.constant.int 0 | |
%int1_214 = torch.constant.int 1 | |
%372 = torch.aten.transpose.int %arg280, %int0_213, %int1_214 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%373 = torch.aten.mm %371, %372 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_215 = torch.constant.int 0 | |
%int1_216 = torch.constant.int 1 | |
%374 = torch.aten.transpose.int %371, %int0_215, %int1_216 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%375 = torch.aten.mm %374, %arg281 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_217 = torch.constant.int 0 | |
%int1_218 = torch.constant.int 1 | |
%376 = torch.aten.transpose.int %375, %int0_217, %int1_218 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%377 = torch.aten.sum.dim_IntList %371, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%378 = torch.aten.view %377, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%379 = torch.aten.view %373, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%380 = torch.aten.add.Tensor %355, %379, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_219 = torch.constant.int 0 | |
%int1_220 = torch.constant.int 1 | |
%381 = torch.aten.transpose.int %376, %int0_219, %int1_220 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%382 = torch.aten.sub.Tensor %arg279, %result1_55, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%383 = torch.aten.mul.Tensor %382, %result2_56 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%384 = torch.aten.mul.Tensor %380, %arg47 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%385 = torch.aten.mul.Tensor %384, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%386 = torch.aten.sum.dim_IntList %384, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%387 = torch.aten.mul.Tensor %384, %383 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%388 = torch.aten.sum.dim_IntList %387, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%389 = torch.aten.mul.Tensor %383, %388 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%390 = torch.aten.sub.Tensor %385, %386, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%391 = torch.aten.sub.Tensor %390, %389, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%392 = torch.aten.div.Tensor %result2_56, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%393 = torch.aten.mul.Tensor %392, %391 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%394 = torch.aten.mul.Tensor %380, %383 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%395 = torch.aten.sum.dim_IntList %394, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%396 = torch.aten.sum.dim_IntList %380, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%397 = torch.aten.mul.Tensor %393, %arg278 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%398 = torch.aten.view %397, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_221 = torch.constant.int 0 | |
%int1_222 = torch.constant.int 1 | |
%399 = torch.aten.transpose.int %arg276, %int0_221, %int1_222 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%400 = torch.aten.mm %398, %399 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_223 = torch.constant.int 0 | |
%int1_224 = torch.constant.int 1 | |
%401 = torch.aten.transpose.int %398, %int0_223, %int1_224 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%402 = torch.aten.mm %401, %arg277 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_225 = torch.constant.int 0 | |
%int1_226 = torch.constant.int 1 | |
%403 = torch.aten.transpose.int %402, %int0_225, %int1_226 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%404 = torch.aten.sum.dim_IntList %398, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%405 = torch.aten.view %404, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%406 = torch.aten.view %400, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_227 = torch.constant.int 0 | |
%int1_228 = torch.constant.int 1 | |
%407 = torch.aten.transpose.int %403, %int0_227, %int1_228 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%408 = torch.aten.view %406, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%409 = torch.aten.permute %408, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%410 = torch.aten.clone %409, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%411 = torch.aten.view %410, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%412 = torch.aten.transpose.int %arg274, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%413 = torch.aten.bmm %412, %411 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%414 = torch.aten.transpose.int %arg275, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%415 = torch.aten.bmm %411, %414 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%416 = torch.aten.view %413, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%417 = torch.aten.view %415, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%418 = torch.aten.mul.Tensor %417, %arg273 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%419 = torch.aten.mul.Tensor %418, %arg272 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%420 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_229 = torch.constant.bool true | |
%none_230 = torch.constant.none | |
%421 = torch.aten.sum.dim_IntList %419, %420, %true_229, %none_230 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_231 = torch.constant.int 0 | |
%422 = torch.aten.size.int %419, %int0_231 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_232 = torch.constant.int 1 | |
%423 = torch.aten.size.int %419, %int1_232 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_233 = torch.constant.int 2 | |
%424 = torch.aten.size.int %419, %int2_233 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_234 = torch.constant.int 3 | |
%425 = torch.aten.size.int %419, %int3_234 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%426 = torch.prim.ListConstruct %422, %423, %424, %425 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%427 = torch.aten.broadcast_to %421, %426 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%428 = torch.aten.mul.Tensor %arg272, %427 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_235 = torch.constant.float 1.000000e+00 | |
%429 = torch.aten.sub.Tensor %419, %428, %float1.000000e00_235 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%430 = torch.aten.div.Tensor %429, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%431 = torch.aten.view %430, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%432 = torch.aten.transpose.int %arg270, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%433 = torch.aten.bmm %432, %431 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%434 = torch.aten.transpose.int %arg271, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%435 = torch.aten.bmm %431, %434 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%436 = torch.aten.view %433, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%437 = torch.aten.view %435, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%438 = torch.aten.transpose.int %436, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%439 = torch.aten.permute %437, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%440 = torch.aten.clone %439, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%441 = torch.aten.view %440, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%442 = torch.aten.permute %416, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%443 = torch.aten.clone %442, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%444 = torch.aten.view %443, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%445 = torch.aten.view %444, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_236 = torch.constant.int 0 | |
%int1_237 = torch.constant.int 1 | |
%446 = torch.aten.transpose.int %arg268, %int0_236, %int1_237 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%447 = torch.aten.mm %445, %446 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_238 = torch.constant.int 0 | |
%int1_239 = torch.constant.int 1 | |
%448 = torch.aten.transpose.int %445, %int0_238, %int1_239 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%449 = torch.aten.mm %448, %arg269 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_240 = torch.constant.int 0 | |
%int1_241 = torch.constant.int 1 | |
%450 = torch.aten.transpose.int %449, %int0_240, %int1_241 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%451 = torch.aten.sum.dim_IntList %445, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%452 = torch.aten.view %451, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%453 = torch.aten.view %447, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%454 = torch.aten.add.Tensor %393, %453, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_242 = torch.constant.int 0 | |
%int1_243 = torch.constant.int 1 | |
%455 = torch.aten.transpose.int %450, %int0_242, %int1_243 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%456 = torch.aten.permute %438, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%457 = torch.aten.view %456, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%458 = torch.aten.clone %457, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%459 = torch.aten.view %458, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_244 = torch.constant.int 0 | |
%int1_245 = torch.constant.int 1 | |
%460 = torch.aten.transpose.int %arg266, %int0_244, %int1_245 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%461 = torch.aten.mm %459, %460 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_246 = torch.constant.int 0 | |
%int1_247 = torch.constant.int 1 | |
%462 = torch.aten.transpose.int %459, %int0_246, %int1_247 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%463 = torch.aten.mm %462, %arg267 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_248 = torch.constant.int 0 | |
%int1_249 = torch.constant.int 1 | |
%464 = torch.aten.transpose.int %463, %int0_248, %int1_249 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%465 = torch.aten.sum.dim_IntList %459, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%466 = torch.aten.view %465, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%467 = torch.aten.view %461, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%468 = torch.aten.add.Tensor %454, %467, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_250 = torch.constant.int 0 | |
%int1_251 = torch.constant.int 1 | |
%469 = torch.aten.transpose.int %464, %int0_250, %int1_251 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%470 = torch.aten.view %441, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_252 = torch.constant.int 0 | |
%int1_253 = torch.constant.int 1 | |
%471 = torch.aten.transpose.int %arg264, %int0_252, %int1_253 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%472 = torch.aten.mm %470, %471 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_254 = torch.constant.int 0 | |
%int1_255 = torch.constant.int 1 | |
%473 = torch.aten.transpose.int %470, %int0_254, %int1_255 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%474 = torch.aten.mm %473, %arg265 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_256 = torch.constant.int 0 | |
%int1_257 = torch.constant.int 1 | |
%475 = torch.aten.transpose.int %474, %int0_256, %int1_257 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%476 = torch.aten.sum.dim_IntList %470, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%477 = torch.aten.view %476, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%478 = torch.aten.view %472, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%479 = torch.aten.add.Tensor %468, %478, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_258 = torch.constant.int 0 | |
%int1_259 = torch.constant.int 1 | |
%480 = torch.aten.transpose.int %475, %int0_258, %int1_259 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%481 = torch.aten.sub.Tensor %arg263, %result1_52, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%482 = torch.aten.mul.Tensor %481, %result2_53 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%483 = torch.aten.mul.Tensor %479, %arg45 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%484 = torch.aten.mul.Tensor %483, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%485 = torch.aten.sum.dim_IntList %483, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%486 = torch.aten.mul.Tensor %483, %482 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%487 = torch.aten.sum.dim_IntList %486, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%488 = torch.aten.mul.Tensor %482, %487 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%489 = torch.aten.sub.Tensor %484, %485, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%490 = torch.aten.sub.Tensor %489, %488, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%491 = torch.aten.div.Tensor %result2_53, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%492 = torch.aten.mul.Tensor %491, %490 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%493 = torch.aten.mul.Tensor %479, %482 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%494 = torch.aten.sum.dim_IntList %493, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%495 = torch.aten.sum.dim_IntList %479, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%496 = torch.aten.mul.Tensor %492, %arg262 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%497 = torch.aten.view %496, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_260 = torch.constant.int 0 | |
%int1_261 = torch.constant.int 1 | |
%498 = torch.aten.transpose.int %arg260, %int0_260, %int1_261 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%499 = torch.aten.mm %497, %498 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_262 = torch.constant.int 0 | |
%int1_263 = torch.constant.int 1 | |
%500 = torch.aten.transpose.int %497, %int0_262, %int1_263 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%501 = torch.aten.mm %500, %arg261 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_264 = torch.constant.int 0 | |
%int1_265 = torch.constant.int 1 | |
%502 = torch.aten.transpose.int %501, %int0_264, %int1_265 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%503 = torch.aten.sum.dim_IntList %497, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%504 = torch.aten.view %503, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%505 = torch.aten.view %499, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_266 = torch.constant.int 0 | |
%int1_267 = torch.constant.int 1 | |
%506 = torch.aten.transpose.int %502, %int0_266, %int1_267 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%507 = torch.aten.gelu_backward %505, %arg259, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%508 = torch.aten.view %507, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_268 = torch.constant.int 0 | |
%int1_269 = torch.constant.int 1 | |
%509 = torch.aten.transpose.int %arg257, %int0_268, %int1_269 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%510 = torch.aten.mm %508, %509 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_270 = torch.constant.int 0 | |
%int1_271 = torch.constant.int 1 | |
%511 = torch.aten.transpose.int %508, %int0_270, %int1_271 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%512 = torch.aten.mm %511, %arg258 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_272 = torch.constant.int 0 | |
%int1_273 = torch.constant.int 1 | |
%513 = torch.aten.transpose.int %512, %int0_272, %int1_273 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%514 = torch.aten.sum.dim_IntList %508, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%515 = torch.aten.view %514, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%516 = torch.aten.view %510, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%517 = torch.aten.add.Tensor %492, %516, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_274 = torch.constant.int 0 | |
%int1_275 = torch.constant.int 1 | |
%518 = torch.aten.transpose.int %513, %int0_274, %int1_275 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%519 = torch.aten.sub.Tensor %arg256, %result1_49, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%520 = torch.aten.mul.Tensor %519, %result2_50 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%521 = torch.aten.mul.Tensor %517, %arg43 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%522 = torch.aten.mul.Tensor %521, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%523 = torch.aten.sum.dim_IntList %521, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%524 = torch.aten.mul.Tensor %521, %520 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%525 = torch.aten.sum.dim_IntList %524, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%526 = torch.aten.mul.Tensor %520, %525 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%527 = torch.aten.sub.Tensor %522, %523, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%528 = torch.aten.sub.Tensor %527, %526, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%529 = torch.aten.div.Tensor %result2_50, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%530 = torch.aten.mul.Tensor %529, %528 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%531 = torch.aten.mul.Tensor %517, %520 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%532 = torch.aten.sum.dim_IntList %531, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%533 = torch.aten.sum.dim_IntList %517, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%534 = torch.aten.mul.Tensor %530, %arg255 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%535 = torch.aten.view %534, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_276 = torch.constant.int 0 | |
%int1_277 = torch.constant.int 1 | |
%536 = torch.aten.transpose.int %arg253, %int0_276, %int1_277 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%537 = torch.aten.mm %535, %536 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_278 = torch.constant.int 0 | |
%int1_279 = torch.constant.int 1 | |
%538 = torch.aten.transpose.int %535, %int0_278, %int1_279 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%539 = torch.aten.mm %538, %arg254 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_280 = torch.constant.int 0 | |
%int1_281 = torch.constant.int 1 | |
%540 = torch.aten.transpose.int %539, %int0_280, %int1_281 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%541 = torch.aten.sum.dim_IntList %535, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%542 = torch.aten.view %541, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%543 = torch.aten.view %537, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_282 = torch.constant.int 0 | |
%int1_283 = torch.constant.int 1 | |
%544 = torch.aten.transpose.int %540, %int0_282, %int1_283 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%545 = torch.aten.view %543, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%546 = torch.aten.permute %545, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%547 = torch.aten.clone %546, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%548 = torch.aten.view %547, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%549 = torch.aten.transpose.int %arg251, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%550 = torch.aten.bmm %549, %548 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%551 = torch.aten.transpose.int %arg252, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%552 = torch.aten.bmm %548, %551 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%553 = torch.aten.view %550, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%554 = torch.aten.view %552, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%555 = torch.aten.mul.Tensor %554, %arg250 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%556 = torch.aten.mul.Tensor %555, %arg249 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%557 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_284 = torch.constant.bool true | |
%none_285 = torch.constant.none | |
%558 = torch.aten.sum.dim_IntList %556, %557, %true_284, %none_285 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_286 = torch.constant.int 0 | |
%559 = torch.aten.size.int %556, %int0_286 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_287 = torch.constant.int 1 | |
%560 = torch.aten.size.int %556, %int1_287 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_288 = torch.constant.int 2 | |
%561 = torch.aten.size.int %556, %int2_288 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_289 = torch.constant.int 3 | |
%562 = torch.aten.size.int %556, %int3_289 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%563 = torch.prim.ListConstruct %559, %560, %561, %562 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%564 = torch.aten.broadcast_to %558, %563 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%565 = torch.aten.mul.Tensor %arg249, %564 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_290 = torch.constant.float 1.000000e+00 | |
%566 = torch.aten.sub.Tensor %556, %565, %float1.000000e00_290 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%567 = torch.aten.div.Tensor %566, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%568 = torch.aten.view %567, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%569 = torch.aten.transpose.int %arg247, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%570 = torch.aten.bmm %569, %568 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%571 = torch.aten.transpose.int %arg248, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%572 = torch.aten.bmm %568, %571 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%573 = torch.aten.view %570, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%574 = torch.aten.view %572, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%575 = torch.aten.transpose.int %573, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%576 = torch.aten.permute %574, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%577 = torch.aten.clone %576, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%578 = torch.aten.view %577, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%579 = torch.aten.permute %553, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%580 = torch.aten.clone %579, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%581 = torch.aten.view %580, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%582 = torch.aten.view %581, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_291 = torch.constant.int 0 | |
%int1_292 = torch.constant.int 1 | |
%583 = torch.aten.transpose.int %arg245, %int0_291, %int1_292 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%584 = torch.aten.mm %582, %583 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_293 = torch.constant.int 0 | |
%int1_294 = torch.constant.int 1 | |
%585 = torch.aten.transpose.int %582, %int0_293, %int1_294 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%586 = torch.aten.mm %585, %arg246 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_295 = torch.constant.int 0 | |
%int1_296 = torch.constant.int 1 | |
%587 = torch.aten.transpose.int %586, %int0_295, %int1_296 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%588 = torch.aten.sum.dim_IntList %582, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%589 = torch.aten.view %588, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%590 = torch.aten.view %584, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%591 = torch.aten.add.Tensor %530, %590, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_297 = torch.constant.int 0 | |
%int1_298 = torch.constant.int 1 | |
%592 = torch.aten.transpose.int %587, %int0_297, %int1_298 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%593 = torch.aten.permute %575, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%594 = torch.aten.view %593, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%595 = torch.aten.clone %594, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%596 = torch.aten.view %595, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_299 = torch.constant.int 0 | |
%int1_300 = torch.constant.int 1 | |
%597 = torch.aten.transpose.int %arg243, %int0_299, %int1_300 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%598 = torch.aten.mm %596, %597 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_301 = torch.constant.int 0 | |
%int1_302 = torch.constant.int 1 | |
%599 = torch.aten.transpose.int %596, %int0_301, %int1_302 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%600 = torch.aten.mm %599, %arg244 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_303 = torch.constant.int 0 | |
%int1_304 = torch.constant.int 1 | |
%601 = torch.aten.transpose.int %600, %int0_303, %int1_304 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%602 = torch.aten.sum.dim_IntList %596, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%603 = torch.aten.view %602, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%604 = torch.aten.view %598, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%605 = torch.aten.add.Tensor %591, %604, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_305 = torch.constant.int 0 | |
%int1_306 = torch.constant.int 1 | |
%606 = torch.aten.transpose.int %601, %int0_305, %int1_306 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%607 = torch.aten.view %578, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_307 = torch.constant.int 0 | |
%int1_308 = torch.constant.int 1 | |
%608 = torch.aten.transpose.int %arg241, %int0_307, %int1_308 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%609 = torch.aten.mm %607, %608 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_309 = torch.constant.int 0 | |
%int1_310 = torch.constant.int 1 | |
%610 = torch.aten.transpose.int %607, %int0_309, %int1_310 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%611 = torch.aten.mm %610, %arg242 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_311 = torch.constant.int 0 | |
%int1_312 = torch.constant.int 1 | |
%612 = torch.aten.transpose.int %611, %int0_311, %int1_312 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%613 = torch.aten.sum.dim_IntList %607, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%614 = torch.aten.view %613, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%615 = torch.aten.view %609, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%616 = torch.aten.add.Tensor %605, %615, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_313 = torch.constant.int 0 | |
%int1_314 = torch.constant.int 1 | |
%617 = torch.aten.transpose.int %612, %int0_313, %int1_314 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%618 = torch.aten.sub.Tensor %arg240, %result1_46, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%619 = torch.aten.mul.Tensor %618, %result2_47 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%620 = torch.aten.mul.Tensor %616, %arg41 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%621 = torch.aten.mul.Tensor %620, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%622 = torch.aten.sum.dim_IntList %620, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%623 = torch.aten.mul.Tensor %620, %619 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%624 = torch.aten.sum.dim_IntList %623, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%625 = torch.aten.mul.Tensor %619, %624 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%626 = torch.aten.sub.Tensor %621, %622, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%627 = torch.aten.sub.Tensor %626, %625, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%628 = torch.aten.div.Tensor %result2_47, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%629 = torch.aten.mul.Tensor %628, %627 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%630 = torch.aten.mul.Tensor %616, %619 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%631 = torch.aten.sum.dim_IntList %630, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%632 = torch.aten.sum.dim_IntList %616, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%633 = torch.aten.mul.Tensor %629, %arg239 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%634 = torch.aten.view %633, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_315 = torch.constant.int 0 | |
%int1_316 = torch.constant.int 1 | |
%635 = torch.aten.transpose.int %arg237, %int0_315, %int1_316 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%636 = torch.aten.mm %634, %635 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_317 = torch.constant.int 0 | |
%int1_318 = torch.constant.int 1 | |
%637 = torch.aten.transpose.int %634, %int0_317, %int1_318 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%638 = torch.aten.mm %637, %arg238 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_319 = torch.constant.int 0 | |
%int1_320 = torch.constant.int 1 | |
%639 = torch.aten.transpose.int %638, %int0_319, %int1_320 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%640 = torch.aten.sum.dim_IntList %634, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%641 = torch.aten.view %640, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%642 = torch.aten.view %636, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_321 = torch.constant.int 0 | |
%int1_322 = torch.constant.int 1 | |
%643 = torch.aten.transpose.int %639, %int0_321, %int1_322 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%644 = torch.aten.gelu_backward %642, %arg236, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%645 = torch.aten.view %644, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_323 = torch.constant.int 0 | |
%int1_324 = torch.constant.int 1 | |
%646 = torch.aten.transpose.int %arg234, %int0_323, %int1_324 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%647 = torch.aten.mm %645, %646 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_325 = torch.constant.int 0 | |
%int1_326 = torch.constant.int 1 | |
%648 = torch.aten.transpose.int %645, %int0_325, %int1_326 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%649 = torch.aten.mm %648, %arg235 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_327 = torch.constant.int 0 | |
%int1_328 = torch.constant.int 1 | |
%650 = torch.aten.transpose.int %649, %int0_327, %int1_328 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%651 = torch.aten.sum.dim_IntList %645, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%652 = torch.aten.view %651, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%653 = torch.aten.view %647, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%654 = torch.aten.add.Tensor %629, %653, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_329 = torch.constant.int 0 | |
%int1_330 = torch.constant.int 1 | |
%655 = torch.aten.transpose.int %650, %int0_329, %int1_330 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%656 = torch.aten.sub.Tensor %arg233, %result1_43, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%657 = torch.aten.mul.Tensor %656, %result2_44 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%658 = torch.aten.mul.Tensor %654, %arg39 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%659 = torch.aten.mul.Tensor %658, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%660 = torch.aten.sum.dim_IntList %658, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%661 = torch.aten.mul.Tensor %658, %657 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%662 = torch.aten.sum.dim_IntList %661, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%663 = torch.aten.mul.Tensor %657, %662 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%664 = torch.aten.sub.Tensor %659, %660, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%665 = torch.aten.sub.Tensor %664, %663, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%666 = torch.aten.div.Tensor %result2_44, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%667 = torch.aten.mul.Tensor %666, %665 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%668 = torch.aten.mul.Tensor %654, %657 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%669 = torch.aten.sum.dim_IntList %668, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%670 = torch.aten.sum.dim_IntList %654, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%671 = torch.aten.mul.Tensor %667, %arg232 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%672 = torch.aten.view %671, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_331 = torch.constant.int 0 | |
%int1_332 = torch.constant.int 1 | |
%673 = torch.aten.transpose.int %arg230, %int0_331, %int1_332 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%674 = torch.aten.mm %672, %673 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_333 = torch.constant.int 0 | |
%int1_334 = torch.constant.int 1 | |
%675 = torch.aten.transpose.int %672, %int0_333, %int1_334 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%676 = torch.aten.mm %675, %arg231 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_335 = torch.constant.int 0 | |
%int1_336 = torch.constant.int 1 | |
%677 = torch.aten.transpose.int %676, %int0_335, %int1_336 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%678 = torch.aten.sum.dim_IntList %672, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%679 = torch.aten.view %678, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%680 = torch.aten.view %674, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_337 = torch.constant.int 0 | |
%int1_338 = torch.constant.int 1 | |
%681 = torch.aten.transpose.int %677, %int0_337, %int1_338 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%682 = torch.aten.view %680, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%683 = torch.aten.permute %682, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%684 = torch.aten.clone %683, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%685 = torch.aten.view %684, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%686 = torch.aten.transpose.int %arg228, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%687 = torch.aten.bmm %686, %685 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%688 = torch.aten.transpose.int %arg229, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%689 = torch.aten.bmm %685, %688 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%690 = torch.aten.view %687, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%691 = torch.aten.view %689, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%692 = torch.aten.mul.Tensor %691, %arg227 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%693 = torch.aten.mul.Tensor %692, %arg226 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%694 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_339 = torch.constant.bool true | |
%none_340 = torch.constant.none | |
%695 = torch.aten.sum.dim_IntList %693, %694, %true_339, %none_340 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_341 = torch.constant.int 0 | |
%696 = torch.aten.size.int %693, %int0_341 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_342 = torch.constant.int 1 | |
%697 = torch.aten.size.int %693, %int1_342 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_343 = torch.constant.int 2 | |
%698 = torch.aten.size.int %693, %int2_343 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_344 = torch.constant.int 3 | |
%699 = torch.aten.size.int %693, %int3_344 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%700 = torch.prim.ListConstruct %696, %697, %698, %699 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%701 = torch.aten.broadcast_to %695, %700 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%702 = torch.aten.mul.Tensor %arg226, %701 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_345 = torch.constant.float 1.000000e+00 | |
%703 = torch.aten.sub.Tensor %693, %702, %float1.000000e00_345 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%704 = torch.aten.div.Tensor %703, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%705 = torch.aten.view %704, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%706 = torch.aten.transpose.int %arg224, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%707 = torch.aten.bmm %706, %705 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%708 = torch.aten.transpose.int %arg225, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%709 = torch.aten.bmm %705, %708 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%710 = torch.aten.view %707, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%711 = torch.aten.view %709, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%712 = torch.aten.transpose.int %710, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%713 = torch.aten.permute %711, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%714 = torch.aten.clone %713, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%715 = torch.aten.view %714, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%716 = torch.aten.permute %690, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%717 = torch.aten.clone %716, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%718 = torch.aten.view %717, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%719 = torch.aten.view %718, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_346 = torch.constant.int 0 | |
%int1_347 = torch.constant.int 1 | |
%720 = torch.aten.transpose.int %arg222, %int0_346, %int1_347 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%721 = torch.aten.mm %719, %720 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_348 = torch.constant.int 0 | |
%int1_349 = torch.constant.int 1 | |
%722 = torch.aten.transpose.int %719, %int0_348, %int1_349 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%723 = torch.aten.mm %722, %arg223 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_350 = torch.constant.int 0 | |
%int1_351 = torch.constant.int 1 | |
%724 = torch.aten.transpose.int %723, %int0_350, %int1_351 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%725 = torch.aten.sum.dim_IntList %719, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%726 = torch.aten.view %725, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%727 = torch.aten.view %721, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%728 = torch.aten.add.Tensor %667, %727, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_352 = torch.constant.int 0 | |
%int1_353 = torch.constant.int 1 | |
%729 = torch.aten.transpose.int %724, %int0_352, %int1_353 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%730 = torch.aten.permute %712, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%731 = torch.aten.view %730, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%732 = torch.aten.clone %731, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%733 = torch.aten.view %732, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_354 = torch.constant.int 0 | |
%int1_355 = torch.constant.int 1 | |
%734 = torch.aten.transpose.int %arg220, %int0_354, %int1_355 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%735 = torch.aten.mm %733, %734 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_356 = torch.constant.int 0 | |
%int1_357 = torch.constant.int 1 | |
%736 = torch.aten.transpose.int %733, %int0_356, %int1_357 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%737 = torch.aten.mm %736, %arg221 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_358 = torch.constant.int 0 | |
%int1_359 = torch.constant.int 1 | |
%738 = torch.aten.transpose.int %737, %int0_358, %int1_359 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%739 = torch.aten.sum.dim_IntList %733, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%740 = torch.aten.view %739, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%741 = torch.aten.view %735, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%742 = torch.aten.add.Tensor %728, %741, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_360 = torch.constant.int 0 | |
%int1_361 = torch.constant.int 1 | |
%743 = torch.aten.transpose.int %738, %int0_360, %int1_361 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%744 = torch.aten.view %715, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_362 = torch.constant.int 0 | |
%int1_363 = torch.constant.int 1 | |
%745 = torch.aten.transpose.int %arg218, %int0_362, %int1_363 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%746 = torch.aten.mm %744, %745 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_364 = torch.constant.int 0 | |
%int1_365 = torch.constant.int 1 | |
%747 = torch.aten.transpose.int %744, %int0_364, %int1_365 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%748 = torch.aten.mm %747, %arg219 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_366 = torch.constant.int 0 | |
%int1_367 = torch.constant.int 1 | |
%749 = torch.aten.transpose.int %748, %int0_366, %int1_367 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%750 = torch.aten.sum.dim_IntList %744, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%751 = torch.aten.view %750, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%752 = torch.aten.view %746, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%753 = torch.aten.add.Tensor %742, %752, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_368 = torch.constant.int 0 | |
%int1_369 = torch.constant.int 1 | |
%754 = torch.aten.transpose.int %749, %int0_368, %int1_369 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%755 = torch.aten.sub.Tensor %arg217, %result1_40, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%756 = torch.aten.mul.Tensor %755, %result2_41 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%757 = torch.aten.mul.Tensor %753, %arg37 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%758 = torch.aten.mul.Tensor %757, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%759 = torch.aten.sum.dim_IntList %757, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%760 = torch.aten.mul.Tensor %757, %756 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%761 = torch.aten.sum.dim_IntList %760, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%762 = torch.aten.mul.Tensor %756, %761 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%763 = torch.aten.sub.Tensor %758, %759, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%764 = torch.aten.sub.Tensor %763, %762, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%765 = torch.aten.div.Tensor %result2_41, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%766 = torch.aten.mul.Tensor %765, %764 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%767 = torch.aten.mul.Tensor %753, %756 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%768 = torch.aten.sum.dim_IntList %767, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%769 = torch.aten.sum.dim_IntList %753, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%770 = torch.aten.mul.Tensor %766, %arg216 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%771 = torch.aten.view %770, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_370 = torch.constant.int 0 | |
%int1_371 = torch.constant.int 1 | |
%772 = torch.aten.transpose.int %arg214, %int0_370, %int1_371 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%773 = torch.aten.mm %771, %772 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_372 = torch.constant.int 0 | |
%int1_373 = torch.constant.int 1 | |
%774 = torch.aten.transpose.int %771, %int0_372, %int1_373 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%775 = torch.aten.mm %774, %arg215 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_374 = torch.constant.int 0 | |
%int1_375 = torch.constant.int 1 | |
%776 = torch.aten.transpose.int %775, %int0_374, %int1_375 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%777 = torch.aten.sum.dim_IntList %771, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%778 = torch.aten.view %777, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%779 = torch.aten.view %773, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_376 = torch.constant.int 0 | |
%int1_377 = torch.constant.int 1 | |
%780 = torch.aten.transpose.int %776, %int0_376, %int1_377 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%781 = torch.aten.gelu_backward %779, %arg213, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%782 = torch.aten.view %781, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_378 = torch.constant.int 0 | |
%int1_379 = torch.constant.int 1 | |
%783 = torch.aten.transpose.int %arg211, %int0_378, %int1_379 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%784 = torch.aten.mm %782, %783 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_380 = torch.constant.int 0 | |
%int1_381 = torch.constant.int 1 | |
%785 = torch.aten.transpose.int %782, %int0_380, %int1_381 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%786 = torch.aten.mm %785, %arg212 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_382 = torch.constant.int 0 | |
%int1_383 = torch.constant.int 1 | |
%787 = torch.aten.transpose.int %786, %int0_382, %int1_383 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%788 = torch.aten.sum.dim_IntList %782, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%789 = torch.aten.view %788, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%790 = torch.aten.view %784, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%791 = torch.aten.add.Tensor %766, %790, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_384 = torch.constant.int 0 | |
%int1_385 = torch.constant.int 1 | |
%792 = torch.aten.transpose.int %787, %int0_384, %int1_385 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%793 = torch.aten.sub.Tensor %arg210, %result1_37, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%794 = torch.aten.mul.Tensor %793, %result2_38 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%795 = torch.aten.mul.Tensor %791, %arg35 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%796 = torch.aten.mul.Tensor %795, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%797 = torch.aten.sum.dim_IntList %795, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%798 = torch.aten.mul.Tensor %795, %794 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%799 = torch.aten.sum.dim_IntList %798, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%800 = torch.aten.mul.Tensor %794, %799 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%801 = torch.aten.sub.Tensor %796, %797, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%802 = torch.aten.sub.Tensor %801, %800, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%803 = torch.aten.div.Tensor %result2_38, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%804 = torch.aten.mul.Tensor %803, %802 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%805 = torch.aten.mul.Tensor %791, %794 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%806 = torch.aten.sum.dim_IntList %805, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%807 = torch.aten.sum.dim_IntList %791, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%808 = torch.aten.mul.Tensor %804, %arg209 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%809 = torch.aten.view %808, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_386 = torch.constant.int 0 | |
%int1_387 = torch.constant.int 1 | |
%810 = torch.aten.transpose.int %arg207, %int0_386, %int1_387 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%811 = torch.aten.mm %809, %810 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_388 = torch.constant.int 0 | |
%int1_389 = torch.constant.int 1 | |
%812 = torch.aten.transpose.int %809, %int0_388, %int1_389 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%813 = torch.aten.mm %812, %arg208 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_390 = torch.constant.int 0 | |
%int1_391 = torch.constant.int 1 | |
%814 = torch.aten.transpose.int %813, %int0_390, %int1_391 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%815 = torch.aten.sum.dim_IntList %809, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%816 = torch.aten.view %815, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%817 = torch.aten.view %811, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_392 = torch.constant.int 0 | |
%int1_393 = torch.constant.int 1 | |
%818 = torch.aten.transpose.int %814, %int0_392, %int1_393 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%819 = torch.aten.view %817, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%820 = torch.aten.permute %819, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%821 = torch.aten.clone %820, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%822 = torch.aten.view %821, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%823 = torch.aten.transpose.int %arg205, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%824 = torch.aten.bmm %823, %822 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%825 = torch.aten.transpose.int %arg206, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%826 = torch.aten.bmm %822, %825 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%827 = torch.aten.view %824, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%828 = torch.aten.view %826, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%829 = torch.aten.mul.Tensor %828, %arg204 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%830 = torch.aten.mul.Tensor %829, %arg203 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%831 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_394 = torch.constant.bool true | |
%none_395 = torch.constant.none | |
%832 = torch.aten.sum.dim_IntList %830, %831, %true_394, %none_395 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_396 = torch.constant.int 0 | |
%833 = torch.aten.size.int %830, %int0_396 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_397 = torch.constant.int 1 | |
%834 = torch.aten.size.int %830, %int1_397 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_398 = torch.constant.int 2 | |
%835 = torch.aten.size.int %830, %int2_398 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_399 = torch.constant.int 3 | |
%836 = torch.aten.size.int %830, %int3_399 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%837 = torch.prim.ListConstruct %833, %834, %835, %836 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%838 = torch.aten.broadcast_to %832, %837 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%839 = torch.aten.mul.Tensor %arg203, %838 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_400 = torch.constant.float 1.000000e+00 | |
%840 = torch.aten.sub.Tensor %830, %839, %float1.000000e00_400 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%841 = torch.aten.div.Tensor %840, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%842 = torch.aten.view %841, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%843 = torch.aten.transpose.int %arg201, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%844 = torch.aten.bmm %843, %842 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%845 = torch.aten.transpose.int %arg202, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%846 = torch.aten.bmm %842, %845 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%847 = torch.aten.view %844, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%848 = torch.aten.view %846, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%849 = torch.aten.transpose.int %847, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%850 = torch.aten.permute %848, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%851 = torch.aten.clone %850, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%852 = torch.aten.view %851, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%853 = torch.aten.permute %827, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%854 = torch.aten.clone %853, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%855 = torch.aten.view %854, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%856 = torch.aten.view %855, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_401 = torch.constant.int 0 | |
%int1_402 = torch.constant.int 1 | |
%857 = torch.aten.transpose.int %arg199, %int0_401, %int1_402 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%858 = torch.aten.mm %856, %857 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_403 = torch.constant.int 0 | |
%int1_404 = torch.constant.int 1 | |
%859 = torch.aten.transpose.int %856, %int0_403, %int1_404 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%860 = torch.aten.mm %859, %arg200 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_405 = torch.constant.int 0 | |
%int1_406 = torch.constant.int 1 | |
%861 = torch.aten.transpose.int %860, %int0_405, %int1_406 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%862 = torch.aten.sum.dim_IntList %856, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%863 = torch.aten.view %862, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%864 = torch.aten.view %858, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%865 = torch.aten.add.Tensor %804, %864, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_407 = torch.constant.int 0 | |
%int1_408 = torch.constant.int 1 | |
%866 = torch.aten.transpose.int %861, %int0_407, %int1_408 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%867 = torch.aten.permute %849, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%868 = torch.aten.view %867, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%869 = torch.aten.clone %868, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%870 = torch.aten.view %869, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_409 = torch.constant.int 0 | |
%int1_410 = torch.constant.int 1 | |
%871 = torch.aten.transpose.int %arg197, %int0_409, %int1_410 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%872 = torch.aten.mm %870, %871 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_411 = torch.constant.int 0 | |
%int1_412 = torch.constant.int 1 | |
%873 = torch.aten.transpose.int %870, %int0_411, %int1_412 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%874 = torch.aten.mm %873, %arg198 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_413 = torch.constant.int 0 | |
%int1_414 = torch.constant.int 1 | |
%875 = torch.aten.transpose.int %874, %int0_413, %int1_414 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%876 = torch.aten.sum.dim_IntList %870, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%877 = torch.aten.view %876, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%878 = torch.aten.view %872, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%879 = torch.aten.add.Tensor %865, %878, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_415 = torch.constant.int 0 | |
%int1_416 = torch.constant.int 1 | |
%880 = torch.aten.transpose.int %875, %int0_415, %int1_416 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%881 = torch.aten.view %852, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_417 = torch.constant.int 0 | |
%int1_418 = torch.constant.int 1 | |
%882 = torch.aten.transpose.int %arg195, %int0_417, %int1_418 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%883 = torch.aten.mm %881, %882 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_419 = torch.constant.int 0 | |
%int1_420 = torch.constant.int 1 | |
%884 = torch.aten.transpose.int %881, %int0_419, %int1_420 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%885 = torch.aten.mm %884, %arg196 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_421 = torch.constant.int 0 | |
%int1_422 = torch.constant.int 1 | |
%886 = torch.aten.transpose.int %885, %int0_421, %int1_422 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%887 = torch.aten.sum.dim_IntList %881, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%888 = torch.aten.view %887, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%889 = torch.aten.view %883, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%890 = torch.aten.add.Tensor %879, %889, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_423 = torch.constant.int 0 | |
%int1_424 = torch.constant.int 1 | |
%891 = torch.aten.transpose.int %886, %int0_423, %int1_424 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%892 = torch.aten.sub.Tensor %arg194, %result1_34, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%893 = torch.aten.mul.Tensor %892, %result2_35 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%894 = torch.aten.mul.Tensor %890, %arg33 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%895 = torch.aten.mul.Tensor %894, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%896 = torch.aten.sum.dim_IntList %894, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%897 = torch.aten.mul.Tensor %894, %893 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%898 = torch.aten.sum.dim_IntList %897, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%899 = torch.aten.mul.Tensor %893, %898 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%900 = torch.aten.sub.Tensor %895, %896, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%901 = torch.aten.sub.Tensor %900, %899, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%902 = torch.aten.div.Tensor %result2_35, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%903 = torch.aten.mul.Tensor %902, %901 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%904 = torch.aten.mul.Tensor %890, %893 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%905 = torch.aten.sum.dim_IntList %904, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%906 = torch.aten.sum.dim_IntList %890, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%907 = torch.aten.mul.Tensor %903, %arg193 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%908 = torch.aten.view %907, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_425 = torch.constant.int 0 | |
%int1_426 = torch.constant.int 1 | |
%909 = torch.aten.transpose.int %arg191, %int0_425, %int1_426 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%910 = torch.aten.mm %908, %909 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_427 = torch.constant.int 0 | |
%int1_428 = torch.constant.int 1 | |
%911 = torch.aten.transpose.int %908, %int0_427, %int1_428 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%912 = torch.aten.mm %911, %arg192 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_429 = torch.constant.int 0 | |
%int1_430 = torch.constant.int 1 | |
%913 = torch.aten.transpose.int %912, %int0_429, %int1_430 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%914 = torch.aten.sum.dim_IntList %908, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%915 = torch.aten.view %914, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%916 = torch.aten.view %910, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_431 = torch.constant.int 0 | |
%int1_432 = torch.constant.int 1 | |
%917 = torch.aten.transpose.int %913, %int0_431, %int1_432 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%918 = torch.aten.gelu_backward %916, %arg190, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%919 = torch.aten.view %918, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_433 = torch.constant.int 0 | |
%int1_434 = torch.constant.int 1 | |
%920 = torch.aten.transpose.int %arg188, %int0_433, %int1_434 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%921 = torch.aten.mm %919, %920 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_435 = torch.constant.int 0 | |
%int1_436 = torch.constant.int 1 | |
%922 = torch.aten.transpose.int %919, %int0_435, %int1_436 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%923 = torch.aten.mm %922, %arg189 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_437 = torch.constant.int 0 | |
%int1_438 = torch.constant.int 1 | |
%924 = torch.aten.transpose.int %923, %int0_437, %int1_438 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%925 = torch.aten.sum.dim_IntList %919, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%926 = torch.aten.view %925, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%927 = torch.aten.view %921, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%928 = torch.aten.add.Tensor %903, %927, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_439 = torch.constant.int 0 | |
%int1_440 = torch.constant.int 1 | |
%929 = torch.aten.transpose.int %924, %int0_439, %int1_440 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%930 = torch.aten.sub.Tensor %arg187, %result1_31, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%931 = torch.aten.mul.Tensor %930, %result2_32 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%932 = torch.aten.mul.Tensor %928, %arg31 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%933 = torch.aten.mul.Tensor %932, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%934 = torch.aten.sum.dim_IntList %932, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%935 = torch.aten.mul.Tensor %932, %931 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%936 = torch.aten.sum.dim_IntList %935, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%937 = torch.aten.mul.Tensor %931, %936 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%938 = torch.aten.sub.Tensor %933, %934, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%939 = torch.aten.sub.Tensor %938, %937, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%940 = torch.aten.div.Tensor %result2_32, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%941 = torch.aten.mul.Tensor %940, %939 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%942 = torch.aten.mul.Tensor %928, %931 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%943 = torch.aten.sum.dim_IntList %942, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%944 = torch.aten.sum.dim_IntList %928, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%945 = torch.aten.mul.Tensor %941, %arg186 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%946 = torch.aten.view %945, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_441 = torch.constant.int 0 | |
%int1_442 = torch.constant.int 1 | |
%947 = torch.aten.transpose.int %arg184, %int0_441, %int1_442 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%948 = torch.aten.mm %946, %947 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_443 = torch.constant.int 0 | |
%int1_444 = torch.constant.int 1 | |
%949 = torch.aten.transpose.int %946, %int0_443, %int1_444 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%950 = torch.aten.mm %949, %arg185 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_445 = torch.constant.int 0 | |
%int1_446 = torch.constant.int 1 | |
%951 = torch.aten.transpose.int %950, %int0_445, %int1_446 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%952 = torch.aten.sum.dim_IntList %946, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%953 = torch.aten.view %952, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%954 = torch.aten.view %948, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_447 = torch.constant.int 0 | |
%int1_448 = torch.constant.int 1 | |
%955 = torch.aten.transpose.int %951, %int0_447, %int1_448 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%956 = torch.aten.view %954, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%957 = torch.aten.permute %956, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%958 = torch.aten.clone %957, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%959 = torch.aten.view %958, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%960 = torch.aten.transpose.int %arg182, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%961 = torch.aten.bmm %960, %959 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%962 = torch.aten.transpose.int %arg183, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%963 = torch.aten.bmm %959, %962 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%964 = torch.aten.view %961, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%965 = torch.aten.view %963, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%966 = torch.aten.mul.Tensor %965, %arg181 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%967 = torch.aten.mul.Tensor %966, %arg180 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%968 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_449 = torch.constant.bool true | |
%none_450 = torch.constant.none | |
%969 = torch.aten.sum.dim_IntList %967, %968, %true_449, %none_450 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_451 = torch.constant.int 0 | |
%970 = torch.aten.size.int %967, %int0_451 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_452 = torch.constant.int 1 | |
%971 = torch.aten.size.int %967, %int1_452 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_453 = torch.constant.int 2 | |
%972 = torch.aten.size.int %967, %int2_453 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_454 = torch.constant.int 3 | |
%973 = torch.aten.size.int %967, %int3_454 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%974 = torch.prim.ListConstruct %970, %971, %972, %973 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%975 = torch.aten.broadcast_to %969, %974 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%976 = torch.aten.mul.Tensor %arg180, %975 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_455 = torch.constant.float 1.000000e+00 | |
%977 = torch.aten.sub.Tensor %967, %976, %float1.000000e00_455 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%978 = torch.aten.div.Tensor %977, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%979 = torch.aten.view %978, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%980 = torch.aten.transpose.int %arg178, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%981 = torch.aten.bmm %980, %979 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%982 = torch.aten.transpose.int %arg179, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%983 = torch.aten.bmm %979, %982 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%984 = torch.aten.view %981, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%985 = torch.aten.view %983, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%986 = torch.aten.transpose.int %984, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%987 = torch.aten.permute %985, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%988 = torch.aten.clone %987, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%989 = torch.aten.view %988, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%990 = torch.aten.permute %964, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%991 = torch.aten.clone %990, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%992 = torch.aten.view %991, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%993 = torch.aten.view %992, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_456 = torch.constant.int 0 | |
%int1_457 = torch.constant.int 1 | |
%994 = torch.aten.transpose.int %arg176, %int0_456, %int1_457 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%995 = torch.aten.mm %993, %994 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_458 = torch.constant.int 0 | |
%int1_459 = torch.constant.int 1 | |
%996 = torch.aten.transpose.int %993, %int0_458, %int1_459 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%997 = torch.aten.mm %996, %arg177 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_460 = torch.constant.int 0 | |
%int1_461 = torch.constant.int 1 | |
%998 = torch.aten.transpose.int %997, %int0_460, %int1_461 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%999 = torch.aten.sum.dim_IntList %993, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1000 = torch.aten.view %999, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1001 = torch.aten.view %995, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1002 = torch.aten.add.Tensor %941, %1001, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_462 = torch.constant.int 0 | |
%int1_463 = torch.constant.int 1 | |
%1003 = torch.aten.transpose.int %998, %int0_462, %int1_463 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1004 = torch.aten.permute %986, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1005 = torch.aten.view %1004, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1006 = torch.aten.clone %1005, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1007 = torch.aten.view %1006, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_464 = torch.constant.int 0 | |
%int1_465 = torch.constant.int 1 | |
%1008 = torch.aten.transpose.int %arg174, %int0_464, %int1_465 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1009 = torch.aten.mm %1007, %1008 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_466 = torch.constant.int 0 | |
%int1_467 = torch.constant.int 1 | |
%1010 = torch.aten.transpose.int %1007, %int0_466, %int1_467 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1011 = torch.aten.mm %1010, %arg175 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_468 = torch.constant.int 0 | |
%int1_469 = torch.constant.int 1 | |
%1012 = torch.aten.transpose.int %1011, %int0_468, %int1_469 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1013 = torch.aten.sum.dim_IntList %1007, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1014 = torch.aten.view %1013, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1015 = torch.aten.view %1009, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1016 = torch.aten.add.Tensor %1002, %1015, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_470 = torch.constant.int 0 | |
%int1_471 = torch.constant.int 1 | |
%1017 = torch.aten.transpose.int %1012, %int0_470, %int1_471 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1018 = torch.aten.view %989, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_472 = torch.constant.int 0 | |
%int1_473 = torch.constant.int 1 | |
%1019 = torch.aten.transpose.int %arg172, %int0_472, %int1_473 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1020 = torch.aten.mm %1018, %1019 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_474 = torch.constant.int 0 | |
%int1_475 = torch.constant.int 1 | |
%1021 = torch.aten.transpose.int %1018, %int0_474, %int1_475 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1022 = torch.aten.mm %1021, %arg173 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_476 = torch.constant.int 0 | |
%int1_477 = torch.constant.int 1 | |
%1023 = torch.aten.transpose.int %1022, %int0_476, %int1_477 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1024 = torch.aten.sum.dim_IntList %1018, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1025 = torch.aten.view %1024, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1026 = torch.aten.view %1020, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1027 = torch.aten.add.Tensor %1016, %1026, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_478 = torch.constant.int 0 | |
%int1_479 = torch.constant.int 1 | |
%1028 = torch.aten.transpose.int %1023, %int0_478, %int1_479 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1029 = torch.aten.sub.Tensor %arg171, %result1_28, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1030 = torch.aten.mul.Tensor %1029, %result2_29 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1031 = torch.aten.mul.Tensor %1027, %arg29 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1032 = torch.aten.mul.Tensor %1031, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%1033 = torch.aten.sum.dim_IntList %1031, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1034 = torch.aten.mul.Tensor %1031, %1030 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1035 = torch.aten.sum.dim_IntList %1034, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1036 = torch.aten.mul.Tensor %1030, %1035 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1037 = torch.aten.sub.Tensor %1032, %1033, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1038 = torch.aten.sub.Tensor %1037, %1036, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1039 = torch.aten.div.Tensor %result2_29, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%1040 = torch.aten.mul.Tensor %1039, %1038 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1041 = torch.aten.mul.Tensor %1027, %1030 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1042 = torch.aten.sum.dim_IntList %1041, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1043 = torch.aten.sum.dim_IntList %1027, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1044 = torch.aten.mul.Tensor %1040, %arg170 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1045 = torch.aten.view %1044, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_480 = torch.constant.int 0 | |
%int1_481 = torch.constant.int 1 | |
%1046 = torch.aten.transpose.int %arg168, %int0_480, %int1_481 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1047 = torch.aten.mm %1045, %1046 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_482 = torch.constant.int 0 | |
%int1_483 = torch.constant.int 1 | |
%1048 = torch.aten.transpose.int %1045, %int0_482, %int1_483 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1049 = torch.aten.mm %1048, %arg169 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_484 = torch.constant.int 0 | |
%int1_485 = torch.constant.int 1 | |
%1050 = torch.aten.transpose.int %1049, %int0_484, %int1_485 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1051 = torch.aten.sum.dim_IntList %1045, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1052 = torch.aten.view %1051, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1053 = torch.aten.view %1047, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_486 = torch.constant.int 0 | |
%int1_487 = torch.constant.int 1 | |
%1054 = torch.aten.transpose.int %1050, %int0_486, %int1_487 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1055 = torch.aten.gelu_backward %1053, %arg167, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%1056 = torch.aten.view %1055, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_488 = torch.constant.int 0 | |
%int1_489 = torch.constant.int 1 | |
%1057 = torch.aten.transpose.int %arg165, %int0_488, %int1_489 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1058 = torch.aten.mm %1056, %1057 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_490 = torch.constant.int 0 | |
%int1_491 = torch.constant.int 1 | |
%1059 = torch.aten.transpose.int %1056, %int0_490, %int1_491 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%1060 = torch.aten.mm %1059, %arg166 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_492 = torch.constant.int 0 | |
%int1_493 = torch.constant.int 1 | |
%1061 = torch.aten.transpose.int %1060, %int0_492, %int1_493 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1062 = torch.aten.sum.dim_IntList %1056, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%1063 = torch.aten.view %1062, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%1064 = torch.aten.view %1058, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1065 = torch.aten.add.Tensor %1040, %1064, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_494 = torch.constant.int 0 | |
%int1_495 = torch.constant.int 1 | |
%1066 = torch.aten.transpose.int %1061, %int0_494, %int1_495 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1067 = torch.aten.sub.Tensor %arg164, %result1_25, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1068 = torch.aten.mul.Tensor %1067, %result2_26 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1069 = torch.aten.mul.Tensor %1065, %arg27 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1070 = torch.aten.mul.Tensor %1069, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%1071 = torch.aten.sum.dim_IntList %1069, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1072 = torch.aten.mul.Tensor %1069, %1068 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1073 = torch.aten.sum.dim_IntList %1072, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1074 = torch.aten.mul.Tensor %1068, %1073 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1075 = torch.aten.sub.Tensor %1070, %1071, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1076 = torch.aten.sub.Tensor %1075, %1074, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1077 = torch.aten.div.Tensor %result2_26, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%1078 = torch.aten.mul.Tensor %1077, %1076 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1079 = torch.aten.mul.Tensor %1065, %1068 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1080 = torch.aten.sum.dim_IntList %1079, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1081 = torch.aten.sum.dim_IntList %1065, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1082 = torch.aten.mul.Tensor %1078, %arg163 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1083 = torch.aten.view %1082, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_496 = torch.constant.int 0 | |
%int1_497 = torch.constant.int 1 | |
%1084 = torch.aten.transpose.int %arg161, %int0_496, %int1_497 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1085 = torch.aten.mm %1083, %1084 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_498 = torch.constant.int 0 | |
%int1_499 = torch.constant.int 1 | |
%1086 = torch.aten.transpose.int %1083, %int0_498, %int1_499 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1087 = torch.aten.mm %1086, %arg162 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_500 = torch.constant.int 0 | |
%int1_501 = torch.constant.int 1 | |
%1088 = torch.aten.transpose.int %1087, %int0_500, %int1_501 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1089 = torch.aten.sum.dim_IntList %1083, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1090 = torch.aten.view %1089, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1091 = torch.aten.view %1085, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_502 = torch.constant.int 0 | |
%int1_503 = torch.constant.int 1 | |
%1092 = torch.aten.transpose.int %1088, %int0_502, %int1_503 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1093 = torch.aten.view %1091, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1094 = torch.aten.permute %1093, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1095 = torch.aten.clone %1094, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%1096 = torch.aten.view %1095, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%1097 = torch.aten.transpose.int %arg159, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%1098 = torch.aten.bmm %1097, %1096 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%1099 = torch.aten.transpose.int %arg160, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%1100 = torch.aten.bmm %1096, %1099 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%1101 = torch.aten.view %1098, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1102 = torch.aten.view %1100, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%1103 = torch.aten.mul.Tensor %1102, %arg158 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%1104 = torch.aten.mul.Tensor %1103, %arg157 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%1105 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_504 = torch.constant.bool true | |
%none_505 = torch.constant.none | |
%1106 = torch.aten.sum.dim_IntList %1104, %1105, %true_504, %none_505 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_506 = torch.constant.int 0 | |
%1107 = torch.aten.size.int %1104, %int0_506 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_507 = torch.constant.int 1 | |
%1108 = torch.aten.size.int %1104, %int1_507 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_508 = torch.constant.int 2 | |
%1109 = torch.aten.size.int %1104, %int2_508 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_509 = torch.constant.int 3 | |
%1110 = torch.aten.size.int %1104, %int3_509 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%1111 = torch.prim.ListConstruct %1107, %1108, %1109, %1110 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1112 = torch.aten.broadcast_to %1106, %1111 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%1113 = torch.aten.mul.Tensor %arg157, %1112 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_510 = torch.constant.float 1.000000e+00 | |
%1114 = torch.aten.sub.Tensor %1104, %1113, %float1.000000e00_510 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%1115 = torch.aten.div.Tensor %1114, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%1116 = torch.aten.view %1115, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%1117 = torch.aten.transpose.int %arg155, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%1118 = torch.aten.bmm %1117, %1116 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%1119 = torch.aten.transpose.int %arg156, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%1120 = torch.aten.bmm %1116, %1119 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%1121 = torch.aten.view %1118, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%1122 = torch.aten.view %1120, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1123 = torch.aten.transpose.int %1121, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%1124 = torch.aten.permute %1122, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1125 = torch.aten.clone %1124, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%1126 = torch.aten.view %1125, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1127 = torch.aten.permute %1101, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1128 = torch.aten.clone %1127, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%1129 = torch.aten.view %1128, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1130 = torch.aten.view %1129, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_511 = torch.constant.int 0 | |
%int1_512 = torch.constant.int 1 | |
%1131 = torch.aten.transpose.int %arg153, %int0_511, %int1_512 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1132 = torch.aten.mm %1130, %1131 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_513 = torch.constant.int 0 | |
%int1_514 = torch.constant.int 1 | |
%1133 = torch.aten.transpose.int %1130, %int0_513, %int1_514 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1134 = torch.aten.mm %1133, %arg154 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_515 = torch.constant.int 0 | |
%int1_516 = torch.constant.int 1 | |
%1135 = torch.aten.transpose.int %1134, %int0_515, %int1_516 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1136 = torch.aten.sum.dim_IntList %1130, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1137 = torch.aten.view %1136, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1138 = torch.aten.view %1132, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1139 = torch.aten.add.Tensor %1078, %1138, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_517 = torch.constant.int 0 | |
%int1_518 = torch.constant.int 1 | |
%1140 = torch.aten.transpose.int %1135, %int0_517, %int1_518 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1141 = torch.aten.permute %1123, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1142 = torch.aten.view %1141, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1143 = torch.aten.clone %1142, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1144 = torch.aten.view %1143, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_519 = torch.constant.int 0 | |
%int1_520 = torch.constant.int 1 | |
%1145 = torch.aten.transpose.int %arg151, %int0_519, %int1_520 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1146 = torch.aten.mm %1144, %1145 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_521 = torch.constant.int 0 | |
%int1_522 = torch.constant.int 1 | |
%1147 = torch.aten.transpose.int %1144, %int0_521, %int1_522 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1148 = torch.aten.mm %1147, %arg152 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_523 = torch.constant.int 0 | |
%int1_524 = torch.constant.int 1 | |
%1149 = torch.aten.transpose.int %1148, %int0_523, %int1_524 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1150 = torch.aten.sum.dim_IntList %1144, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1151 = torch.aten.view %1150, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1152 = torch.aten.view %1146, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1153 = torch.aten.add.Tensor %1139, %1152, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_525 = torch.constant.int 0 | |
%int1_526 = torch.constant.int 1 | |
%1154 = torch.aten.transpose.int %1149, %int0_525, %int1_526 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1155 = torch.aten.view %1126, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_527 = torch.constant.int 0 | |
%int1_528 = torch.constant.int 1 | |
%1156 = torch.aten.transpose.int %arg149, %int0_527, %int1_528 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1157 = torch.aten.mm %1155, %1156 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_529 = torch.constant.int 0 | |
%int1_530 = torch.constant.int 1 | |
%1158 = torch.aten.transpose.int %1155, %int0_529, %int1_530 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1159 = torch.aten.mm %1158, %arg150 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_531 = torch.constant.int 0 | |
%int1_532 = torch.constant.int 1 | |
%1160 = torch.aten.transpose.int %1159, %int0_531, %int1_532 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1161 = torch.aten.sum.dim_IntList %1155, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1162 = torch.aten.view %1161, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1163 = torch.aten.view %1157, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1164 = torch.aten.add.Tensor %1153, %1163, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_533 = torch.constant.int 0 | |
%int1_534 = torch.constant.int 1 | |
%1165 = torch.aten.transpose.int %1160, %int0_533, %int1_534 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1166 = torch.aten.sub.Tensor %arg148, %result1_22, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1167 = torch.aten.mul.Tensor %1166, %result2_23 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1168 = torch.aten.mul.Tensor %1164, %arg25 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1169 = torch.aten.mul.Tensor %1168, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%1170 = torch.aten.sum.dim_IntList %1168, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1171 = torch.aten.mul.Tensor %1168, %1167 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1172 = torch.aten.sum.dim_IntList %1171, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1173 = torch.aten.mul.Tensor %1167, %1172 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1174 = torch.aten.sub.Tensor %1169, %1170, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1175 = torch.aten.sub.Tensor %1174, %1173, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1176 = torch.aten.div.Tensor %result2_23, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%1177 = torch.aten.mul.Tensor %1176, %1175 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1178 = torch.aten.mul.Tensor %1164, %1167 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1179 = torch.aten.sum.dim_IntList %1178, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1180 = torch.aten.sum.dim_IntList %1164, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1181 = torch.aten.mul.Tensor %1177, %arg147 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1182 = torch.aten.view %1181, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_535 = torch.constant.int 0 | |
%int1_536 = torch.constant.int 1 | |
%1183 = torch.aten.transpose.int %arg145, %int0_535, %int1_536 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1184 = torch.aten.mm %1182, %1183 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_537 = torch.constant.int 0 | |
%int1_538 = torch.constant.int 1 | |
%1185 = torch.aten.transpose.int %1182, %int0_537, %int1_538 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1186 = torch.aten.mm %1185, %arg146 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_539 = torch.constant.int 0 | |
%int1_540 = torch.constant.int 1 | |
%1187 = torch.aten.transpose.int %1186, %int0_539, %int1_540 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1188 = torch.aten.sum.dim_IntList %1182, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1189 = torch.aten.view %1188, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1190 = torch.aten.view %1184, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_541 = torch.constant.int 0 | |
%int1_542 = torch.constant.int 1 | |
%1191 = torch.aten.transpose.int %1187, %int0_541, %int1_542 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1192 = torch.aten.gelu_backward %1190, %arg144, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%1193 = torch.aten.view %1192, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_543 = torch.constant.int 0 | |
%int1_544 = torch.constant.int 1 | |
%1194 = torch.aten.transpose.int %arg142, %int0_543, %int1_544 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1195 = torch.aten.mm %1193, %1194 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_545 = torch.constant.int 0 | |
%int1_546 = torch.constant.int 1 | |
%1196 = torch.aten.transpose.int %1193, %int0_545, %int1_546 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%1197 = torch.aten.mm %1196, %arg143 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_547 = torch.constant.int 0 | |
%int1_548 = torch.constant.int 1 | |
%1198 = torch.aten.transpose.int %1197, %int0_547, %int1_548 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1199 = torch.aten.sum.dim_IntList %1193, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%1200 = torch.aten.view %1199, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%1201 = torch.aten.view %1195, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1202 = torch.aten.add.Tensor %1177, %1201, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_549 = torch.constant.int 0 | |
%int1_550 = torch.constant.int 1 | |
%1203 = torch.aten.transpose.int %1198, %int0_549, %int1_550 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1204 = torch.aten.sub.Tensor %arg141, %result1_19, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1205 = torch.aten.mul.Tensor %1204, %result2_20 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1206 = torch.aten.mul.Tensor %1202, %arg23 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1207 = torch.aten.mul.Tensor %1206, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%1208 = torch.aten.sum.dim_IntList %1206, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1209 = torch.aten.mul.Tensor %1206, %1205 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1210 = torch.aten.sum.dim_IntList %1209, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1211 = torch.aten.mul.Tensor %1205, %1210 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1212 = torch.aten.sub.Tensor %1207, %1208, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1213 = torch.aten.sub.Tensor %1212, %1211, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1214 = torch.aten.div.Tensor %result2_20, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%1215 = torch.aten.mul.Tensor %1214, %1213 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1216 = torch.aten.mul.Tensor %1202, %1205 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1217 = torch.aten.sum.dim_IntList %1216, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1218 = torch.aten.sum.dim_IntList %1202, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1219 = torch.aten.mul.Tensor %1215, %arg140 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1220 = torch.aten.view %1219, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_551 = torch.constant.int 0 | |
%int1_552 = torch.constant.int 1 | |
%1221 = torch.aten.transpose.int %arg138, %int0_551, %int1_552 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1222 = torch.aten.mm %1220, %1221 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_553 = torch.constant.int 0 | |
%int1_554 = torch.constant.int 1 | |
%1223 = torch.aten.transpose.int %1220, %int0_553, %int1_554 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1224 = torch.aten.mm %1223, %arg139 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_555 = torch.constant.int 0 | |
%int1_556 = torch.constant.int 1 | |
%1225 = torch.aten.transpose.int %1224, %int0_555, %int1_556 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1226 = torch.aten.sum.dim_IntList %1220, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1227 = torch.aten.view %1226, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1228 = torch.aten.view %1222, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_557 = torch.constant.int 0 | |
%int1_558 = torch.constant.int 1 | |
%1229 = torch.aten.transpose.int %1225, %int0_557, %int1_558 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1230 = torch.aten.view %1228, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1231 = torch.aten.permute %1230, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1232 = torch.aten.clone %1231, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%1233 = torch.aten.view %1232, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%1234 = torch.aten.transpose.int %arg136, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%1235 = torch.aten.bmm %1234, %1233 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%1236 = torch.aten.transpose.int %arg137, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%1237 = torch.aten.bmm %1233, %1236 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%1238 = torch.aten.view %1235, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1239 = torch.aten.view %1237, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%1240 = torch.aten.mul.Tensor %1239, %arg135 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%1241 = torch.aten.mul.Tensor %1240, %arg134 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%1242 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_559 = torch.constant.bool true | |
%none_560 = torch.constant.none | |
%1243 = torch.aten.sum.dim_IntList %1241, %1242, %true_559, %none_560 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_561 = torch.constant.int 0 | |
%1244 = torch.aten.size.int %1241, %int0_561 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_562 = torch.constant.int 1 | |
%1245 = torch.aten.size.int %1241, %int1_562 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_563 = torch.constant.int 2 | |
%1246 = torch.aten.size.int %1241, %int2_563 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_564 = torch.constant.int 3 | |
%1247 = torch.aten.size.int %1241, %int3_564 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%1248 = torch.prim.ListConstruct %1244, %1245, %1246, %1247 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1249 = torch.aten.broadcast_to %1243, %1248 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%1250 = torch.aten.mul.Tensor %arg134, %1249 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_565 = torch.constant.float 1.000000e+00 | |
%1251 = torch.aten.sub.Tensor %1241, %1250, %float1.000000e00_565 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%1252 = torch.aten.div.Tensor %1251, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%1253 = torch.aten.view %1252, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%1254 = torch.aten.transpose.int %arg132, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%1255 = torch.aten.bmm %1254, %1253 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%1256 = torch.aten.transpose.int %arg133, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%1257 = torch.aten.bmm %1253, %1256 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%1258 = torch.aten.view %1255, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%1259 = torch.aten.view %1257, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1260 = torch.aten.transpose.int %1258, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%1261 = torch.aten.permute %1259, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1262 = torch.aten.clone %1261, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%1263 = torch.aten.view %1262, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1264 = torch.aten.permute %1238, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1265 = torch.aten.clone %1264, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%1266 = torch.aten.view %1265, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1267 = torch.aten.view %1266, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_566 = torch.constant.int 0 | |
%int1_567 = torch.constant.int 1 | |
%1268 = torch.aten.transpose.int %arg130, %int0_566, %int1_567 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1269 = torch.aten.mm %1267, %1268 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_568 = torch.constant.int 0 | |
%int1_569 = torch.constant.int 1 | |
%1270 = torch.aten.transpose.int %1267, %int0_568, %int1_569 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1271 = torch.aten.mm %1270, %arg131 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_570 = torch.constant.int 0 | |
%int1_571 = torch.constant.int 1 | |
%1272 = torch.aten.transpose.int %1271, %int0_570, %int1_571 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1273 = torch.aten.sum.dim_IntList %1267, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1274 = torch.aten.view %1273, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1275 = torch.aten.view %1269, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1276 = torch.aten.add.Tensor %1215, %1275, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_572 = torch.constant.int 0 | |
%int1_573 = torch.constant.int 1 | |
%1277 = torch.aten.transpose.int %1272, %int0_572, %int1_573 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1278 = torch.aten.permute %1260, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1279 = torch.aten.view %1278, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1280 = torch.aten.clone %1279, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1281 = torch.aten.view %1280, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_574 = torch.constant.int 0 | |
%int1_575 = torch.constant.int 1 | |
%1282 = torch.aten.transpose.int %arg128, %int0_574, %int1_575 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1283 = torch.aten.mm %1281, %1282 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_576 = torch.constant.int 0 | |
%int1_577 = torch.constant.int 1 | |
%1284 = torch.aten.transpose.int %1281, %int0_576, %int1_577 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1285 = torch.aten.mm %1284, %arg129 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_578 = torch.constant.int 0 | |
%int1_579 = torch.constant.int 1 | |
%1286 = torch.aten.transpose.int %1285, %int0_578, %int1_579 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1287 = torch.aten.sum.dim_IntList %1281, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1288 = torch.aten.view %1287, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1289 = torch.aten.view %1283, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1290 = torch.aten.add.Tensor %1276, %1289, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_580 = torch.constant.int 0 | |
%int1_581 = torch.constant.int 1 | |
%1291 = torch.aten.transpose.int %1286, %int0_580, %int1_581 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1292 = torch.aten.view %1263, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_582 = torch.constant.int 0 | |
%int1_583 = torch.constant.int 1 | |
%1293 = torch.aten.transpose.int %arg126, %int0_582, %int1_583 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1294 = torch.aten.mm %1292, %1293 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_584 = torch.constant.int 0 | |
%int1_585 = torch.constant.int 1 | |
%1295 = torch.aten.transpose.int %1292, %int0_584, %int1_585 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1296 = torch.aten.mm %1295, %arg127 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_586 = torch.constant.int 0 | |
%int1_587 = torch.constant.int 1 | |
%1297 = torch.aten.transpose.int %1296, %int0_586, %int1_587 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1298 = torch.aten.sum.dim_IntList %1292, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1299 = torch.aten.view %1298, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1300 = torch.aten.view %1294, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1301 = torch.aten.add.Tensor %1290, %1300, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_588 = torch.constant.int 0 | |
%int1_589 = torch.constant.int 1 | |
%1302 = torch.aten.transpose.int %1297, %int0_588, %int1_589 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1303 = torch.aten.sub.Tensor %arg125, %result1_16, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1304 = torch.aten.mul.Tensor %1303, %result2_17 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1305 = torch.aten.mul.Tensor %1301, %arg21 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1306 = torch.aten.mul.Tensor %1305, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%1307 = torch.aten.sum.dim_IntList %1305, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1308 = torch.aten.mul.Tensor %1305, %1304 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1309 = torch.aten.sum.dim_IntList %1308, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1310 = torch.aten.mul.Tensor %1304, %1309 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1311 = torch.aten.sub.Tensor %1306, %1307, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1312 = torch.aten.sub.Tensor %1311, %1310, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1313 = torch.aten.div.Tensor %result2_17, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%1314 = torch.aten.mul.Tensor %1313, %1312 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1315 = torch.aten.mul.Tensor %1301, %1304 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1316 = torch.aten.sum.dim_IntList %1315, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1317 = torch.aten.sum.dim_IntList %1301, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1318 = torch.aten.mul.Tensor %1314, %arg124 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1319 = torch.aten.view %1318, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_590 = torch.constant.int 0 | |
%int1_591 = torch.constant.int 1 | |
%1320 = torch.aten.transpose.int %arg122, %int0_590, %int1_591 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1321 = torch.aten.mm %1319, %1320 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_592 = torch.constant.int 0 | |
%int1_593 = torch.constant.int 1 | |
%1322 = torch.aten.transpose.int %1319, %int0_592, %int1_593 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1323 = torch.aten.mm %1322, %arg123 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_594 = torch.constant.int 0 | |
%int1_595 = torch.constant.int 1 | |
%1324 = torch.aten.transpose.int %1323, %int0_594, %int1_595 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1325 = torch.aten.sum.dim_IntList %1319, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1326 = torch.aten.view %1325, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1327 = torch.aten.view %1321, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_596 = torch.constant.int 0 | |
%int1_597 = torch.constant.int 1 | |
%1328 = torch.aten.transpose.int %1324, %int0_596, %int1_597 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1329 = torch.aten.gelu_backward %1327, %arg121, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%1330 = torch.aten.view %1329, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_598 = torch.constant.int 0 | |
%int1_599 = torch.constant.int 1 | |
%1331 = torch.aten.transpose.int %arg119, %int0_598, %int1_599 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1332 = torch.aten.mm %1330, %1331 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_600 = torch.constant.int 0 | |
%int1_601 = torch.constant.int 1 | |
%1333 = torch.aten.transpose.int %1330, %int0_600, %int1_601 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%1334 = torch.aten.mm %1333, %arg120 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_602 = torch.constant.int 0 | |
%int1_603 = torch.constant.int 1 | |
%1335 = torch.aten.transpose.int %1334, %int0_602, %int1_603 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1336 = torch.aten.sum.dim_IntList %1330, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%1337 = torch.aten.view %1336, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%1338 = torch.aten.view %1332, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1339 = torch.aten.add.Tensor %1314, %1338, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_604 = torch.constant.int 0 | |
%int1_605 = torch.constant.int 1 | |
%1340 = torch.aten.transpose.int %1335, %int0_604, %int1_605 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1341 = torch.aten.sub.Tensor %arg118, %result1_13, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1342 = torch.aten.mul.Tensor %1341, %result2_14 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1343 = torch.aten.mul.Tensor %1339, %arg19 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1344 = torch.aten.mul.Tensor %1343, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%1345 = torch.aten.sum.dim_IntList %1343, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1346 = torch.aten.mul.Tensor %1343, %1342 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1347 = torch.aten.sum.dim_IntList %1346, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1348 = torch.aten.mul.Tensor %1342, %1347 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1349 = torch.aten.sub.Tensor %1344, %1345, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1350 = torch.aten.sub.Tensor %1349, %1348, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1351 = torch.aten.div.Tensor %result2_14, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%1352 = torch.aten.mul.Tensor %1351, %1350 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1353 = torch.aten.mul.Tensor %1339, %1342 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1354 = torch.aten.sum.dim_IntList %1353, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1355 = torch.aten.sum.dim_IntList %1339, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1356 = torch.aten.mul.Tensor %1352, %arg117 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1357 = torch.aten.view %1356, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_606 = torch.constant.int 0 | |
%int1_607 = torch.constant.int 1 | |
%1358 = torch.aten.transpose.int %arg115, %int0_606, %int1_607 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1359 = torch.aten.mm %1357, %1358 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_608 = torch.constant.int 0 | |
%int1_609 = torch.constant.int 1 | |
%1360 = torch.aten.transpose.int %1357, %int0_608, %int1_609 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1361 = torch.aten.mm %1360, %arg116 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_610 = torch.constant.int 0 | |
%int1_611 = torch.constant.int 1 | |
%1362 = torch.aten.transpose.int %1361, %int0_610, %int1_611 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1363 = torch.aten.sum.dim_IntList %1357, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1364 = torch.aten.view %1363, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1365 = torch.aten.view %1359, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_612 = torch.constant.int 0 | |
%int1_613 = torch.constant.int 1 | |
%1366 = torch.aten.transpose.int %1362, %int0_612, %int1_613 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1367 = torch.aten.view %1365, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1368 = torch.aten.permute %1367, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1369 = torch.aten.clone %1368, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%1370 = torch.aten.view %1369, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%1371 = torch.aten.transpose.int %arg113, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%1372 = torch.aten.bmm %1371, %1370 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%1373 = torch.aten.transpose.int %arg114, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%1374 = torch.aten.bmm %1370, %1373 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%1375 = torch.aten.view %1372, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1376 = torch.aten.view %1374, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%1377 = torch.aten.mul.Tensor %1376, %arg112 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%1378 = torch.aten.mul.Tensor %1377, %arg111 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%1379 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_614 = torch.constant.bool true | |
%none_615 = torch.constant.none | |
%1380 = torch.aten.sum.dim_IntList %1378, %1379, %true_614, %none_615 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_616 = torch.constant.int 0 | |
%1381 = torch.aten.size.int %1378, %int0_616 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_617 = torch.constant.int 1 | |
%1382 = torch.aten.size.int %1378, %int1_617 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_618 = torch.constant.int 2 | |
%1383 = torch.aten.size.int %1378, %int2_618 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_619 = torch.constant.int 3 | |
%1384 = torch.aten.size.int %1378, %int3_619 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%1385 = torch.prim.ListConstruct %1381, %1382, %1383, %1384 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1386 = torch.aten.broadcast_to %1380, %1385 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%1387 = torch.aten.mul.Tensor %arg111, %1386 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_620 = torch.constant.float 1.000000e+00 | |
%1388 = torch.aten.sub.Tensor %1378, %1387, %float1.000000e00_620 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%1389 = torch.aten.div.Tensor %1388, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%1390 = torch.aten.view %1389, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%1391 = torch.aten.transpose.int %arg109, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%1392 = torch.aten.bmm %1391, %1390 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%1393 = torch.aten.transpose.int %arg110, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%1394 = torch.aten.bmm %1390, %1393 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%1395 = torch.aten.view %1392, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%1396 = torch.aten.view %1394, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1397 = torch.aten.transpose.int %1395, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%1398 = torch.aten.permute %1396, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1399 = torch.aten.clone %1398, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%1400 = torch.aten.view %1399, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1401 = torch.aten.permute %1375, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1402 = torch.aten.clone %1401, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%1403 = torch.aten.view %1402, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1404 = torch.aten.view %1403, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_621 = torch.constant.int 0 | |
%int1_622 = torch.constant.int 1 | |
%1405 = torch.aten.transpose.int %arg107, %int0_621, %int1_622 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1406 = torch.aten.mm %1404, %1405 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_623 = torch.constant.int 0 | |
%int1_624 = torch.constant.int 1 | |
%1407 = torch.aten.transpose.int %1404, %int0_623, %int1_624 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1408 = torch.aten.mm %1407, %arg108 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_625 = torch.constant.int 0 | |
%int1_626 = torch.constant.int 1 | |
%1409 = torch.aten.transpose.int %1408, %int0_625, %int1_626 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1410 = torch.aten.sum.dim_IntList %1404, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1411 = torch.aten.view %1410, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1412 = torch.aten.view %1406, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1413 = torch.aten.add.Tensor %1352, %1412, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_627 = torch.constant.int 0 | |
%int1_628 = torch.constant.int 1 | |
%1414 = torch.aten.transpose.int %1409, %int0_627, %int1_628 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1415 = torch.aten.permute %1397, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1416 = torch.aten.view %1415, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1417 = torch.aten.clone %1416, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1418 = torch.aten.view %1417, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_629 = torch.constant.int 0 | |
%int1_630 = torch.constant.int 1 | |
%1419 = torch.aten.transpose.int %arg105, %int0_629, %int1_630 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1420 = torch.aten.mm %1418, %1419 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_631 = torch.constant.int 0 | |
%int1_632 = torch.constant.int 1 | |
%1421 = torch.aten.transpose.int %1418, %int0_631, %int1_632 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1422 = torch.aten.mm %1421, %arg106 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_633 = torch.constant.int 0 | |
%int1_634 = torch.constant.int 1 | |
%1423 = torch.aten.transpose.int %1422, %int0_633, %int1_634 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1424 = torch.aten.sum.dim_IntList %1418, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1425 = torch.aten.view %1424, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1426 = torch.aten.view %1420, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1427 = torch.aten.add.Tensor %1413, %1426, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_635 = torch.constant.int 0 | |
%int1_636 = torch.constant.int 1 | |
%1428 = torch.aten.transpose.int %1423, %int0_635, %int1_636 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1429 = torch.aten.view %1400, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_637 = torch.constant.int 0 | |
%int1_638 = torch.constant.int 1 | |
%1430 = torch.aten.transpose.int %arg103, %int0_637, %int1_638 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1431 = torch.aten.mm %1429, %1430 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_639 = torch.constant.int 0 | |
%int1_640 = torch.constant.int 1 | |
%1432 = torch.aten.transpose.int %1429, %int0_639, %int1_640 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1433 = torch.aten.mm %1432, %arg104 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_641 = torch.constant.int 0 | |
%int1_642 = torch.constant.int 1 | |
%1434 = torch.aten.transpose.int %1433, %int0_641, %int1_642 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1435 = torch.aten.sum.dim_IntList %1429, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1436 = torch.aten.view %1435, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1437 = torch.aten.view %1431, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1438 = torch.aten.add.Tensor %1427, %1437, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_643 = torch.constant.int 0 | |
%int1_644 = torch.constant.int 1 | |
%1439 = torch.aten.transpose.int %1434, %int0_643, %int1_644 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1440 = torch.aten.sub.Tensor %arg102, %result1_10, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1441 = torch.aten.mul.Tensor %1440, %result2_11 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1442 = torch.aten.mul.Tensor %1438, %arg9 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1443 = torch.aten.mul.Tensor %1442, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%1444 = torch.aten.sum.dim_IntList %1442, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1445 = torch.aten.mul.Tensor %1442, %1441 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1446 = torch.aten.sum.dim_IntList %1445, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1447 = torch.aten.mul.Tensor %1441, %1446 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1448 = torch.aten.sub.Tensor %1443, %1444, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1449 = torch.aten.sub.Tensor %1448, %1447, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1450 = torch.aten.div.Tensor %result2_11, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%1451 = torch.aten.mul.Tensor %1450, %1449 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1452 = torch.aten.mul.Tensor %1438, %1441 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1453 = torch.aten.sum.dim_IntList %1452, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1454 = torch.aten.sum.dim_IntList %1438, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1455 = torch.aten.mul.Tensor %1451, %arg101 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1456 = torch.aten.view %1455, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_645 = torch.constant.int 0 | |
%int1_646 = torch.constant.int 1 | |
%1457 = torch.aten.transpose.int %arg99, %int0_645, %int1_646 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1458 = torch.aten.mm %1456, %1457 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_647 = torch.constant.int 0 | |
%int1_648 = torch.constant.int 1 | |
%1459 = torch.aten.transpose.int %1456, %int0_647, %int1_648 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1460 = torch.aten.mm %1459, %arg100 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_649 = torch.constant.int 0 | |
%int1_650 = torch.constant.int 1 | |
%1461 = torch.aten.transpose.int %1460, %int0_649, %int1_650 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1462 = torch.aten.sum.dim_IntList %1456, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1463 = torch.aten.view %1462, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1464 = torch.aten.view %1458, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_651 = torch.constant.int 0 | |
%int1_652 = torch.constant.int 1 | |
%1465 = torch.aten.transpose.int %1461, %int0_651, %int1_652 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1466 = torch.aten.gelu_backward %1464, %arg98, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%1467 = torch.aten.view %1466, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_653 = torch.constant.int 0 | |
%int1_654 = torch.constant.int 1 | |
%1468 = torch.aten.transpose.int %arg96, %int0_653, %int1_654 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1469 = torch.aten.mm %1467, %1468 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_655 = torch.constant.int 0 | |
%int1_656 = torch.constant.int 1 | |
%1470 = torch.aten.transpose.int %1467, %int0_655, %int1_656 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%1471 = torch.aten.mm %1470, %arg97 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_657 = torch.constant.int 0 | |
%int1_658 = torch.constant.int 1 | |
%1472 = torch.aten.transpose.int %1471, %int0_657, %int1_658 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1473 = torch.aten.sum.dim_IntList %1467, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%1474 = torch.aten.view %1473, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%1475 = torch.aten.view %1469, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1476 = torch.aten.add.Tensor %1451, %1475, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_659 = torch.constant.int 0 | |
%int1_660 = torch.constant.int 1 | |
%1477 = torch.aten.transpose.int %1472, %int0_659, %int1_660 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1478 = torch.aten.sub.Tensor %arg95, %result1_7, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1479 = torch.aten.mul.Tensor %1478, %result2_8 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1480 = torch.aten.mul.Tensor %1476, %arg7 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1481 = torch.aten.mul.Tensor %1480, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%1482 = torch.aten.sum.dim_IntList %1480, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1483 = torch.aten.mul.Tensor %1480, %1479 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1484 = torch.aten.sum.dim_IntList %1483, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1485 = torch.aten.mul.Tensor %1479, %1484 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1486 = torch.aten.sub.Tensor %1481, %1482, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1487 = torch.aten.sub.Tensor %1486, %1485, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1488 = torch.aten.div.Tensor %result2_8, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%1489 = torch.aten.mul.Tensor %1488, %1487 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1490 = torch.aten.mul.Tensor %1476, %1479 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1491 = torch.aten.sum.dim_IntList %1490, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1492 = torch.aten.sum.dim_IntList %1476, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1493 = torch.aten.mul.Tensor %1489, %arg94 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1494 = torch.aten.view %1493, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_661 = torch.constant.int 0 | |
%int1_662 = torch.constant.int 1 | |
%1495 = torch.aten.transpose.int %arg92, %int0_661, %int1_662 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1496 = torch.aten.mm %1494, %1495 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_663 = torch.constant.int 0 | |
%int1_664 = torch.constant.int 1 | |
%1497 = torch.aten.transpose.int %1494, %int0_663, %int1_664 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1498 = torch.aten.mm %1497, %arg93 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_665 = torch.constant.int 0 | |
%int1_666 = torch.constant.int 1 | |
%1499 = torch.aten.transpose.int %1498, %int0_665, %int1_666 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1500 = torch.aten.sum.dim_IntList %1494, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1501 = torch.aten.view %1500, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1502 = torch.aten.view %1496, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_667 = torch.constant.int 0 | |
%int1_668 = torch.constant.int 1 | |
%1503 = torch.aten.transpose.int %1499, %int0_667, %int1_668 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1504 = torch.aten.view %1502, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1505 = torch.aten.permute %1504, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1506 = torch.aten.clone %1505, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%1507 = torch.aten.view %1506, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%1508 = torch.aten.transpose.int %arg90, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%1509 = torch.aten.bmm %1508, %1507 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%1510 = torch.aten.transpose.int %arg91, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%1511 = torch.aten.bmm %1507, %1510 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%1512 = torch.aten.view %1509, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1513 = torch.aten.view %1511, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%1514 = torch.aten.mul.Tensor %1513, %arg89 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%1515 = torch.aten.mul.Tensor %1514, %arg88 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%1516 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_669 = torch.constant.bool true | |
%none_670 = torch.constant.none | |
%1517 = torch.aten.sum.dim_IntList %1515, %1516, %true_669, %none_670 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_671 = torch.constant.int 0 | |
%1518 = torch.aten.size.int %1515, %int0_671 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_672 = torch.constant.int 1 | |
%1519 = torch.aten.size.int %1515, %int1_672 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_673 = torch.constant.int 2 | |
%1520 = torch.aten.size.int %1515, %int2_673 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_674 = torch.constant.int 3 | |
%1521 = torch.aten.size.int %1515, %int3_674 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%1522 = torch.prim.ListConstruct %1518, %1519, %1520, %1521 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1523 = torch.aten.broadcast_to %1517, %1522 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%1524 = torch.aten.mul.Tensor %arg88, %1523 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_675 = torch.constant.float 1.000000e+00 | |
%1525 = torch.aten.sub.Tensor %1515, %1524, %float1.000000e00_675 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%1526 = torch.aten.div.Tensor %1525, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%1527 = torch.aten.view %1526, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%1528 = torch.aten.transpose.int %arg86, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%1529 = torch.aten.bmm %1528, %1527 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%1530 = torch.aten.transpose.int %arg87, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%1531 = torch.aten.bmm %1527, %1530 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%1532 = torch.aten.view %1529, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%1533 = torch.aten.view %1531, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1534 = torch.aten.transpose.int %1532, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%1535 = torch.aten.permute %1533, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1536 = torch.aten.clone %1535, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%1537 = torch.aten.view %1536, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1538 = torch.aten.permute %1512, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1539 = torch.aten.clone %1538, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%1540 = torch.aten.view %1539, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1541 = torch.aten.view %1540, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_676 = torch.constant.int 0 | |
%int1_677 = torch.constant.int 1 | |
%1542 = torch.aten.transpose.int %arg84, %int0_676, %int1_677 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1543 = torch.aten.mm %1541, %1542 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_678 = torch.constant.int 0 | |
%int1_679 = torch.constant.int 1 | |
%1544 = torch.aten.transpose.int %1541, %int0_678, %int1_679 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1545 = torch.aten.mm %1544, %arg85 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_680 = torch.constant.int 0 | |
%int1_681 = torch.constant.int 1 | |
%1546 = torch.aten.transpose.int %1545, %int0_680, %int1_681 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1547 = torch.aten.sum.dim_IntList %1541, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1548 = torch.aten.view %1547, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1549 = torch.aten.view %1543, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1550 = torch.aten.add.Tensor %1489, %1549, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_682 = torch.constant.int 0 | |
%int1_683 = torch.constant.int 1 | |
%1551 = torch.aten.transpose.int %1546, %int0_682, %int1_683 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1552 = torch.aten.permute %1534, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1553 = torch.aten.view %1552, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1554 = torch.aten.clone %1553, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1555 = torch.aten.view %1554, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_684 = torch.constant.int 0 | |
%int1_685 = torch.constant.int 1 | |
%1556 = torch.aten.transpose.int %arg82, %int0_684, %int1_685 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1557 = torch.aten.mm %1555, %1556 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_686 = torch.constant.int 0 | |
%int1_687 = torch.constant.int 1 | |
%1558 = torch.aten.transpose.int %1555, %int0_686, %int1_687 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1559 = torch.aten.mm %1558, %arg83 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_688 = torch.constant.int 0 | |
%int1_689 = torch.constant.int 1 | |
%1560 = torch.aten.transpose.int %1559, %int0_688, %int1_689 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1561 = torch.aten.sum.dim_IntList %1555, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1562 = torch.aten.view %1561, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1563 = torch.aten.view %1557, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1564 = torch.aten.add.Tensor %1550, %1563, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_690 = torch.constant.int 0 | |
%int1_691 = torch.constant.int 1 | |
%1565 = torch.aten.transpose.int %1560, %int0_690, %int1_691 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1566 = torch.aten.view %1537, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_692 = torch.constant.int 0 | |
%int1_693 = torch.constant.int 1 | |
%1567 = torch.aten.transpose.int %arg80, %int0_692, %int1_693 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1568 = torch.aten.mm %1566, %1567 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_694 = torch.constant.int 0 | |
%int1_695 = torch.constant.int 1 | |
%1569 = torch.aten.transpose.int %1566, %int0_694, %int1_695 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1570 = torch.aten.mm %1569, %arg81 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_696 = torch.constant.int 0 | |
%int1_697 = torch.constant.int 1 | |
%1571 = torch.aten.transpose.int %1570, %int0_696, %int1_697 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1572 = torch.aten.sum.dim_IntList %1566, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1573 = torch.aten.view %1572, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1574 = torch.aten.view %1568, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1575 = torch.aten.add.Tensor %1564, %1574, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_698 = torch.constant.int 0 | |
%int1_699 = torch.constant.int 1 | |
%1576 = torch.aten.transpose.int %1571, %int0_698, %int1_699 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1577 = torch.aten.sub.Tensor %arg79, %result1_4, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1578 = torch.aten.mul.Tensor %1577, %result2_5 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1579 = torch.aten.mul.Tensor %1575, %arg5 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1580 = torch.aten.mul.Tensor %1579, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%1581 = torch.aten.sum.dim_IntList %1579, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1582 = torch.aten.mul.Tensor %1579, %1578 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1583 = torch.aten.sum.dim_IntList %1582, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1584 = torch.aten.mul.Tensor %1578, %1583 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1585 = torch.aten.sub.Tensor %1580, %1581, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1586 = torch.aten.sub.Tensor %1585, %1584, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1587 = torch.aten.div.Tensor %result2_5, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%1588 = torch.aten.mul.Tensor %1587, %1586 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1589 = torch.aten.mul.Tensor %1575, %1578 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1590 = torch.aten.sum.dim_IntList %1589, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1591 = torch.aten.sum.dim_IntList %1575, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1592 = torch.aten.mul.Tensor %1588, %arg78 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1593 = torch.aten.view %1592, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_700 = torch.constant.int 0 | |
%int1_701 = torch.constant.int 1 | |
%1594 = torch.aten.transpose.int %arg76, %int0_700, %int1_701 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1595 = torch.aten.mm %1593, %1594 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,3072],f32> -> !torch.vtensor<[2048,3072],f32> | |
%int0_702 = torch.constant.int 0 | |
%int1_703 = torch.constant.int 1 | |
%1596 = torch.aten.transpose.int %1593, %int0_702, %int1_703 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1597 = torch.aten.mm %1596, %arg77 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,3072],f32> -> !torch.vtensor<[768,3072],f32> | |
%int0_704 = torch.constant.int 0 | |
%int1_705 = torch.constant.int 1 | |
%1598 = torch.aten.transpose.int %1597, %int0_704, %int1_705 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1599 = torch.aten.sum.dim_IntList %1593, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1600 = torch.aten.view %1599, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1601 = torch.aten.view %1595, %84 : !torch.vtensor<[2048,3072],f32>, !torch.list<int> -> !torch.vtensor<[4,512,3072],f32> | |
%int0_706 = torch.constant.int 0 | |
%int1_707 = torch.constant.int 1 | |
%1602 = torch.aten.transpose.int %1598, %int0_706, %int1_707 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1603 = torch.aten.gelu_backward %1601, %arg75, %str : !torch.vtensor<[4,512,3072],f32>, !torch.vtensor<[4,512,3072],f32>, !torch.str -> !torch.vtensor<[4,512,3072],f32> | |
%1604 = torch.aten.view %1603, %88 : !torch.vtensor<[4,512,3072],f32>, !torch.list<int> -> !torch.vtensor<[2048,3072],f32> | |
%int0_708 = torch.constant.int 0 | |
%int1_709 = torch.constant.int 1 | |
%1605 = torch.aten.transpose.int %arg73, %int0_708, %int1_709 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1606 = torch.aten.mm %1604, %1605 : !torch.vtensor<[2048,3072],f32>, !torch.vtensor<[3072,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_710 = torch.constant.int 0 | |
%int1_711 = torch.constant.int 1 | |
%1607 = torch.aten.transpose.int %1604, %int0_710, %int1_711 : !torch.vtensor<[2048,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,2048],f32> | |
%1608 = torch.aten.mm %1607, %arg74 : !torch.vtensor<[3072,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[3072,768],f32> | |
%int0_712 = torch.constant.int 0 | |
%int1_713 = torch.constant.int 1 | |
%1609 = torch.aten.transpose.int %1608, %int0_712, %int1_713 : !torch.vtensor<[3072,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,3072],f32> | |
%1610 = torch.aten.sum.dim_IntList %1604, %24, %true, %none : !torch.vtensor<[2048,3072],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,3072],f32> | |
%1611 = torch.aten.view %1610, %96 : !torch.vtensor<[1,3072],f32>, !torch.list<int> -> !torch.vtensor<[3072],f32> | |
%1612 = torch.aten.view %1606, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1613 = torch.aten.add.Tensor %1588, %1612, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_714 = torch.constant.int 0 | |
%int1_715 = torch.constant.int 1 | |
%1614 = torch.aten.transpose.int %1609, %int0_714, %int1_715 : !torch.vtensor<[768,3072],f32>, !torch.int, !torch.int -> !torch.vtensor<[3072,768],f32> | |
%1615 = torch.aten.sub.Tensor %arg72, %result1_1, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1616 = torch.aten.mul.Tensor %1615, %result2_2 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1617 = torch.aten.mul.Tensor %1613, %arg3 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1618 = torch.aten.mul.Tensor %1617, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%1619 = torch.aten.sum.dim_IntList %1617, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1620 = torch.aten.mul.Tensor %1617, %1616 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1621 = torch.aten.sum.dim_IntList %1620, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1622 = torch.aten.mul.Tensor %1616, %1621 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1623 = torch.aten.sub.Tensor %1618, %1619, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1624 = torch.aten.sub.Tensor %1623, %1622, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1625 = torch.aten.div.Tensor %result2_2, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%1626 = torch.aten.mul.Tensor %1625, %1624 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1627 = torch.aten.mul.Tensor %1613, %1616 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1628 = torch.aten.sum.dim_IntList %1627, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1629 = torch.aten.sum.dim_IntList %1613, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1630 = torch.aten.mul.Tensor %1626, %arg71 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1631 = torch.aten.view %1630, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_716 = torch.constant.int 0 | |
%int1_717 = torch.constant.int 1 | |
%1632 = torch.aten.transpose.int %arg69, %int0_716, %int1_717 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1633 = torch.aten.mm %1631, %1632 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_718 = torch.constant.int 0 | |
%int1_719 = torch.constant.int 1 | |
%1634 = torch.aten.transpose.int %1631, %int0_718, %int1_719 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1635 = torch.aten.mm %1634, %arg70 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_720 = torch.constant.int 0 | |
%int1_721 = torch.constant.int 1 | |
%1636 = torch.aten.transpose.int %1635, %int0_720, %int1_721 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1637 = torch.aten.sum.dim_IntList %1631, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1638 = torch.aten.view %1637, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1639 = torch.aten.view %1633, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%int0_722 = torch.constant.int 0 | |
%int1_723 = torch.constant.int 1 | |
%1640 = torch.aten.transpose.int %1636, %int0_722, %int1_723 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1641 = torch.aten.view %1639, %127 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1642 = torch.aten.permute %1641, %129 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1643 = torch.aten.clone %1642, %int0 : !torch.vtensor<[4,12,512,64],f32>, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%1644 = torch.aten.view %1643, %132 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[48,512,64],f32> | |
%1645 = torch.aten.transpose.int %arg67, %int1, %int2 : !torch.vtensor<[48,512,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,512],f32> | |
%1646 = torch.aten.bmm %1645, %1644 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%1647 = torch.aten.transpose.int %arg68, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%1648 = torch.aten.bmm %1644, %1647 : !torch.vtensor<[48,512,64],f32>, !torch.vtensor<[48,64,512],f32> -> !torch.vtensor<[48,512,512],f32> | |
%1649 = torch.aten.view %1646, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1650 = torch.aten.view %1648, %140 : !torch.vtensor<[48,512,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%1651 = torch.aten.mul.Tensor %1650, %arg66 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%1652 = torch.aten.mul.Tensor %1651, %arg65 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%1653 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%true_724 = torch.constant.bool true | |
%none_725 = torch.constant.none | |
%1654 = torch.aten.sum.dim_IntList %1652, %1653, %true_724, %none_725 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,12,512,1],f32> | |
%int0_726 = torch.constant.int 0 | |
%1655 = torch.aten.size.int %1652, %int0_726 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int1_727 = torch.constant.int 1 | |
%1656 = torch.aten.size.int %1652, %int1_727 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int2_728 = torch.constant.int 2 | |
%1657 = torch.aten.size.int %1652, %int2_728 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%int3_729 = torch.constant.int 3 | |
%1658 = torch.aten.size.int %1652, %int3_729 : !torch.vtensor<[4,12,512,512],f32>, !torch.int -> !torch.int | |
%1659 = torch.prim.ListConstruct %1655, %1656, %1657, %1658 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1660 = torch.aten.broadcast_to %1654, %1659 : !torch.vtensor<[4,12,512,1],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,512],f32> | |
%1661 = torch.aten.mul.Tensor %arg65, %1660 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32> -> !torch.vtensor<[4,12,512,512],f32> | |
%float1.000000e00_730 = torch.constant.float 1.000000e+00 | |
%1662 = torch.aten.sub.Tensor %1652, %1661, %float1.000000e00_730 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[4,12,512,512],f32>, !torch.float -> !torch.vtensor<[4,12,512,512],f32> | |
%1663 = torch.aten.div.Tensor %1662, %1 : !torch.vtensor<[4,12,512,512],f32>, !torch.vtensor<[],f64> -> !torch.vtensor<[4,12,512,512],f32> | |
%1664 = torch.aten.view %1663, %155 : !torch.vtensor<[4,12,512,512],f32>, !torch.list<int> -> !torch.vtensor<[48,512,512],f32> | |
%1665 = torch.aten.transpose.int %arg63, %int1, %int2 : !torch.vtensor<[48,512,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,64,512],f32> | |
%1666 = torch.aten.bmm %1665, %1664 : !torch.vtensor<[48,64,512],f32>, !torch.vtensor<[48,512,512],f32> -> !torch.vtensor<[48,64,512],f32> | |
%1667 = torch.aten.transpose.int %arg64, %int1, %int2 : !torch.vtensor<[48,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[48,512,64],f32> | |
%1668 = torch.aten.bmm %1664, %1667 : !torch.vtensor<[48,512,512],f32>, !torch.vtensor<[48,512,64],f32> -> !torch.vtensor<[48,512,64],f32> | |
%1669 = torch.aten.view %1666, %161 : !torch.vtensor<[48,64,512],f32>, !torch.list<int> -> !torch.vtensor<[4,12,64,512],f32> | |
%1670 = torch.aten.view %1668, %138 : !torch.vtensor<[48,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,12,512,64],f32> | |
%1671 = torch.aten.transpose.int %1669, %int-1, %int-2 : !torch.vtensor<[4,12,64,512],f32>, !torch.int, !torch.int -> !torch.vtensor<[4,12,512,64],f32> | |
%1672 = torch.aten.permute %1670, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1673 = torch.aten.clone %1672, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%1674 = torch.aten.view %1673, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1675 = torch.aten.permute %1649, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1676 = torch.aten.clone %1675, %int0 : !torch.vtensor<[4,512,12,64],f32>, !torch.int -> !torch.vtensor<[4,512,12,64],f32> | |
%1677 = torch.aten.view %1676, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1678 = torch.aten.view %1677, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_731 = torch.constant.int 0 | |
%int1_732 = torch.constant.int 1 | |
%1679 = torch.aten.transpose.int %arg61, %int0_731, %int1_732 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1680 = torch.aten.mm %1678, %1679 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_733 = torch.constant.int 0 | |
%int1_734 = torch.constant.int 1 | |
%1681 = torch.aten.transpose.int %1678, %int0_733, %int1_734 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1682 = torch.aten.mm %1681, %arg62 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_735 = torch.constant.int 0 | |
%int1_736 = torch.constant.int 1 | |
%1683 = torch.aten.transpose.int %1682, %int0_735, %int1_736 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1684 = torch.aten.sum.dim_IntList %1678, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1685 = torch.aten.view %1684, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1686 = torch.aten.view %1680, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1687 = torch.aten.add.Tensor %1626, %1686, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_737 = torch.constant.int 0 | |
%int1_738 = torch.constant.int 1 | |
%1688 = torch.aten.transpose.int %1683, %int0_737, %int1_738 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1689 = torch.aten.permute %1671, %129 : !torch.vtensor<[4,12,512,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,12,64],f32> | |
%1690 = torch.aten.view %1689, %28 : !torch.vtensor<[4,512,12,64],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1691 = torch.aten.clone %1690, %int0 : !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1692 = torch.aten.view %1691, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_739 = torch.constant.int 0 | |
%int1_740 = torch.constant.int 1 | |
%1693 = torch.aten.transpose.int %arg59, %int0_739, %int1_740 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1694 = torch.aten.mm %1692, %1693 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_741 = torch.constant.int 0 | |
%int1_742 = torch.constant.int 1 | |
%1695 = torch.aten.transpose.int %1692, %int0_741, %int1_742 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1696 = torch.aten.mm %1695, %arg60 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_743 = torch.constant.int 0 | |
%int1_744 = torch.constant.int 1 | |
%1697 = torch.aten.transpose.int %1696, %int0_743, %int1_744 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1698 = torch.aten.sum.dim_IntList %1692, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1699 = torch.aten.view %1698, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1700 = torch.aten.view %1694, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1701 = torch.aten.add.Tensor %1687, %1700, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_745 = torch.constant.int 0 | |
%int1_746 = torch.constant.int 1 | |
%1702 = torch.aten.transpose.int %1697, %int0_745, %int1_746 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1703 = torch.aten.view %1674, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%int0_747 = torch.constant.int 0 | |
%int1_748 = torch.constant.int 1 | |
%1704 = torch.aten.transpose.int %arg57, %int0_747, %int1_748 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1705 = torch.aten.mm %1703, %1704 : !torch.vtensor<[2048,768],f32>, !torch.vtensor<[768,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%int0_749 = torch.constant.int 0 | |
%int1_750 = torch.constant.int 1 | |
%1706 = torch.aten.transpose.int %1703, %int0_749, %int1_750 : !torch.vtensor<[2048,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,2048],f32> | |
%1707 = torch.aten.mm %1706, %arg58 : !torch.vtensor<[768,2048],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[768,768],f32> | |
%int0_751 = torch.constant.int 0 | |
%int1_752 = torch.constant.int 1 | |
%1708 = torch.aten.transpose.int %1707, %int0_751, %int1_752 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1709 = torch.aten.sum.dim_IntList %1703, %24, %true, %none : !torch.vtensor<[2048,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,768],f32> | |
%1710 = torch.aten.view %1709, %3 : !torch.vtensor<[1,768],f32>, !torch.list<int> -> !torch.vtensor<[768],f32> | |
%1711 = torch.aten.view %1705, %28 : !torch.vtensor<[2048,768],f32>, !torch.list<int> -> !torch.vtensor<[4,512,768],f32> | |
%1712 = torch.aten.add.Tensor %1701, %1711, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%int0_753 = torch.constant.int 0 | |
%int1_754 = torch.constant.int 1 | |
%1713 = torch.aten.transpose.int %1708, %int0_753, %int1_754 : !torch.vtensor<[768,768],f32>, !torch.int, !torch.int -> !torch.vtensor<[768,768],f32> | |
%1714 = torch.aten.mul.Tensor %1712, %arg56 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1715 = torch.aten.sub.Tensor %arg55, %result1, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1716 = torch.aten.mul.Tensor %1715, %result2 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1717 = torch.aten.mul.Tensor %1714, %arg1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1718 = torch.aten.mul.Tensor %1717, %0 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,768],f32> | |
%1719 = torch.aten.sum.dim_IntList %1717, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1720 = torch.aten.mul.Tensor %1717, %1716 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1721 = torch.aten.sum.dim_IntList %1720, %35, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,512,1],f32> | |
%1722 = torch.aten.mul.Tensor %1716, %1721 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1723 = torch.aten.sub.Tensor %1718, %1719, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,1],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1724 = torch.aten.sub.Tensor %1723, %1722, %int1 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32>, !torch.int -> !torch.vtensor<[4,512,768],f32> | |
%1725 = torch.aten.div.Tensor %result2, %0 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[],si64> -> !torch.vtensor<[4,512,1],f32> | |
%1726 = torch.aten.mul.Tensor %1725, %1724 : !torch.vtensor<[4,512,1],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1727 = torch.aten.mul.Tensor %1714, %1716 : !torch.vtensor<[4,512,768],f32>, !torch.vtensor<[4,512,768],f32> -> !torch.vtensor<[4,512,768],f32> | |
%1728 = torch.aten.sum.dim_IntList %1727, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1729 = torch.aten.sum.dim_IntList %1714, %45, %false, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[768],f32> | |
%1730 = torch.aten.sum.dim_IntList %1726, %24, %true, %none : !torch.vtensor<[4,512,768],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,512,768],f32> | |
%1731 = torch.prim.ListConstruct %int512, %int768 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1732 = torch.aten.view %1730, %1731 : !torch.vtensor<[1,512,768],f32>, !torch.list<int> -> !torch.vtensor<[512,768],f32> | |
%none_755 = torch.constant.none | |
%1733 = torch.aten.empty.memory_format %1731, %int6, %int0, %cpu, %false, %none_755 : !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[512,768],f32> | |
%int0_756 = torch.constant.int 0 | |
%1734 = torch.valsem.aten.fill.Scalar %1733, %int0_756 : !torch.vtensor<[512,768],f32>, !torch.int -> !torch.vtensor<[512,768],f32> | |
%1735 = torch.prim.ListConstruct %int512 : (!torch.int) -> !torch.list<int> | |
%1736 = torch.aten.view %arg54, %1735 : !torch.vtensor<[1,512],si64>, !torch.list<int> -> !torch.vtensor<[512],si64> | |
%1737 = torch.aten.ne.Scalar %1736, %int-1 : !torch.vtensor<[512],si64>, !torch.int -> !torch.vtensor<[512],i1> | |
%1738 = torch.aten.unsqueeze %1737, %int1 : !torch.vtensor<[512],i1>, !torch.int -> !torch.vtensor<[512,1],i1> | |
%1739 = torch.aten.broadcast_to %1738, %1731 : !torch.vtensor<[512,1],i1>, !torch.list<int> -> !torch.vtensor<[512,768],i1> | |
%int0_757 = torch.constant.int 0 | |
%1740 = torch.aten.size.int %1732, %int0_757 : !torch.vtensor<[512,768],f32>, !torch.int -> !torch.int | |
%int1_758 = torch.constant.int 1 | |
%1741 = torch.aten.size.int %1732, %int1_758 : !torch.vtensor<[512,768],f32>, !torch.int -> !torch.int | |
%1742 = torch.prim.ListConstruct %1740, %1741 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1743 = torch.aten.empty.memory_format %1742, %int6, %int0, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[512,768],f32> | |
%1744 = torch.valsem.aten.fill.Scalar %1743, %int0 : !torch.vtensor<[512,768],f32>, !torch.int -> !torch.vtensor<[512,768],f32> | |
%1745 = torch.aten.where.self %1739, %1732, %1744 : !torch.vtensor<[512,768],i1>, !torch.vtensor<[512,768],f32>, !torch.vtensor<[512,768],f32> -> !torch.vtensor<[512,768],f32> | |
%1746 = torch.prim.ListConstruct %1736 : (!torch.vtensor<[512],si64>) -> !torch.list<vtensor> | |
%false_759 = torch.constant.bool false | |
%1747 = torch.valsem.aten.index_put_impl %1734, %1746, %1745, %true, %false_759 : !torch.vtensor<[512,768],f32>, !torch.list<vtensor>, !torch.vtensor<[512,768],f32>, !torch.bool, !torch.bool -> !torch.vtensor<[512,768],f32> | |
%1748 = torch.aten.view %1726, %49 : !torch.vtensor<[4,512,768],f32>, !torch.list<int> -> !torch.vtensor<[2048,768],f32> | |
%1749 = torch.prim.ListConstruct %int2, %int768 : (!torch.int, !torch.int) -> !torch.list<int> | |
%none_760 = torch.constant.none | |
%1750 = torch.aten.empty.memory_format %1749, %int6, %int0, %cpu, %false, %none_760 : !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[2,768],f32> | |
%int0_761 = torch.constant.int 0 | |
%1751 = torch.valsem.aten.fill.Scalar %1750, %int0_761 : !torch.vtensor<[2,768],f32>, !torch.int -> !torch.vtensor<[2,768],f32> | |
%1752 = torch.aten.clone %arg53, %int0 : !torch.vtensor<[4,512],si64>, !torch.int -> !torch.vtensor<[4,512],si64> | |
%1753 = torch.prim.ListConstruct %int2048 : (!torch.int) -> !torch.list<int> | |
%1754 = torch.aten.view %1752, %1753 : !torch.vtensor<[4,512],si64>, !torch.list<int> -> !torch.vtensor<[2048],si64> | |
%1755 = torch.aten.ne.Scalar %1754, %int-1 : !torch.vtensor<[2048],si64>, !torch.int -> !torch.vtensor<[2048],i1> | |
%1756 = torch.aten.unsqueeze %1755, %int1 : !torch.vtensor<[2048],i1>, !torch.int -> !torch.vtensor<[2048,1],i1> | |
%1757 = torch.aten.broadcast_to %1756, %49 : !torch.vtensor<[2048,1],i1>, !torch.list<int> -> !torch.vtensor<[2048,768],i1> | |
%int0_762 = torch.constant.int 0 | |
%1758 = torch.aten.size.int %1748, %int0_762 : !torch.vtensor<[2048,768],f32>, !torch.int -> !torch.int | |
%int1_763 = torch.constant.int 1 | |
%1759 = torch.aten.size.int %1748, %int1_763 : !torch.vtensor<[2048,768],f32>, !torch.int -> !torch.int | |
%1760 = torch.prim.ListConstruct %1758, %1759 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1761 = torch.aten.empty.memory_format %1760, %int6, %int0, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[2048,768],f32> | |
%1762 = torch.valsem.aten.fill.Scalar %1761, %int0 : !torch.vtensor<[2048,768],f32>, !torch.int -> !torch.vtensor<[2048,768],f32> | |
%1763 = torch.aten.where.self %1757, %1748, %1762 : !torch.vtensor<[2048,768],i1>, !torch.vtensor<[2048,768],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%1764 = torch.prim.ListConstruct %1754 : (!torch.vtensor<[2048],si64>) -> !torch.list<vtensor> | |
%false_764 = torch.constant.bool false | |
%1765 = torch.valsem.aten.index_put_impl %1751, %1764, %1763, %true, %false_764 : !torch.vtensor<[2,768],f32>, !torch.list<vtensor>, !torch.vtensor<[2048,768],f32>, !torch.bool, !torch.bool -> !torch.vtensor<[2,768],f32> | |
%1766 = torch.prim.ListConstruct %int30522, %int768 : (!torch.int, !torch.int) -> !torch.list<int> | |
%none_765 = torch.constant.none | |
%1767 = torch.aten.empty.memory_format %1766, %int6, %int0, %cpu, %false, %none_765 : !torch.list<int>, !torch.int, !torch.int, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[30522,768],f32> | |
%int0_766 = torch.constant.int 0 | |
%1768 = torch.valsem.aten.fill.Scalar %1767, %int0_766 : !torch.vtensor<[30522,768],f32>, !torch.int -> !torch.vtensor<[30522,768],f32> | |
%1769 = torch.aten.view %arg52, %1753 : !torch.vtensor<[4,512],si64>, !torch.list<int> -> !torch.vtensor<[2048],si64> | |
%1770 = torch.aten.ne.Scalar %1769, %int0 : !torch.vtensor<[2048],si64>, !torch.int -> !torch.vtensor<[2048],i1> | |
%1771 = torch.aten.unsqueeze %1770, %int1 : !torch.vtensor<[2048],i1>, !torch.int -> !torch.vtensor<[2048,1],i1> | |
%1772 = torch.aten.broadcast_to %1771, %49 : !torch.vtensor<[2048,1],i1>, !torch.list<int> -> !torch.vtensor<[2048,768],i1> | |
%1773 = torch.aten.where.self %1772, %1748, %1762 : !torch.vtensor<[2048,768],i1>, !torch.vtensor<[2048,768],f32>, !torch.vtensor<[2048,768],f32> -> !torch.vtensor<[2048,768],f32> | |
%1774 = torch.prim.ListConstruct %1769 : (!torch.vtensor<[2048],si64>) -> !torch.list<vtensor> | |
%false_767 = torch.constant.bool false | |
%1775 = torch.valsem.aten.index_put_impl %1768, %1774, %1773, %true, %false_767 : !torch.vtensor<[30522,768],f32>, !torch.list<vtensor>, !torch.vtensor<[2048,768],f32>, !torch.bool, !torch.bool -> !torch.vtensor<[30522,768],f32> | |
return %1729, %1728, %1747, %1765, %1775, %1629, %1628, %1638, %1640, %1699, %1702, %1710, %1713, %1685, %1688, %1611, %1614, %1591, %1590, %1600, %1602, %1492, %1491, %1501, %1503, %1562, %1565, %1573, %1576, %1548, %1551, %1474, %1477, %1454, %1453, %1463, %1465, %259, %258, %268, %270, %329, %332, %340, %343, %315, %318, %241, %244, %221, %220, %230, %232, %115, %114, %124, %126, %192, %195, %203, %206, %178, %181, %97, %100, %74, %73, %83, %86, %1355, %1354, %1364, %1366, %1425, %1428, %1436, %1439, %1411, %1414, %1337, %1340, %1317, %1316, %1326, %1328, %1218, %1217, %1227, %1229, %1288, %1291, %1299, %1302, %1274, %1277, %1200, %1203, %1180, %1179, %1189, %1191, %1081, %1080, %1090, %1092, %1151, %1154, %1162, %1165, %1137, %1140, %1063, %1066, %1043, %1042, %1052, %1054, %944, %943, %953, %955, %1014, %1017, %1025, %1028, %1000, %1003, %926, %929, %906, %905, %915, %917, %807, %806, %816, %818, %877, %880, %888, %891, %863, %866, %789, %792, %769, %768, %778, %780, %670, %669, %679, %681, %740, %743, %751, %754, %726, %729, %652, %655, %632, %631, %641, %643, %533, %532, %542, %544, %603, %606, %614, %617, %589, %592, %515, %518, %495, %494, %504, %506, %396, %395, %405, %407, %466, %469, %477, %480, %452, %455, %378, %381, %358, %357, %367, %369, %2, %27, %30, %47, %46, %57, %59, %2, %2, %2, %2 : !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[512,768],f32>, !torch.vtensor<[2,768],f32>, !torch.vtensor<[30522,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[3072],f32>, !torch.vtensor<[3072,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,3072],f32>, !torch.vtensor<[1],f32>, !torch.vtensor<[30522],f32>, !torch.vtensor<[30522,768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768],f32>, !torch.vtensor<[768,768],f32>, !torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>, !torch.vtensor<[1],f32> | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment