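| // Single-token transformer forward pass lowered to linalg-on-tensors (inferred from the weight shapes): 512-dim hidden state, 8 attention heads of 64 dims, rotary position embeddings, and SiLU-gated feed-forward blocks, with per-layer weights passed as arguments. | |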
| func.func @forward(%arg0: tensor<512xf32>, %arg1: tensor<512xf32>, %arg2: tensor<512xf32>, %arg3: tensor<512xf32>, %arg4: tensor<512xf32>, %arg5: tensor<512xf32>, %arg6: tensor<512xf32>, %arg7: tensor<512xf32>, %arg8: tensor<512xf32>, %arg9: tensor<512xf32>, %arg10: tensor<512xf32>, %arg11: tensor<512xf32>, %arg12: tensor<512xf32>, %arg13: tensor<512xf32>, %arg14: tensor<512xf32>, %arg15: tensor<512xf32>, %arg16: tensor<512xf32>, %arg17: tensor<1x512xf32>, %arg18: tensor<512x512xf32>, %arg19: tensor<512x512xf32>, %arg20: tensor<512x512xf32>, %arg21: tensor<512x512xf32>, %arg22: tensor<1536x512xf32>, %arg23: tensor<1536x512xf32>, %arg24: tensor<512x1536xf32>, %arg25: tensor<512x512xf32>, %arg26: tensor<512x512xf32>, %arg27: tensor<512x512xf32>, %arg28: tensor<512x512xf32>, %arg29: tensor<1536x512xf32>, %arg30: tensor<1536x512xf32>, %arg31: tensor<512x1536xf32>, %arg32: tensor<512x512xf32>, %arg33: tensor<512x512xf32>, %arg34: tensor<512x512xf32>, %arg35: tensor<512x512xf32>, %arg36: tensor<1536x512xf32>, %arg37: tensor<1536x512xf32>, %arg38: tensor<512x1536xf32>, %arg39: tensor<512x512xf32>, %arg40: tensor<512x512xf32>, %arg41: tensor<512x512xf32>, %arg42: tensor<512x512xf32>, %arg43: tensor<1536x512xf32>, %arg44: tensor<1536x512xf32>, %arg45: tensor<512x1536xf32>, %arg46: tensor<512x512xf32>, %arg47: tensor<512x512xf32>, %arg48: tensor<512x512xf32>, %arg49: tensor<512x512xf32>, %arg50: tensor<1536x512xf32>, %arg51: tensor<1536x512xf32>, %arg52: tensor<512x1536xf32>, %arg53: tensor<512x512xf32>, %arg54: tensor<512x512xf32>, %arg55: tensor<512x512xf32>, %arg56: tensor<512x512xf32>, %arg57: tensor<1536x512xf32>, %arg58: tensor<1536x512xf32>, %arg59: tensor<512x1536xf32>, %arg60: tensor<512x512xf32>, %arg61: tensor<512x512xf32>, %arg62: tensor<512x512xf32>, %arg63: tensor<512x512xf32>, %arg64: tensor<1536x512xf32>, %arg65: tensor<1536x512xf32>, %arg66: tensor<512x1536xf32>, %arg67: tensor<512x512xf32>, %arg68: tensor<512x512xf32>, %arg69: tensor<512x512xf32>, %arg70: tensor<512x512xf32>, %arg71: tensor<1536x512xf32>, %arg72: tensor<1536x512xf32>, %arg73: tensor<512x1536xf32>, %arg74: tensor<1x512xf32>, %arg75: tensor<1x1xi64>, %arg76: tensor<2048x32x2xf32>) -> (tensor<1x1xf32>, tensor<2048x32x2xf32>) { | |
| %cst = arith.constant 0.000000e+00 : f32 | |
| %cst_0 = arith.constant -3.40282347E+38 : f32 | |
| %cst_1 = arith.constant 1.000000e+00 : f32 | |
| %c0_i64 = arith.constant 0 : i64 | |
| %cst_2 = arith.constant 1.000000e-05 : f64 | |
| %cst_3 = arith.constant 2.000000e+00 : f32 | |
| %cst_4 = arith.constant 5.120000e+02 : f32 | |
| %cst_5 = arith.constant 8.000000e+00 : f32 | |
| %c0 = arith.constant 0 : index | |
| %c1 = arith.constant 1 : index | |
| %0 = torch_c.from_builtin_tensor %arg76 : tensor<2048x32x2xf32> -> !torch.vtensor<[2048,32,2],f32> | |
| %1 = torch_c.from_builtin_tensor %arg75 : tensor<1x1xi64> -> !torch.vtensor<[1,1],si64> | |
| %2 = torch_c.from_builtin_tensor %arg74 : tensor<1x512xf32> -> !torch.vtensor<[1,512],f32> | |
| %3 = torch_c.from_builtin_tensor %arg73 : tensor<512x1536xf32> -> !torch.vtensor<[512,1536],f32> | |
| %4 = torch_c.from_builtin_tensor %arg72 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %5 = torch_c.from_builtin_tensor %arg71 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %6 = torch_c.from_builtin_tensor %arg70 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %7 = torch_c.from_builtin_tensor %arg69 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %8 = torch_c.from_builtin_tensor %arg68 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %9 = torch_c.from_builtin_tensor %arg67 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %10 = torch_c.from_builtin_tensor %arg66 : tensor<512x1536xf32> -> !torch.vtensor<[512,1536],f32> | |
| %11 = torch_c.from_builtin_tensor %arg65 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %12 = torch_c.from_builtin_tensor %arg64 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %13 = torch_c.from_builtin_tensor %arg63 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %14 = torch_c.from_builtin_tensor %arg62 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %15 = torch_c.from_builtin_tensor %arg61 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %16 = torch_c.from_builtin_tensor %arg60 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %17 = torch_c.from_builtin_tensor %arg59 : tensor<512x1536xf32> -> !torch.vtensor<[512,1536],f32> | |
| %18 = torch_c.from_builtin_tensor %arg58 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %19 = torch_c.from_builtin_tensor %arg57 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %20 = torch_c.from_builtin_tensor %arg56 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %21 = torch_c.from_builtin_tensor %arg55 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %22 = torch_c.from_builtin_tensor %arg54 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %23 = torch_c.from_builtin_tensor %arg53 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %24 = torch_c.from_builtin_tensor %arg52 : tensor<512x1536xf32> -> !torch.vtensor<[512,1536],f32> | |
| %25 = torch_c.from_builtin_tensor %arg51 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %26 = torch_c.from_builtin_tensor %arg50 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %27 = torch_c.from_builtin_tensor %arg49 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %28 = torch_c.from_builtin_tensor %arg48 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %29 = torch_c.from_builtin_tensor %arg47 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %30 = torch_c.from_builtin_tensor %arg46 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %31 = torch_c.from_builtin_tensor %arg45 : tensor<512x1536xf32> -> !torch.vtensor<[512,1536],f32> | |
| %32 = torch_c.from_builtin_tensor %arg44 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %33 = torch_c.from_builtin_tensor %arg43 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %34 = torch_c.from_builtin_tensor %arg42 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %35 = torch_c.from_builtin_tensor %arg41 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %36 = torch_c.from_builtin_tensor %arg40 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %37 = torch_c.from_builtin_tensor %arg39 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %38 = torch_c.from_builtin_tensor %arg38 : tensor<512x1536xf32> -> !torch.vtensor<[512,1536],f32> | |
| %39 = torch_c.from_builtin_tensor %arg37 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %40 = torch_c.from_builtin_tensor %arg36 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %41 = torch_c.from_builtin_tensor %arg35 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %42 = torch_c.from_builtin_tensor %arg34 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %43 = torch_c.from_builtin_tensor %arg33 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %44 = torch_c.from_builtin_tensor %arg32 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %45 = torch_c.from_builtin_tensor %arg31 : tensor<512x1536xf32> -> !torch.vtensor<[512,1536],f32> | |
| %46 = torch_c.from_builtin_tensor %arg30 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %47 = torch_c.from_builtin_tensor %arg29 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %48 = torch_c.from_builtin_tensor %arg28 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %49 = torch_c.from_builtin_tensor %arg27 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %50 = torch_c.from_builtin_tensor %arg26 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %51 = torch_c.from_builtin_tensor %arg25 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %52 = torch_c.from_builtin_tensor %arg24 : tensor<512x1536xf32> -> !torch.vtensor<[512,1536],f32> | |
| %53 = torch_c.from_builtin_tensor %arg23 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %54 = torch_c.from_builtin_tensor %arg22 : tensor<1536x512xf32> -> !torch.vtensor<[1536,512],f32> | |
| %55 = torch_c.from_builtin_tensor %arg21 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %56 = torch_c.from_builtin_tensor %arg20 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %57 = torch_c.from_builtin_tensor %arg19 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %58 = torch_c.from_builtin_tensor %arg18 : tensor<512x512xf32> -> !torch.vtensor<[512,512],f32> | |
| %59 = torch_c.from_builtin_tensor %arg17 : tensor<1x512xf32> -> !torch.vtensor<[1,512],f32> | |
| %60 = torch_c.from_builtin_tensor %arg16 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %61 = torch_c.from_builtin_tensor %arg15 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %62 = torch_c.from_builtin_tensor %arg14 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %63 = torch_c.from_builtin_tensor %arg13 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %64 = torch_c.from_builtin_tensor %arg12 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %65 = torch_c.from_builtin_tensor %arg11 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %66 = torch_c.from_builtin_tensor %arg10 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %67 = torch_c.from_builtin_tensor %arg9 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %68 = torch_c.from_builtin_tensor %arg8 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %69 = torch_c.from_builtin_tensor %arg7 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %70 = torch_c.from_builtin_tensor %arg6 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %71 = torch_c.from_builtin_tensor %arg5 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %72 = torch_c.from_builtin_tensor %arg4 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %73 = torch_c.from_builtin_tensor %arg3 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %74 = torch_c.from_builtin_tensor %arg2 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %75 = torch_c.from_builtin_tensor %arg1 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %76 = torch_c.from_builtin_tensor %arg0 : tensor<512xf32> -> !torch.vtensor<[512],f32> | |
| %77 = torch_c.to_builtin_tensor %59 : !torch.vtensor<[1,512],f32> -> tensor<1x512xf32> | |
| %78 = torch_c.to_builtin_tensor %1 : !torch.vtensor<[1,1],si64> -> tensor<1x1xi64> | |
| %79 = torch_c.to_builtin_tensor %0 : !torch.vtensor<[2048,32,2],f32> -> tensor<2048x32x2xf32> | |
| %80 = torch_c.to_builtin_tensor %76 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %81 = torch_c.to_builtin_tensor %58 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %82 = torch_c.to_builtin_tensor %57 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %83 = torch_c.to_builtin_tensor %56 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %84 = torch_c.to_builtin_tensor %55 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %85 = torch_c.to_builtin_tensor %75 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %86 = torch_c.to_builtin_tensor %54 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %87 = torch_c.to_builtin_tensor %53 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %88 = torch_c.to_builtin_tensor %52 : !torch.vtensor<[512,1536],f32> -> tensor<512x1536xf32> | |
| %89 = torch_c.to_builtin_tensor %74 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %90 = torch_c.to_builtin_tensor %51 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %91 = torch_c.to_builtin_tensor %50 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %92 = torch_c.to_builtin_tensor %49 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %93 = torch_c.to_builtin_tensor %48 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %94 = torch_c.to_builtin_tensor %73 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %95 = torch_c.to_builtin_tensor %47 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %96 = torch_c.to_builtin_tensor %46 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %97 = torch_c.to_builtin_tensor %45 : !torch.vtensor<[512,1536],f32> -> tensor<512x1536xf32> | |
| %98 = torch_c.to_builtin_tensor %72 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %99 = torch_c.to_builtin_tensor %44 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %100 = torch_c.to_builtin_tensor %43 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %101 = torch_c.to_builtin_tensor %42 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %102 = torch_c.to_builtin_tensor %41 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %103 = torch_c.to_builtin_tensor %71 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %104 = torch_c.to_builtin_tensor %40 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %105 = torch_c.to_builtin_tensor %39 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %106 = torch_c.to_builtin_tensor %38 : !torch.vtensor<[512,1536],f32> -> tensor<512x1536xf32> | |
| %107 = torch_c.to_builtin_tensor %70 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %108 = torch_c.to_builtin_tensor %37 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %109 = torch_c.to_builtin_tensor %36 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %110 = torch_c.to_builtin_tensor %35 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %111 = torch_c.to_builtin_tensor %34 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %112 = torch_c.to_builtin_tensor %69 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %113 = torch_c.to_builtin_tensor %33 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %114 = torch_c.to_builtin_tensor %32 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %115 = torch_c.to_builtin_tensor %31 : !torch.vtensor<[512,1536],f32> -> tensor<512x1536xf32> | |
| %116 = torch_c.to_builtin_tensor %68 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %117 = torch_c.to_builtin_tensor %30 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %118 = torch_c.to_builtin_tensor %29 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %119 = torch_c.to_builtin_tensor %28 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %120 = torch_c.to_builtin_tensor %27 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %121 = torch_c.to_builtin_tensor %67 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %122 = torch_c.to_builtin_tensor %26 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %123 = torch_c.to_builtin_tensor %25 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %124 = torch_c.to_builtin_tensor %24 : !torch.vtensor<[512,1536],f32> -> tensor<512x1536xf32> | |
| %125 = torch_c.to_builtin_tensor %66 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %126 = torch_c.to_builtin_tensor %23 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %127 = torch_c.to_builtin_tensor %22 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %128 = torch_c.to_builtin_tensor %21 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %129 = torch_c.to_builtin_tensor %20 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %130 = torch_c.to_builtin_tensor %65 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %131 = torch_c.to_builtin_tensor %19 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %132 = torch_c.to_builtin_tensor %18 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %133 = torch_c.to_builtin_tensor %17 : !torch.vtensor<[512,1536],f32> -> tensor<512x1536xf32> | |
| %134 = torch_c.to_builtin_tensor %64 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %135 = torch_c.to_builtin_tensor %16 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %136 = torch_c.to_builtin_tensor %15 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %137 = torch_c.to_builtin_tensor %14 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %138 = torch_c.to_builtin_tensor %13 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %139 = torch_c.to_builtin_tensor %63 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %140 = torch_c.to_builtin_tensor %12 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %141 = torch_c.to_builtin_tensor %11 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %142 = torch_c.to_builtin_tensor %10 : !torch.vtensor<[512,1536],f32> -> tensor<512x1536xf32> | |
| %143 = torch_c.to_builtin_tensor %62 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %144 = torch_c.to_builtin_tensor %9 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %145 = torch_c.to_builtin_tensor %8 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %146 = torch_c.to_builtin_tensor %7 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %147 = torch_c.to_builtin_tensor %6 : !torch.vtensor<[512,512],f32> -> tensor<512x512xf32> | |
| %148 = torch_c.to_builtin_tensor %61 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %149 = torch_c.to_builtin_tensor %5 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %150 = torch_c.to_builtin_tensor %4 : !torch.vtensor<[1536,512],f32> -> tensor<1536x512xf32> | |
| %151 = torch_c.to_builtin_tensor %3 : !torch.vtensor<[512,1536],f32> -> tensor<512x1536xf32> | |
| %152 = torch_c.to_builtin_tensor %60 : !torch.vtensor<[512],f32> -> tensor<512xf32> | |
| %153 = torch_c.to_builtin_tensor %2 : !torch.vtensor<[1,512],f32> -> tensor<1x512xf32> | |
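| // Gather: index the 1x512 table %77 with the i64 token index %78 to produce a 1x1x512 activation, with index bounds asserts. | |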
| %154 = tensor.empty() : tensor<1x1x512xf32> | |
| %155 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%78 : tensor<1x1xi64>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: i64, %out: f32): | |
| %788 = arith.index_cast %in : i64 to index | |
| %789 = linalg.index 2 : index | |
| %790 = arith.cmpi slt, %788, %c1 : index | |
| cf.assert %790, "index must be smaller than dim size" | |
| %791 = arith.cmpi sge, %in, %c0_i64 : i64 | |
| cf.assert %791, "index must be larger or equal to 0" | |
| %extracted = tensor.extract %77[%788, %789] : tensor<1x512xf32> | |
| linalg.yield %extracted : f32 | |
| } -> tensor<1x1x512xf32> | |
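| // Slice the first position of the precomputed 2048x32x2 rotary-frequency (cos, sin) table %79. | |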
| %extracted_slice = tensor.extract_slice %79[0, 0, 0] [1, 32, 2] [1, 1, 1] : tensor<2048x32x2xf32> to tensor<1x32x2xf32> | |
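| // RMSNorm: mean of x^2 over the 512 features, add eps = 1e-5, rsqrt, scale the input, then multiply by the weight %80. | |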
| %156 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%155 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %157 = tensor.empty() : tensor<1x1x1xf32> | |
| %158 = linalg.fill ins(%cst : f32) outs(%157 : tensor<1x1x1xf32>) -> tensor<1x1x1xf32> | |
| %159 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%156 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %160 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%159 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %161 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%160 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %162 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%161 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %163 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%155, %162 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %164 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%163, %80 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
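| // Q/K/V projections: three matmuls of the normalized 1x512 activation against transposed 512x512 weights. | |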
| %165 = tensor.empty() : tensor<512x512xf32> | |
| %166 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%81 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed = tensor.collapse_shape %164 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %167 = tensor.empty() : tensor<1x512xf32> | |
| %168 = linalg.fill ins(%cst : f32) outs(%167 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %169 = linalg.matmul ins(%collapsed, %166 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %170 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%82 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %171 = linalg.matmul ins(%collapsed, %170 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %172 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%83 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %173 = linalg.matmul ins(%collapsed, %172 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded = tensor.expand_shape %173 [[0], [1, 2, 3]] : tensor<1x512xf32> into tensor<1x1x8x64xf32> | |
| %expanded_6 = tensor.expand_shape %169 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
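| // Reinterpret the (even, odd) float pairs of two of the projections as complex<f64> values, one per head and rotary dimension. | |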
| %174 = tensor.empty() : tensor<1x1x8x32xcomplex<f64>> | |
| %175 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_6[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_6[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %expanded_7 = tensor.expand_shape %171 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %176 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_7[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_7[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %177 = tensor.empty() : tensor<1x32xcomplex<f64>> | |
| %178 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} outs(%177 : tensor<1x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %extracted = tensor.extract %extracted_slice[%788, %789, %c0] : tensor<1x32x2xf32> | |
| %extracted_269 = tensor.extract %extracted_slice[%788, %789, %c1] : tensor<1x32x2xf32> | |
| %790 = arith.extf %extracted : f32 to f64 | |
| %791 = arith.extf %extracted_269 : f32 to f64 | |
| %792 = complex.create %790, %791 : complex<f64> | |
| linalg.yield %792 : complex<f64> | |
| } -> tensor<1x32xcomplex<f64>> | |
| %expanded_8 = tensor.expand_shape %178 [[0], [1, 2, 3]] : tensor<1x32xcomplex<f64>> into tensor<1x1x1x32xcomplex<f64>> | |
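| // Apply the rotary rotation: complex-multiply each head by the per-position frequencies, then view_as_real and re-interleave back to a 1x1x8x64 float layout. | |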
| %179 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%175, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %180 = torch_c.from_builtin_tensor %179 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %181 = torch.aten.view_as_real %180 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %182 = torch_c.to_builtin_tensor %181 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_9 = tensor.extract_slice %182[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %183 = torch.aten.view_as_real %180 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %184 = torch_c.to_builtin_tensor %183 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_10 = tensor.extract_slice %184[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_11 = tensor.collapse_shape %extracted_slice_9 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %185 = torch_c.from_builtin_tensor %collapsed_11 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_12 = tensor.collapse_shape %extracted_slice_10 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %186 = torch_c.from_builtin_tensor %collapsed_12 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %187 = torch_c.to_builtin_tensor %185 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %188 = torch_c.to_builtin_tensor %186 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %189 = tensor.empty() : tensor<256x2xf32> | |
| %inserted_slice = tensor.insert_slice %187 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_13 = tensor.insert_slice %188 into %inserted_slice[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_14 = tensor.expand_shape %inserted_slice_13 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_15 = tensor.collapse_shape %expanded_14 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
| %190 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%176, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %191 = torch_c.from_builtin_tensor %190 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %192 = torch.aten.view_as_real %191 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %193 = torch_c.to_builtin_tensor %192 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_16 = tensor.extract_slice %193[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %194 = torch.aten.view_as_real %191 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %195 = torch_c.to_builtin_tensor %194 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_17 = tensor.extract_slice %195[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_18 = tensor.collapse_shape %extracted_slice_16 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %196 = torch_c.from_builtin_tensor %collapsed_18 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_19 = tensor.collapse_shape %extracted_slice_17 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %197 = torch_c.from_builtin_tensor %collapsed_19 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %198 = torch_c.to_builtin_tensor %196 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %199 = torch_c.to_builtin_tensor %197 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_20 = tensor.insert_slice %198 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_21 = tensor.insert_slice %199 into %inserted_slice_20[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_22 = tensor.expand_shape %inserted_slice_21 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_23 = tensor.collapse_shape %expanded_22 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
| %200 = tensor.empty() : tensor<1x1x8x64xf32> | |
| %201 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_23 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %202 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
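| // Move heads into the batch dimension: the two rotated projections become 8x1x64 and 8x64x1, ready for the per-head score matmul. | |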
| %203 = tensor.empty() : tensor<1x8x1x64xf32> | |
| %204 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_15 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %205 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%204 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_24 = tensor.collapse_shape %205 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %206 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%201 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %207 = tensor.empty() : tensor<1x8x64x1xf32> | |
| %208 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3, d2)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%206 : tensor<1x8x1x64xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %209 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%208 : tensor<1x8x64x1xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %collapsed_25 = tensor.collapse_shape %209 [[0, 1], [2], [3]] : tensor<1x8x64x1xf32> into tensor<8x64x1xf32> | |
| %210 = tensor.empty() : tensor<8x1x1xf32> | |
| %211 = linalg.fill ins(%cst : f32) outs(%210 : tensor<8x1x1xf32>) -> tensor<8x1x1xf32> | |
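| // Attention scores: per-head batched matmul (8x1x64) x (8x64x1) -> 8x1x1, then scale by 1/8 = 1/sqrt(head_dim = 64). | |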
| %212 = linalg.batch_matmul ins(%collapsed_24, %collapsed_25 : tensor<8x1x64xf32>, tensor<8x64x1xf32>) outs(%211 : tensor<8x1x1xf32>) -> tensor<8x1x1xf32> | |
| %expanded_26 = tensor.expand_shape %212 [[0, 1], [2], [3]] : tensor<8x1x1xf32> into tensor<1x8x1x1xf32> | |
| %213 = tensor.empty() : tensor<1x8x1x1xf32> | |
| %214 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_26 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_5 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
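| // Numerically stable softmax over the key dimension: track the running max, subtract it, exponentiate, and normalize by the sum. | |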
| %215 = tensor.empty() : tensor<1x8x1x1xi64> | |
| %216 = linalg.fill ins(%c0_i64 : i64) outs(%215 : tensor<1x8x1x1xi64>) -> tensor<1x8x1x1xi64> | |
| %217 = linalg.fill ins(%cst_0 : f32) outs(%213 : tensor<1x8x1x1xf32>) -> tensor<1x8x1x1xf32> | |
| %218:2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%214 : tensor<1x8x1x1xf32>) outs(%217, %216 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) { | |
| ^bb0(%in: f32, %out: f32, %out_269: i64): | |
| %788 = linalg.index 3 : index | |
| %789 = arith.index_cast %788 : index to i64 | |
| %790 = arith.maxf %in, %out : f32 | |
| %791 = arith.cmpf ogt, %in, %out : f32 | |
| %792 = arith.select %791, %789, %out_269 : i64 | |
| linalg.yield %790, %792 : f32, i64 | |
| } -> (tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) | |
| %219 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%214, %218#0 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.subf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %220 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%219 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.exp %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %221 = linalg.fill ins(%cst : f32) outs(%213 : tensor<1x8x1x1xf32>) -> tensor<1x8x1x1xf32> | |
| %222 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%220 : tensor<1x8x1x1xf32>) outs(%221 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %223 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%220, %222 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.divf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %224 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%223 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x1xf32> | |
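| // Attention context: multiply the softmax weights (8x1x1) by the value heads (8x1x64) and transpose the result back to 1x1x8x64. | |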
| %collapsed_27 = tensor.collapse_shape %224 [[0, 1], [2], [3]] : tensor<1x8x1x1xf32> into tensor<8x1x1xf32> | |
| %225 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%202 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %226 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%225 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_28 = tensor.collapse_shape %226 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %227 = tensor.empty() : tensor<8x1x64xf32> | |
| %228 = linalg.fill ins(%cst : f32) outs(%227 : tensor<8x1x64xf32>) -> tensor<8x1x64xf32> | |
| %229 = linalg.batch_matmul ins(%collapsed_27, %collapsed_28 : tensor<8x1x1xf32>, tensor<8x1x64xf32>) outs(%228 : tensor<8x1x64xf32>) -> tensor<8x1x64xf32> | |
| %expanded_29 = tensor.expand_shape %229 [[0, 1], [2], [3]] : tensor<8x1x64xf32> into tensor<1x8x1x64xf32> | |
| %230 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_29 : tensor<1x8x1x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
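| // Output projection (512x512 matmul on the merged heads) followed by a residual add with the block input %155. | |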
| %231 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%84 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_30 = tensor.collapse_shape %230 [[0], [1, 2, 3]] : tensor<1x1x8x64xf32> into tensor<1x512xf32> | |
| %232 = linalg.matmul ins(%collapsed_30, %231 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_31 = tensor.expand_shape %232 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %233 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%155, %expanded_31 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
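| // Pre-FFN RMSNorm of the residual stream, same pattern as above, scaled by the weight %85. | |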
| %234 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%233 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %235 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%234 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %236 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%235 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %237 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%236 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %238 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%237 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %239 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%233, %238 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %240 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%239, %85 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
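| // SiLU-gated (SwiGLU-style) feed-forward: project 512 -> 1536 twice, apply x * sigmoid(x) to one branch, multiply the branches, project 1536 -> 512, and add the residual. | |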
| %241 = tensor.empty() : tensor<512x1536xf32> | |
| %242 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%86 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %collapsed_32 = tensor.collapse_shape %240 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %243 = tensor.empty() : tensor<1x1536xf32> | |
| %244 = linalg.fill ins(%cst : f32) outs(%243 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %245 = linalg.matmul ins(%collapsed_32, %242 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_33 = tensor.expand_shape %245 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %246 = tensor.empty() : tensor<1x1x1536xf32> | |
| %247 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%expanded_33 : tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.negf %in : f32 | |
| %789 = math.exp %788 : f32 | |
| %790 = arith.addf %789, %cst_1 : f32 | |
| %791 = arith.divf %cst_1, %790 : f32 | |
| linalg.yield %791 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %248 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%247, %expanded_33 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %249 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%87 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %250 = linalg.matmul ins(%collapsed_32, %249 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_34 = tensor.expand_shape %250 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %251 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%248, %expanded_34 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %252 = tensor.empty() : tensor<1536x512xf32> | |
| %253 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%88 : tensor<512x1536xf32>) outs(%252 : tensor<1536x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1536x512xf32> | |
| %collapsed_35 = tensor.collapse_shape %251 [[0], [1, 2]] : tensor<1x1x1536xf32> into tensor<1x1536xf32> | |
| %254 = linalg.matmul ins(%collapsed_35, %253 : tensor<1x1536xf32>, tensor<1536x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_36 = tensor.expand_shape %254 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %255 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%233, %expanded_36 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
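| // Second transformer block begins: the same RMSNorm / rotary attention / gated feed-forward sequence repeats from here. | |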
| %256 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%255 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %257 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%256 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %258 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%257 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %259 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%258 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %260 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%259 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %261 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%255, %260 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %262 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%261, %89 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %263 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%90 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_37 = tensor.collapse_shape %262 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %264 = linalg.matmul ins(%collapsed_37, %263 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %265 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%91 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %266 = linalg.matmul ins(%collapsed_37, %265 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %267 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%92 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %268 = linalg.matmul ins(%collapsed_37, %267 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_38 = tensor.expand_shape %268 [[0], [1, 2, 3]] : tensor<1x512xf32> into tensor<1x1x8x64xf32> | |
| %expanded_39 = tensor.expand_shape %264 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %269 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_39[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_39[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %expanded_40 = tensor.expand_shape %266 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %270 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_40[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_40[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %271 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%269, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %272 = torch_c.from_builtin_tensor %271 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %273 = torch.aten.view_as_real %272 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %274 = torch_c.to_builtin_tensor %273 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_41 = tensor.extract_slice %274[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %275 = torch.aten.view_as_real %272 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %276 = torch_c.to_builtin_tensor %275 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_42 = tensor.extract_slice %276[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_43 = tensor.collapse_shape %extracted_slice_41 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %277 = torch_c.from_builtin_tensor %collapsed_43 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_44 = tensor.collapse_shape %extracted_slice_42 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %278 = torch_c.from_builtin_tensor %collapsed_44 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %279 = torch_c.to_builtin_tensor %277 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %280 = torch_c.to_builtin_tensor %278 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_45 = tensor.insert_slice %279 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_46 = tensor.insert_slice %280 into %inserted_slice_45[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_47 = tensor.expand_shape %inserted_slice_46 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_48 = tensor.collapse_shape %expanded_47 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
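| // The same rotation applied to the key: complex multiply with %expanded_8, then the identical real/imag re-interleave, giving %collapsed_56. | |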
| %281 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%270, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %282 = torch_c.from_builtin_tensor %281 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %283 = torch.aten.view_as_real %282 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %284 = torch_c.to_builtin_tensor %283 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_49 = tensor.extract_slice %284[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %285 = torch.aten.view_as_real %282 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %286 = torch_c.to_builtin_tensor %285 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_50 = tensor.extract_slice %286[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_51 = tensor.collapse_shape %extracted_slice_49 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %287 = torch_c.from_builtin_tensor %collapsed_51 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_52 = tensor.collapse_shape %extracted_slice_50 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %288 = torch_c.from_builtin_tensor %collapsed_52 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %289 = torch_c.to_builtin_tensor %287 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %290 = torch_c.to_builtin_tensor %288 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_53 = tensor.insert_slice %289 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_54 = tensor.insert_slice %290 into %inserted_slice_53[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_55 = tensor.expand_shape %inserted_slice_54 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_56 = tensor.collapse_shape %expanded_55 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
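| // Attention: the copies and transposes below rearrange the rotated query/key and the value into per-head batched form (8 heads, head dimension 64). | |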
| %291 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_56 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %292 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_38 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %293 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_48 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %294 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%293 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_57 = tensor.collapse_shape %294 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %295 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%291 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %296 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3, d2)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%295 : tensor<1x8x1x64xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %297 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%296 : tensor<1x8x64x1xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %collapsed_58 = tensor.collapse_shape %297 [[0, 1], [2], [3]] : tensor<1x8x64x1xf32> into tensor<8x64x1xf32> | |
| %298 = linalg.batch_matmul ins(%collapsed_57, %collapsed_58 : tensor<8x1x64xf32>, tensor<8x64x1xf32>) outs(%211 : tensor<8x1x1xf32>) -> tensor<8x1x1xf32> | |
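| // %298 holds the per-head attention scores Q x K^T; %299 below scales them by 1/sqrt(head_dim) = 1/8 (%cst_5). | |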
| %expanded_59 = tensor.expand_shape %298 [[0, 1], [2], [3]] : tensor<8x1x1xf32> into tensor<1x8x1x1xf32> | |
| %299 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_59 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_5 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
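| // Numerically stable softmax over the key dimension (length 1 here): running max, subtract, exp, sum, then normalize. | |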
| %300:2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%299 : tensor<1x8x1x1xf32>) outs(%217, %216 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) { | |
| ^bb0(%in: f32, %out: f32, %out_269: i64): | |
| %788 = linalg.index 3 : index | |
| %789 = arith.index_cast %788 : index to i64 | |
| %790 = arith.maxf %in, %out : f32 | |
| %791 = arith.cmpf ogt, %in, %out : f32 | |
| %792 = arith.select %791, %789, %out_269 : i64 | |
| linalg.yield %790, %792 : f32, i64 | |
| } -> (tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) | |
| %301 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%299, %300#0 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.subf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %302 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%301 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.exp %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %303 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%302 : tensor<1x8x1x1xf32>) outs(%221 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %304 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%302, %303 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.divf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %305 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%304 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %collapsed_60 = tensor.collapse_shape %305 [[0, 1], [2], [3]] : tensor<1x8x1x1xf32> into tensor<8x1x1xf32> | |
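| // Apply the attention weights to the values with a batched matmul, then transpose the result back to the 1x1x8x64 layout. | |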
| %306 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%292 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %307 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%306 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_61 = tensor.collapse_shape %307 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %308 = linalg.batch_matmul ins(%collapsed_60, %collapsed_61 : tensor<8x1x1xf32>, tensor<8x1x64xf32>) outs(%228 : tensor<8x1x64xf32>) -> tensor<8x1x64xf32> | |
| %expanded_62 = tensor.expand_shape %308 [[0, 1], [2], [3]] : tensor<8x1x64xf32> into tensor<1x8x1x64xf32> | |
| %309 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_62 : tensor<1x8x1x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
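| // Output projection (matmul with the transposed %93), followed by a residual add with %255 from earlier. | |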
| %310 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%93 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_63 = tensor.collapse_shape %309 [[0], [1, 2, 3]] : tensor<1x1x8x64xf32> into tensor<1x512xf32> | |
| %311 = linalg.matmul ins(%collapsed_63, %310 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_64 = tensor.expand_shape %311 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %312 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%255, %expanded_64 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
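| // RMSNorm: mean of squares over the 512 features (divide by %cst_4 = 512), add eps %cst_2 = 1e-05, rsqrt, rescale the activations, then multiply by the weight vector %94. | |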
| %313 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%312 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %314 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%313 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %315 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%314 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %316 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%315 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %317 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%316 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %318 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%312, %317 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %319 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%318, %94 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
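| // SwiGLU-style feed-forward: gate projection to 1536, SiLU (x * sigmoid(x)), up projection, elementwise product, down projection back to 512, then a residual add. | |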
| %320 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%95 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %collapsed_65 = tensor.collapse_shape %319 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %321 = linalg.matmul ins(%collapsed_65, %320 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_66 = tensor.expand_shape %321 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %322 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%expanded_66 : tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.negf %in : f32 | |
| %789 = math.exp %788 : f32 | |
| %790 = arith.addf %789, %cst_1 : f32 | |
| %791 = arith.divf %cst_1, %790 : f32 | |
| linalg.yield %791 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %323 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%322, %expanded_66 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %324 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%96 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %325 = linalg.matmul ins(%collapsed_65, %324 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_67 = tensor.expand_shape %325 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %326 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%323, %expanded_67 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %327 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%97 : tensor<512x1536xf32>) outs(%252 : tensor<1536x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1536x512xf32> | |
| %collapsed_68 = tensor.collapse_shape %326 [[0], [1, 2]] : tensor<1x1x1536xf32> into tensor<1x1536xf32> | |
| %328 = linalg.matmul ins(%collapsed_68, %327 : tensor<1x1536xf32>, tensor<1536x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_69 = tensor.expand_shape %328 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %329 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%312, %expanded_69 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
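| // The next decoder layer starts here and repeats the same pattern (RMSNorm, Q/K/V projections, RoPE, attention, SwiGLU feed-forward) with weights %98 through %106. | |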
| %330 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%329 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %331 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%330 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %332 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%331 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %333 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%332 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %334 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%333 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %335 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%329, %334 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %336 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%335, %98 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %337 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%99 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_70 = tensor.collapse_shape %336 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %338 = linalg.matmul ins(%collapsed_70, %337 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %339 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%100 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %340 = linalg.matmul ins(%collapsed_70, %339 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %341 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%101 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %342 = linalg.matmul ins(%collapsed_70, %341 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_71 = tensor.expand_shape %342 [[0], [1, 2, 3]] : tensor<1x512xf32> into tensor<1x1x8x64xf32> | |
| %expanded_72 = tensor.expand_shape %338 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %343 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_72[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_72[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %expanded_73 = tensor.expand_shape %340 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %344 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_73[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_73[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %345 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%343, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %346 = torch_c.from_builtin_tensor %345 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %347 = torch.aten.view_as_real %346 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %348 = torch_c.to_builtin_tensor %347 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_74 = tensor.extract_slice %348[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %349 = torch.aten.view_as_real %346 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %350 = torch_c.to_builtin_tensor %349 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_75 = tensor.extract_slice %350[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_76 = tensor.collapse_shape %extracted_slice_74 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %351 = torch_c.from_builtin_tensor %collapsed_76 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_77 = tensor.collapse_shape %extracted_slice_75 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %352 = torch_c.from_builtin_tensor %collapsed_77 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %353 = torch_c.to_builtin_tensor %351 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %354 = torch_c.to_builtin_tensor %352 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_78 = tensor.insert_slice %353 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_79 = tensor.insert_slice %354 into %inserted_slice_78[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_80 = tensor.expand_shape %inserted_slice_79 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_81 = tensor.collapse_shape %expanded_80 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
| %355 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%344, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %356 = torch_c.from_builtin_tensor %355 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %357 = torch.aten.view_as_real %356 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %358 = torch_c.to_builtin_tensor %357 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_82 = tensor.extract_slice %358[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %359 = torch.aten.view_as_real %356 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %360 = torch_c.to_builtin_tensor %359 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_83 = tensor.extract_slice %360[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_84 = tensor.collapse_shape %extracted_slice_82 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %361 = torch_c.from_builtin_tensor %collapsed_84 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_85 = tensor.collapse_shape %extracted_slice_83 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %362 = torch_c.from_builtin_tensor %collapsed_85 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %363 = torch_c.to_builtin_tensor %361 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %364 = torch_c.to_builtin_tensor %362 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_86 = tensor.insert_slice %363 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_87 = tensor.insert_slice %364 into %inserted_slice_86[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_88 = tensor.expand_shape %inserted_slice_87 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_89 = tensor.collapse_shape %expanded_88 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
| %365 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_89 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %366 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_71 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %367 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_81 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %368 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%367 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_90 = tensor.collapse_shape %368 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %369 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%365 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %370 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3, d2)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%369 : tensor<1x8x1x64xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %371 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%370 : tensor<1x8x64x1xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %collapsed_91 = tensor.collapse_shape %371 [[0, 1], [2], [3]] : tensor<1x8x64x1xf32> into tensor<8x64x1xf32> | |
| %372 = linalg.batch_matmul ins(%collapsed_90, %collapsed_91 : tensor<8x1x64xf32>, tensor<8x64x1xf32>) outs(%211 : tensor<8x1x1xf32>) -> tensor<8x1x1xf32> | |
| %expanded_92 = tensor.expand_shape %372 [[0, 1], [2], [3]] : tensor<8x1x1xf32> into tensor<1x8x1x1xf32> | |
| %373 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_92 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_5 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %374:2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%373 : tensor<1x8x1x1xf32>) outs(%217, %216 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) { | |
| ^bb0(%in: f32, %out: f32, %out_269: i64): | |
| %788 = linalg.index 3 : index | |
| %789 = arith.index_cast %788 : index to i64 | |
| %790 = arith.maxf %in, %out : f32 | |
| %791 = arith.cmpf ogt, %in, %out : f32 | |
| %792 = arith.select %791, %789, %out_269 : i64 | |
| linalg.yield %790, %792 : f32, i64 | |
| } -> (tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) | |
| %375 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%373, %374#0 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.subf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %376 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%375 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.exp %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %377 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%376 : tensor<1x8x1x1xf32>) outs(%221 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %378 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%376, %377 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.divf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %379 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%378 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %collapsed_93 = tensor.collapse_shape %379 [[0, 1], [2], [3]] : tensor<1x8x1x1xf32> into tensor<8x1x1xf32> | |
| %380 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%366 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %381 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%380 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_94 = tensor.collapse_shape %381 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %382 = linalg.batch_matmul ins(%collapsed_93, %collapsed_94 : tensor<8x1x1xf32>, tensor<8x1x64xf32>) outs(%228 : tensor<8x1x64xf32>) -> tensor<8x1x64xf32> | |
| %expanded_95 = tensor.expand_shape %382 [[0, 1], [2], [3]] : tensor<8x1x64xf32> into tensor<1x8x1x64xf32> | |
| %383 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_95 : tensor<1x8x1x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %384 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%102 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_96 = tensor.collapse_shape %383 [[0], [1, 2, 3]] : tensor<1x1x8x64xf32> into tensor<1x512xf32> | |
| %385 = linalg.matmul ins(%collapsed_96, %384 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_97 = tensor.expand_shape %385 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %386 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%329, %expanded_97 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %387 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%386 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %388 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%387 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %389 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%388 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %390 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%389 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %391 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%390 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %392 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%386, %391 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %393 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%392, %103 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %394 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%104 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %collapsed_98 = tensor.collapse_shape %393 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %395 = linalg.matmul ins(%collapsed_98, %394 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_99 = tensor.expand_shape %395 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %396 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%expanded_99 : tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.negf %in : f32 | |
| %789 = math.exp %788 : f32 | |
| %790 = arith.addf %789, %cst_1 : f32 | |
| %791 = arith.divf %cst_1, %790 : f32 | |
| linalg.yield %791 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %397 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%396, %expanded_99 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %398 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%105 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %399 = linalg.matmul ins(%collapsed_98, %398 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_100 = tensor.expand_shape %399 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %400 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%397, %expanded_100 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %401 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%106 : tensor<512x1536xf32>) outs(%252 : tensor<1536x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1536x512xf32> | |
| %collapsed_101 = tensor.collapse_shape %400 [[0], [1, 2]] : tensor<1x1x1536xf32> into tensor<1x1536xf32> | |
| %402 = linalg.matmul ins(%collapsed_101, %401 : tensor<1x1536xf32>, tensor<1536x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_102 = tensor.expand_shape %402 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %403 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%386, %expanded_102 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
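| // And again for the following layer, using weights %107 onward. | |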
| %404 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%403 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %405 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%404 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %406 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%405 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %407 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%406 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %408 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%407 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %409 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%403, %408 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %410 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%409, %107 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %411 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%108 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_103 = tensor.collapse_shape %410 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %412 = linalg.matmul ins(%collapsed_103, %411 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %413 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%109 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %414 = linalg.matmul ins(%collapsed_103, %413 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %415 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%110 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %416 = linalg.matmul ins(%collapsed_103, %415 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_104 = tensor.expand_shape %416 [[0], [1, 2, 3]] : tensor<1x512xf32> into tensor<1x1x8x64xf32> | |
| %expanded_105 = tensor.expand_shape %412 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %417 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_105[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_105[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %expanded_106 = tensor.expand_shape %414 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %418 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_106[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_106[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %419 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%417, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %420 = torch_c.from_builtin_tensor %419 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %421 = torch.aten.view_as_real %420 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %422 = torch_c.to_builtin_tensor %421 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_107 = tensor.extract_slice %422[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %423 = torch.aten.view_as_real %420 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %424 = torch_c.to_builtin_tensor %423 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_108 = tensor.extract_slice %424[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_109 = tensor.collapse_shape %extracted_slice_107 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %425 = torch_c.from_builtin_tensor %collapsed_109 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_110 = tensor.collapse_shape %extracted_slice_108 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %426 = torch_c.from_builtin_tensor %collapsed_110 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %427 = torch_c.to_builtin_tensor %425 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %428 = torch_c.to_builtin_tensor %426 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_111 = tensor.insert_slice %427 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_112 = tensor.insert_slice %428 into %inserted_slice_111[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_113 = tensor.expand_shape %inserted_slice_112 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_114 = tensor.collapse_shape %expanded_113 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
| %429 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%418, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %430 = torch_c.from_builtin_tensor %429 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %431 = torch.aten.view_as_real %430 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %432 = torch_c.to_builtin_tensor %431 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_115 = tensor.extract_slice %432[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %433 = torch.aten.view_as_real %430 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %434 = torch_c.to_builtin_tensor %433 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_116 = tensor.extract_slice %434[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_117 = tensor.collapse_shape %extracted_slice_115 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %435 = torch_c.from_builtin_tensor %collapsed_117 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_118 = tensor.collapse_shape %extracted_slice_116 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %436 = torch_c.from_builtin_tensor %collapsed_118 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %437 = torch_c.to_builtin_tensor %435 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %438 = torch_c.to_builtin_tensor %436 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_119 = tensor.insert_slice %437 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_120 = tensor.insert_slice %438 into %inserted_slice_119[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_121 = tensor.expand_shape %inserted_slice_120 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_122 = tensor.collapse_shape %expanded_121 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
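| // NOTE (annotation, assumption): the rotated heads are now unpacked back to the 1x1x8x64 layout (%collapsed_114, %collapsed_122); the copy ops below broadcast/transpose them into 8x1x64 and 8x64x1 operands for the per-head score matmul. | |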
| %439 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_122 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %440 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_104 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %441 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_114 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %442 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%441 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_123 = tensor.collapse_shape %442 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %443 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%439 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %444 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3, d2)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%443 : tensor<1x8x1x64xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %445 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%444 : tensor<1x8x64x1xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %collapsed_124 = tensor.collapse_shape %445 [[0, 1], [2], [3]] : tensor<1x8x64x1xf32> into tensor<8x64x1xf32> | |
| %446 = linalg.batch_matmul ins(%collapsed_123, %collapsed_124 : tensor<8x1x64xf32>, tensor<8x64x1xf32>) outs(%211 : tensor<8x1x1xf32>) -> tensor<8x1x1xf32> | |
| %expanded_125 = tensor.expand_shape %446 [[0, 1], [2], [3]] : tensor<8x1x1xf32> into tensor<1x8x1x1xf32> | |
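| // NOTE (annotation, assumption): %447 scales the raw per-head scores by 1/8 = 1/sqrt(64), the usual scaled-dot-product factor for 64-wide heads (%cst_5 = 8.0). | |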
| %447 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_125 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_5 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
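| // NOTE (annotation, assumption): %448-%452 compute a numerically stable softmax over the scaled scores: running max (with argmax), subtract, exp, sum, then normalize. | |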
| %448:2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%447 : tensor<1x8x1x1xf32>) outs(%217, %216 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) { | |
| ^bb0(%in: f32, %out: f32, %out_269: i64): | |
| %788 = linalg.index 3 : index | |
| %789 = arith.index_cast %788 : index to i64 | |
| %790 = arith.maxf %in, %out : f32 | |
| %791 = arith.cmpf ogt, %in, %out : f32 | |
| %792 = arith.select %791, %789, %out_269 : i64 | |
| linalg.yield %790, %792 : f32, i64 | |
| } -> (tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) | |
| %449 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%447, %448#0 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.subf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %450 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%449 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.exp %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %451 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%450 : tensor<1x8x1x1xf32>) outs(%221 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %452 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%450, %451 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.divf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %453 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%452 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x1xf32> | |
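| // NOTE (annotation, assumption): the softmax weights are collapsed to 8x1x1 and applied to the value heads (8x1x64) via linalg.batch_matmul in %456, then transposed back to the 1x1x8x64 layout in %457. | |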
| %collapsed_126 = tensor.collapse_shape %453 [[0, 1], [2], [3]] : tensor<1x8x1x1xf32> into tensor<8x1x1xf32> | |
| %454 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%440 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %455 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%454 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_127 = tensor.collapse_shape %455 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %456 = linalg.batch_matmul ins(%collapsed_126, %collapsed_127 : tensor<8x1x1xf32>, tensor<8x1x64xf32>) outs(%228 : tensor<8x1x64xf32>) -> tensor<8x1x64xf32> | |
| %expanded_128 = tensor.expand_shape %456 [[0, 1], [2], [3]] : tensor<8x1x64xf32> into tensor<1x8x1x64xf32> | |
| %457 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_128 : tensor<1x8x1x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
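| // NOTE (annotation, assumption): %458-%460 form the attention output projection: the heads are flattened to 1x512, multiplied by the transposed 512x512 weight %111, and added to the residual stream %403. | |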
| %458 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%111 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_129 = tensor.collapse_shape %457 [[0], [1, 2, 3]] : tensor<1x1x8x64xf32> into tensor<1x512xf32> | |
| %459 = linalg.matmul ins(%collapsed_129, %458 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_130 = tensor.expand_shape %459 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %460 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%403, %expanded_130 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
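| // NOTE (annotation, assumption): %461-%467 look like RMS normalization: mean of squares over the 512 channels (%cst_4 = 512.0), add eps %cst_2 = 1e-05, rsqrt, scale the activations, then multiply by the per-channel weight %112. | |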
| %461 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%460 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %462 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%461 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %463 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%462 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %464 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%463 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %465 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%464 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %466 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%460, %465 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %467 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%466, %112 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
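| // NOTE (annotation, assumption): %468-%474 form a gated feed-forward (SwiGLU-style) block: two 512->1536 projections (%113, %114), a SiLU gate in %470/%471 (x * 1/(1+exp(-x))), and an elementwise product with the second projection. | |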
| %468 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%113 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %collapsed_131 = tensor.collapse_shape %467 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %469 = linalg.matmul ins(%collapsed_131, %468 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_132 = tensor.expand_shape %469 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %470 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%expanded_132 : tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.negf %in : f32 | |
| %789 = math.exp %788 : f32 | |
| %790 = arith.addf %789, %cst_1 : f32 | |
| %791 = arith.divf %cst_1, %790 : f32 | |
| linalg.yield %791 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %471 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%470, %expanded_132 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %472 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%114 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %473 = linalg.matmul ins(%collapsed_131, %472 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_133 = tensor.expand_shape %473 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %474 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%471, %expanded_133 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
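| // NOTE (annotation, assumption): %475-%477 apply the 1536->512 down projection (%115) and add the result back to the residual %460, completing this decoder block. | |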
| %475 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%115 : tensor<512x1536xf32>) outs(%252 : tensor<1536x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1536x512xf32> | |
| %collapsed_134 = tensor.collapse_shape %474 [[0], [1, 2]] : tensor<1x1x1536xf32> into tensor<1x1536xf32> | |
| %476 = linalg.matmul ins(%collapsed_134, %475 : tensor<1x1536xf32>, tensor<1536x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_135 = tensor.expand_shape %476 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %477 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%460, %expanded_135 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
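| // NOTE (annotation, assumption): from %478 onward the same block structure repeats for the next decoder layer: RMSNorm, Q/K/V projections, RoPE, scaled-dot-product attention with softmax, output projection + residual, and the gated MLP. | |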
| %478 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%477 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %479 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%478 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %480 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%479 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %481 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%480 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %482 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%481 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %483 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%477, %482 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %484 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%483, %116 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %485 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%117 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_136 = tensor.collapse_shape %484 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %486 = linalg.matmul ins(%collapsed_136, %485 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %487 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%118 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %488 = linalg.matmul ins(%collapsed_136, %487 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %489 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%119 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %490 = linalg.matmul ins(%collapsed_136, %489 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_137 = tensor.expand_shape %490 [[0], [1, 2, 3]] : tensor<1x512xf32> into tensor<1x1x8x64xf32> | |
| %expanded_138 = tensor.expand_shape %486 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %491 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_138[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_138[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %expanded_139 = tensor.expand_shape %488 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %492 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_139[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_139[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %493 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%491, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %494 = torch_c.from_builtin_tensor %493 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %495 = torch.aten.view_as_real %494 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %496 = torch_c.to_builtin_tensor %495 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_140 = tensor.extract_slice %496[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %497 = torch.aten.view_as_real %494 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %498 = torch_c.to_builtin_tensor %497 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_141 = tensor.extract_slice %498[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_142 = tensor.collapse_shape %extracted_slice_140 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %499 = torch_c.from_builtin_tensor %collapsed_142 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_143 = tensor.collapse_shape %extracted_slice_141 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %500 = torch_c.from_builtin_tensor %collapsed_143 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %501 = torch_c.to_builtin_tensor %499 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %502 = torch_c.to_builtin_tensor %500 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_144 = tensor.insert_slice %501 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_145 = tensor.insert_slice %502 into %inserted_slice_144[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_146 = tensor.expand_shape %inserted_slice_145 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_147 = tensor.collapse_shape %expanded_146 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
| %503 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%492, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %504 = torch_c.from_builtin_tensor %503 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %505 = torch.aten.view_as_real %504 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %506 = torch_c.to_builtin_tensor %505 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_148 = tensor.extract_slice %506[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %507 = torch.aten.view_as_real %504 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %508 = torch_c.to_builtin_tensor %507 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_149 = tensor.extract_slice %508[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_150 = tensor.collapse_shape %extracted_slice_148 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %509 = torch_c.from_builtin_tensor %collapsed_150 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_151 = tensor.collapse_shape %extracted_slice_149 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %510 = torch_c.from_builtin_tensor %collapsed_151 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %511 = torch_c.to_builtin_tensor %509 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %512 = torch_c.to_builtin_tensor %510 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_152 = tensor.insert_slice %511 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_153 = tensor.insert_slice %512 into %inserted_slice_152[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_154 = tensor.expand_shape %inserted_slice_153 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_155 = tensor.collapse_shape %expanded_154 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
| %513 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_155 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %514 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_137 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %515 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_147 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %516 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%515 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_156 = tensor.collapse_shape %516 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %517 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%513 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %518 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3, d2)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%517 : tensor<1x8x1x64xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %519 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%518 : tensor<1x8x64x1xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %collapsed_157 = tensor.collapse_shape %519 [[0, 1], [2], [3]] : tensor<1x8x64x1xf32> into tensor<8x64x1xf32> | |
| %520 = linalg.batch_matmul ins(%collapsed_156, %collapsed_157 : tensor<8x1x64xf32>, tensor<8x64x1xf32>) outs(%211 : tensor<8x1x1xf32>) -> tensor<8x1x1xf32> | |
| %expanded_158 = tensor.expand_shape %520 [[0, 1], [2], [3]] : tensor<8x1x1xf32> into tensor<1x8x1x1xf32> | |
| %521 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_158 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_5 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %522:2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%521 : tensor<1x8x1x1xf32>) outs(%217, %216 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) { | |
| ^bb0(%in: f32, %out: f32, %out_269: i64): | |
| %788 = linalg.index 3 : index | |
| %789 = arith.index_cast %788 : index to i64 | |
| %790 = arith.maxf %in, %out : f32 | |
| %791 = arith.cmpf ogt, %in, %out : f32 | |
| %792 = arith.select %791, %789, %out_269 : i64 | |
| linalg.yield %790, %792 : f32, i64 | |
| } -> (tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) | |
| %523 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%521, %522#0 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.subf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %524 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%523 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.exp %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %525 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%524 : tensor<1x8x1x1xf32>) outs(%221 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %526 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%524, %525 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.divf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %527 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%526 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %collapsed_159 = tensor.collapse_shape %527 [[0, 1], [2], [3]] : tensor<1x8x1x1xf32> into tensor<8x1x1xf32> | |
| %528 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%514 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %529 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%528 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_160 = tensor.collapse_shape %529 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %530 = linalg.batch_matmul ins(%collapsed_159, %collapsed_160 : tensor<8x1x1xf32>, tensor<8x1x64xf32>) outs(%228 : tensor<8x1x64xf32>) -> tensor<8x1x64xf32> | |
| %expanded_161 = tensor.expand_shape %530 [[0, 1], [2], [3]] : tensor<8x1x64xf32> into tensor<1x8x1x64xf32> | |
| %531 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_161 : tensor<1x8x1x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %532 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%120 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_162 = tensor.collapse_shape %531 [[0], [1, 2, 3]] : tensor<1x1x8x64xf32> into tensor<1x512xf32> | |
| %533 = linalg.matmul ins(%collapsed_162, %532 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_163 = tensor.expand_shape %533 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %534 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%477, %expanded_163 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %535 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%534 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %536 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%535 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %537 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%536 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %538 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%537 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %539 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%538 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %540 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%534, %539 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %541 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%540, %121 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %542 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%122 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %collapsed_164 = tensor.collapse_shape %541 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %543 = linalg.matmul ins(%collapsed_164, %542 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_165 = tensor.expand_shape %543 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %544 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%expanded_165 : tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.negf %in : f32 | |
| %789 = math.exp %788 : f32 | |
| %790 = arith.addf %789, %cst_1 : f32 | |
| %791 = arith.divf %cst_1, %790 : f32 | |
| linalg.yield %791 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %545 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%544, %expanded_165 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %546 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%123 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %547 = linalg.matmul ins(%collapsed_164, %546 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_166 = tensor.expand_shape %547 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %548 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%545, %expanded_166 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %549 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%124 : tensor<512x1536xf32>) outs(%252 : tensor<1536x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1536x512xf32> | |
| %collapsed_167 = tensor.collapse_shape %548 [[0], [1, 2]] : tensor<1x1x1536xf32> into tensor<1x1536xf32> | |
| %550 = linalg.matmul ins(%collapsed_167, %549 : tensor<1x1536xf32>, tensor<1536x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_168 = tensor.expand_shape %550 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %551 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%534, %expanded_168 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %552 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%551 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %553 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%552 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %554 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%553 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %555 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%554 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %556 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%555 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %557 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%551, %556 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %558 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%557, %125 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %559 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%126 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_169 = tensor.collapse_shape %558 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %560 = linalg.matmul ins(%collapsed_169, %559 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %561 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%127 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %562 = linalg.matmul ins(%collapsed_169, %561 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %563 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%128 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %564 = linalg.matmul ins(%collapsed_169, %563 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
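| // NOTE (annotation, assumption): %560, %562 and %564 are the following block's query/key/value projections; two of them are repacked below into (..., 32, 2) real/imag pairs for the complex RoPE rotation, while %564 keeps the 1x1x8x64 head layout (the value path). | |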
| %expanded_170 = tensor.expand_shape %564 [[0], [1, 2, 3]] : tensor<1x512xf32> into tensor<1x1x8x64xf32> | |
| %expanded_171 = tensor.expand_shape %560 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %565 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_171[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_171[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %expanded_172 = tensor.expand_shape %562 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %566 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_172[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_172[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %567 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%565, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %568 = torch_c.from_builtin_tensor %567 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %569 = torch.aten.view_as_real %568 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %570 = torch_c.to_builtin_tensor %569 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_173 = tensor.extract_slice %570[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %571 = torch.aten.view_as_real %568 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %572 = torch_c.to_builtin_tensor %571 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_174 = tensor.extract_slice %572[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_175 = tensor.collapse_shape %extracted_slice_173 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %573 = torch_c.from_builtin_tensor %collapsed_175 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_176 = tensor.collapse_shape %extracted_slice_174 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %574 = torch_c.from_builtin_tensor %collapsed_176 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %575 = torch_c.to_builtin_tensor %573 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %576 = torch_c.to_builtin_tensor %574 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_177 = tensor.insert_slice %575 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_178 = tensor.insert_slice %576 into %inserted_slice_177[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_179 = tensor.expand_shape %inserted_slice_178 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_180 = tensor.collapse_shape %expanded_179 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
| %577 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%566, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %578 = torch_c.from_builtin_tensor %577 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %579 = torch.aten.view_as_real %578 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %580 = torch_c.to_builtin_tensor %579 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_181 = tensor.extract_slice %580[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %581 = torch.aten.view_as_real %578 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %582 = torch_c.to_builtin_tensor %581 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_182 = tensor.extract_slice %582[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_183 = tensor.collapse_shape %extracted_slice_181 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %583 = torch_c.from_builtin_tensor %collapsed_183 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_184 = tensor.collapse_shape %extracted_slice_182 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %584 = torch_c.from_builtin_tensor %collapsed_184 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %585 = torch_c.to_builtin_tensor %583 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %586 = torch_c.to_builtin_tensor %584 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_185 = tensor.insert_slice %585 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_186 = tensor.insert_slice %586 into %inserted_slice_185[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_187 = tensor.expand_shape %inserted_slice_186 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_188 = tensor.collapse_shape %expanded_187 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
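| // annotation (inferred): %587-%593 are layout-only copies (broadcasts and transposes) that arrange the | |
| // query, key and value head tensors into the 1x8x1x64 and 1x8x64x1 shapes consumed by the attention | |
| // batch_matmul below. | |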
| %587 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_188 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %588 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_170 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %589 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_180 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %590 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%589 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_189 = tensor.collapse_shape %590 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %591 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%587 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %592 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3, d2)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%591 : tensor<1x8x1x64xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %593 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%592 : tensor<1x8x64x1xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %collapsed_190 = tensor.collapse_shape %593 [[0, 1], [2], [3]] : tensor<1x8x64x1xf32> into tensor<8x64x1xf32> | |
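| // annotation (inferred): the batch_matmul below computes per-head Q.K^T scores (8 heads, 64-wide head | |
| // dim), and the following generic scales them by 1/%cst_5 = 1/8, i.e. 1/sqrt(64). | |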
| %594 = linalg.batch_matmul ins(%collapsed_189, %collapsed_190 : tensor<8x1x64xf32>, tensor<8x64x1xf32>) outs(%211 : tensor<8x1x1xf32>) -> tensor<8x1x1xf32> | |
| %expanded_191 = tensor.expand_shape %594 [[0, 1], [2], [3]] : tensor<8x1x1xf32> into tensor<1x8x1x1xf32> | |
| %595 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_191 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_5 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
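| // annotation (inferred): %596-%601 implement a numerically stable softmax over the scaled scores: | |
| // running max (with argmax), subtract the max, exp, sum along the reduction axis, and divide. | |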
| %596:2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%595 : tensor<1x8x1x1xf32>) outs(%217, %216 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) { | |
| ^bb0(%in: f32, %out: f32, %out_269: i64): | |
| %788 = linalg.index 3 : index | |
| %789 = arith.index_cast %788 : index to i64 | |
| %790 = arith.maxf %in, %out : f32 | |
| %791 = arith.cmpf ogt, %in, %out : f32 | |
| %792 = arith.select %791, %789, %out_269 : i64 | |
| linalg.yield %790, %792 : f32, i64 | |
| } -> (tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) | |
| %597 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%595, %596#0 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.subf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %598 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%597 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.exp %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %599 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%598 : tensor<1x8x1x1xf32>) outs(%221 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %600 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%598, %599 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.divf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %601 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%600 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x1xf32> | |
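| // annotation (inferred): the ops below transpose the value head into 8x1x64, multiply it by the softmax | |
| // weights via batch_matmul, and transpose the result back to a 1x1x8x64 layout. | |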
| %collapsed_192 = tensor.collapse_shape %601 [[0, 1], [2], [3]] : tensor<1x8x1x1xf32> into tensor<8x1x1xf32> | |
| %602 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%588 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %603 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%602 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_193 = tensor.collapse_shape %603 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %604 = linalg.batch_matmul ins(%collapsed_192, %collapsed_193 : tensor<8x1x1xf32>, tensor<8x1x64xf32>) outs(%228 : tensor<8x1x64xf32>) -> tensor<8x1x64xf32> | |
| %expanded_194 = tensor.expand_shape %604 [[0, 1], [2], [3]] : tensor<8x1x64xf32> into tensor<1x8x1x64xf32> | |
| %605 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_194 : tensor<1x8x1x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
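| // annotation (inferred): %606/%607 appear to be the attention output projection: the 512x512 weight | |
| // %129 is transposed and applied to the flattened 1x512 attention output. | |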
| %606 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%129 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_195 = tensor.collapse_shape %605 [[0], [1, 2, 3]] : tensor<1x1x8x64xf32> into tensor<1x512xf32> | |
| %607 = linalg.matmul ins(%collapsed_195, %606 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_196 = tensor.expand_shape %607 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
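| // annotation (inferred): %608 adds the result back onto the residual stream (%551), and %609-%615 look | |
| // like an RMSNorm: square the activations, average over the 512-wide hidden dim (divide by %cst_4 = 512), | |
| // add %cst_2 = 1e-05, take rsqrt, rescale, and multiply by the per-channel weight %130. | |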
| %608 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%551, %expanded_196 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %609 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%608 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %610 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%609 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %611 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%610 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %612 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%611 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %613 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%612 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %614 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%608, %613 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %615 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%614, %130 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
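| // annotation (inferred): %616-%624 appear to be a gated (SwiGLU-style) feed-forward block: a 512->1536 | |
| // gate projection passed through sigmoid(x)*x (SiLU), multiplied elementwise by a parallel 512->1536 up | |
| // projection, then mapped back to 512 by the 1536->512 down projection; %625 adds the residual. | |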
| %616 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%131 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %collapsed_197 = tensor.collapse_shape %615 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %617 = linalg.matmul ins(%collapsed_197, %616 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_198 = tensor.expand_shape %617 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %618 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%expanded_198 : tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.negf %in : f32 | |
| %789 = math.exp %788 : f32 | |
| %790 = arith.addf %789, %cst_1 : f32 | |
| %791 = arith.divf %cst_1, %790 : f32 | |
| linalg.yield %791 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %619 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%618, %expanded_198 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %620 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%132 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %621 = linalg.matmul ins(%collapsed_197, %620 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_199 = tensor.expand_shape %621 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %622 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%619, %expanded_199 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %623 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%133 : tensor<512x1536xf32>) outs(%252 : tensor<1536x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1536x512xf32> | |
| %collapsed_200 = tensor.collapse_shape %622 [[0], [1, 2]] : tensor<1x1x1536xf32> into tensor<1x1536xf32> | |
| %624 = linalg.matmul ins(%collapsed_200, %623 : tensor<1x1536xf32>, tensor<1536x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_201 = tensor.expand_shape %624 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %625 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%608, %expanded_201 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
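| // annotation (inferred): %626-%632 repeat the RMSNorm pattern above on %625, this time scaled by the | |
| // weight %134, producing the normalized input for the next attention block. | |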
| %626 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%625 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %627 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%626 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %628 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%627 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %629 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%628 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %630 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%629 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %631 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%625, %630 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %632 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%631, %134 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %633 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%135 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_202 = tensor.collapse_shape %632 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %634 = linalg.matmul ins(%collapsed_202, %633 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %635 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%136 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %636 = linalg.matmul ins(%collapsed_202, %635 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %637 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%137 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %638 = linalg.matmul ins(%collapsed_202, %637 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
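| // annotation (inferred): %633-%638 appear to be this block's query/key/value projections: the 512x512 | |
| // weights %135, %136 and %137 are transposed and applied to the normalized 1x512 input; %634 and %636 | |
| // feed the rotary/complex path below, while %638 is reshaped directly into the value head. | |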
| %expanded_203 = tensor.expand_shape %638 [[0], [1, 2, 3]] : tensor<1x512xf32> into tensor<1x1x8x64xf32> | |
| %expanded_204 = tensor.expand_shape %634 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
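| // annotation (inferred): the two index-driven generics below repack the query and key projections as | |
| // complex<f64> values: element 0 of the trailing size-2 dim becomes the real part and element 1 the | |
| // imaginary part, widened from f32 to f64 before complex.create. | |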
| %639 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_204[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_204[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %expanded_205 = tensor.expand_shape %636 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %640 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_205[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_205[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
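| // annotation (inferred): %641 and %651 below appear to apply the rotary frequencies in %expanded_8 to | |
| // the packed query and key respectively; the view_as_real / insert_slice sequences then re-interleave | |
| // the rotated halves back into 1x1x8x64 f32 tensors. | |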
| %641 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%639, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %642 = torch_c.from_builtin_tensor %641 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %643 = torch.aten.view_as_real %642 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %644 = torch_c.to_builtin_tensor %643 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_206 = tensor.extract_slice %644[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %645 = torch.aten.view_as_real %642 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %646 = torch_c.to_builtin_tensor %645 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_207 = tensor.extract_slice %646[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_208 = tensor.collapse_shape %extracted_slice_206 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %647 = torch_c.from_builtin_tensor %collapsed_208 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_209 = tensor.collapse_shape %extracted_slice_207 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %648 = torch_c.from_builtin_tensor %collapsed_209 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %649 = torch_c.to_builtin_tensor %647 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %650 = torch_c.to_builtin_tensor %648 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_210 = tensor.insert_slice %649 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_211 = tensor.insert_slice %650 into %inserted_slice_210[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_212 = tensor.expand_shape %inserted_slice_211 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_213 = tensor.collapse_shape %expanded_212 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
| %651 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%640, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %652 = torch_c.from_builtin_tensor %651 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %653 = torch.aten.view_as_real %652 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %654 = torch_c.to_builtin_tensor %653 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_214 = tensor.extract_slice %654[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %655 = torch.aten.view_as_real %652 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %656 = torch_c.to_builtin_tensor %655 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_215 = tensor.extract_slice %656[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_216 = tensor.collapse_shape %extracted_slice_214 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %657 = torch_c.from_builtin_tensor %collapsed_216 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_217 = tensor.collapse_shape %extracted_slice_215 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %658 = torch_c.from_builtin_tensor %collapsed_217 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %659 = torch_c.to_builtin_tensor %657 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %660 = torch_c.to_builtin_tensor %658 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_218 = tensor.insert_slice %659 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_219 = tensor.insert_slice %660 into %inserted_slice_218[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_220 = tensor.expand_shape %inserted_slice_219 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_221 = tensor.collapse_shape %expanded_220 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
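| // annotation (inferred): from here the attention pattern above repeats for this block: broadcast and | |
| // transpose Q/K/V, per-head Q.K^T via batch_matmul, scale by 1/8, stable softmax, weighted sum with the | |
| // value head, and the 512x512 output projection %138. | |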
| %661 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_221 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %662 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_203 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %663 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_213 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %664 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%663 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_222 = tensor.collapse_shape %664 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %665 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%661 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %666 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3, d2)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%665 : tensor<1x8x1x64xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %667 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%666 : tensor<1x8x64x1xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %collapsed_223 = tensor.collapse_shape %667 [[0, 1], [2], [3]] : tensor<1x8x64x1xf32> into tensor<8x64x1xf32> | |
| %668 = linalg.batch_matmul ins(%collapsed_222, %collapsed_223 : tensor<8x1x64xf32>, tensor<8x64x1xf32>) outs(%211 : tensor<8x1x1xf32>) -> tensor<8x1x1xf32> | |
| %expanded_224 = tensor.expand_shape %668 [[0, 1], [2], [3]] : tensor<8x1x1xf32> into tensor<1x8x1x1xf32> | |
| %669 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_224 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_5 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %670:2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%669 : tensor<1x8x1x1xf32>) outs(%217, %216 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) { | |
| ^bb0(%in: f32, %out: f32, %out_269: i64): | |
| %788 = linalg.index 3 : index | |
| %789 = arith.index_cast %788 : index to i64 | |
| %790 = arith.maxf %in, %out : f32 | |
| %791 = arith.cmpf ogt, %in, %out : f32 | |
| %792 = arith.select %791, %789, %out_269 : i64 | |
| linalg.yield %790, %792 : f32, i64 | |
| } -> (tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) | |
| %671 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%669, %670#0 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.subf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %672 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%671 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.exp %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %673 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%672 : tensor<1x8x1x1xf32>) outs(%221 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %674 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%672, %673 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.divf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %675 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%674 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %collapsed_225 = tensor.collapse_shape %675 [[0, 1], [2], [3]] : tensor<1x8x1x1xf32> into tensor<8x1x1xf32> | |
| %676 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%662 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %677 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%676 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_226 = tensor.collapse_shape %677 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %678 = linalg.batch_matmul ins(%collapsed_225, %collapsed_226 : tensor<8x1x1xf32>, tensor<8x1x64xf32>) outs(%228 : tensor<8x1x64xf32>) -> tensor<8x1x64xf32> | |
| %expanded_227 = tensor.expand_shape %678 [[0, 1], [2], [3]] : tensor<8x1x64xf32> into tensor<1x8x1x64xf32> | |
| %679 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_227 : tensor<1x8x1x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %680 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%138 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_228 = tensor.collapse_shape %679 [[0], [1, 2, 3]] : tensor<1x1x8x64xf32> into tensor<1x512xf32> | |
| %681 = linalg.matmul ins(%collapsed_228, %680 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_229 = tensor.expand_shape %681 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
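| // annotation (inferred): %682 adds the residual, %683-%689 repeat the RMSNorm pattern (weight %139), and | |
| // %690-%698 repeat the gated feed-forward (weights %140, %141, %142), with the residual added at %699. | |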
| %682 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%625, %expanded_229 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %683 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%682 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %684 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%683 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %685 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%684 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %686 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%685 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %687 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%686 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %688 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%682, %687 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %689 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%688, %139 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %690 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%140 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %collapsed_230 = tensor.collapse_shape %689 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %691 = linalg.matmul ins(%collapsed_230, %690 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_231 = tensor.expand_shape %691 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %692 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%expanded_231 : tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.negf %in : f32 | |
| %789 = math.exp %788 : f32 | |
| %790 = arith.addf %789, %cst_1 : f32 | |
| %791 = arith.divf %cst_1, %790 : f32 | |
| linalg.yield %791 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %693 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%692, %expanded_231 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %694 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%141 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %695 = linalg.matmul ins(%collapsed_230, %694 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_232 = tensor.expand_shape %695 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %696 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%693, %expanded_232 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %697 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%142 : tensor<512x1536xf32>) outs(%252 : tensor<1536x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1536x512xf32> | |
| %collapsed_233 = tensor.collapse_shape %696 [[0], [1, 2]] : tensor<1x1x1536xf32> into tensor<1x1536xf32> | |
| %698 = linalg.matmul ins(%collapsed_233, %697 : tensor<1x1536xf32>, tensor<1536x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_234 = tensor.expand_shape %698 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %699 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%682, %expanded_234 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
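| // annotation (inferred): the remainder of this layer follows the same template: RMSNorm (weight %143), | |
| // Q/K/V projections (%144, %145, %146), complex repacking, rotary embedding, and attention. | |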
| %700 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%699 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %701 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%700 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %702 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%701 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %703 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%702 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %704 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%703 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %705 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%699, %704 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %706 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%705, %143 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %707 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%144 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_235 = tensor.collapse_shape %706 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %708 = linalg.matmul ins(%collapsed_235, %707 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %709 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%145 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %710 = linalg.matmul ins(%collapsed_235, %709 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %711 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%146 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %712 = linalg.matmul ins(%collapsed_235, %711 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_236 = tensor.expand_shape %712 [[0], [1, 2, 3]] : tensor<1x512xf32> into tensor<1x1x8x64xf32> | |
| %expanded_237 = tensor.expand_shape %708 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %713 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_237[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_237[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %expanded_238 = tensor.expand_shape %710 [[0], [1, 2, 3, 4]] : tensor<1x512xf32> into tensor<1x1x8x32x2xf32> | |
| %714 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%out: complex<f64>): | |
| %788 = linalg.index 0 : index | |
| %789 = linalg.index 1 : index | |
| %790 = linalg.index 2 : index | |
| %791 = linalg.index 3 : index | |
| %extracted = tensor.extract %expanded_238[%788, %789, %790, %791, %c0] : tensor<1x1x8x32x2xf32> | |
| %extracted_269 = tensor.extract %expanded_238[%788, %789, %790, %791, %c1] : tensor<1x1x8x32x2xf32> | |
| %792 = arith.extf %extracted : f32 to f64 | |
| %793 = arith.extf %extracted_269 : f32 to f64 | |
| %794 = complex.create %792, %793 : complex<f64> | |
| linalg.yield %794 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %715 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%713, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %716 = torch_c.from_builtin_tensor %715 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %717 = torch.aten.view_as_real %716 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %718 = torch_c.to_builtin_tensor %717 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_239 = tensor.extract_slice %718[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %719 = torch.aten.view_as_real %716 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %720 = torch_c.to_builtin_tensor %719 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_240 = tensor.extract_slice %720[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_241 = tensor.collapse_shape %extracted_slice_239 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %721 = torch_c.from_builtin_tensor %collapsed_241 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_242 = tensor.collapse_shape %extracted_slice_240 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %722 = torch_c.from_builtin_tensor %collapsed_242 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %723 = torch_c.to_builtin_tensor %721 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %724 = torch_c.to_builtin_tensor %722 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_243 = tensor.insert_slice %723 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_244 = tensor.insert_slice %724 into %inserted_slice_243[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_245 = tensor.expand_shape %inserted_slice_244 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_246 = tensor.collapse_shape %expanded_245 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
| %725 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (0, 0, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%714, %expanded_8 : tensor<1x1x8x32xcomplex<f64>>, tensor<1x1x1x32xcomplex<f64>>) outs(%174 : tensor<1x1x8x32xcomplex<f64>>) { | |
| ^bb0(%in: complex<f64>, %in_269: complex<f64>, %out: complex<f64>): | |
| %788 = complex.mul %in, %in_269 : complex<f64> | |
| linalg.yield %788 : complex<f64> | |
| } -> tensor<1x1x8x32xcomplex<f64>> | |
| %726 = torch_c.from_builtin_tensor %725 : tensor<1x1x8x32xcomplex<f64>> -> !torch.vtensor<[1,1,8,32],complex<f64>> | |
| %727 = torch.aten.view_as_real %726 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %728 = torch_c.to_builtin_tensor %727 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_247 = tensor.extract_slice %728[0, 0, 0, 0, 0] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %729 = torch.aten.view_as_real %726 : !torch.vtensor<[1,1,8,32],complex<f64>> -> !torch.vtensor<[1,1,8,32,2],f32> | |
| %730 = torch_c.to_builtin_tensor %729 : !torch.vtensor<[1,1,8,32,2],f32> -> tensor<1x1x8x32x2xf32> | |
| %extracted_slice_248 = tensor.extract_slice %730[0, 0, 0, 0, 1] [1, 1, 8, 32, 1] [1, 1, 1, 1, 1] : tensor<1x1x8x32x2xf32> to tensor<1x1x8x32x1xf32> | |
| %collapsed_249 = tensor.collapse_shape %extracted_slice_247 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %731 = torch_c.from_builtin_tensor %collapsed_249 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %collapsed_250 = tensor.collapse_shape %extracted_slice_248 [[0, 1, 2, 3], [4]] : tensor<1x1x8x32x1xf32> into tensor<256x1xf32> | |
| %732 = torch_c.from_builtin_tensor %collapsed_250 : tensor<256x1xf32> -> !torch.vtensor<[256,1],f32> | |
| %733 = torch_c.to_builtin_tensor %731 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %734 = torch_c.to_builtin_tensor %732 : !torch.vtensor<[256,1],f32> -> tensor<256x1xf32> | |
| %inserted_slice_251 = tensor.insert_slice %733 into %189[0, 0] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %inserted_slice_252 = tensor.insert_slice %734 into %inserted_slice_251[0, 1] [256, 1] [1, 1] : tensor<256x1xf32> into tensor<256x2xf32> | |
| %expanded_253 = tensor.expand_shape %inserted_slice_252 [[0, 1, 2, 3], [4]] : tensor<256x2xf32> into tensor<1x1x8x32x2xf32> | |
| %collapsed_254 = tensor.collapse_shape %expanded_253 [[0], [1], [2], [3, 4]] : tensor<1x1x8x32x2xf32> into tensor<1x1x8x64xf32> | |
| %735 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_254 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %736 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_236 : tensor<1x1x8x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
| %737 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed_246 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %738 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%737 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_255 = tensor.collapse_shape %738 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %739 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%735 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %740 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3, d2)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%739 : tensor<1x8x1x64xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %741 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%740 : tensor<1x8x64x1xf32>) outs(%207 : tensor<1x8x64x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x64x1xf32> | |
| %collapsed_256 = tensor.collapse_shape %741 [[0, 1], [2], [3]] : tensor<1x8x64x1xf32> into tensor<8x64x1xf32> | |
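| // Per-head attention logits: batch_matmul of the 8x1x64 query slices with the 8x64x1 transposed key slices yields 8x1x1, then each logit is divided by 8.0 (= sqrt(64), the head dimension). | |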
| %742 = linalg.batch_matmul ins(%collapsed_255, %collapsed_256 : tensor<8x1x64xf32>, tensor<8x64x1xf32>) outs(%211 : tensor<8x1x1xf32>) -> tensor<8x1x1xf32> | |
| %expanded_257 = tensor.expand_shape %742 [[0, 1], [2], [3]] : tensor<8x1x1xf32> into tensor<1x8x1x1xf32> | |
| %743 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_257 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_5 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
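| // Numerically stable softmax over the last (key) dimension: reduce to the max (tracking the argmax), subtract the max, exponentiate, sum, and divide by the sum. | |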
| %744:2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%743 : tensor<1x8x1x1xf32>) outs(%217, %216 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) { | |
| ^bb0(%in: f32, %out: f32, %out_269: i64): | |
| %788 = linalg.index 3 : index | |
| %789 = arith.index_cast %788 : index to i64 | |
| %790 = arith.maxf %in, %out : f32 | |
| %791 = arith.cmpf ogt, %in, %out : f32 | |
| %792 = arith.select %791, %789, %out_269 : i64 | |
| linalg.yield %790, %792 : f32, i64 | |
| } -> (tensor<1x8x1x1xf32>, tensor<1x8x1x1xi64>) | |
| %745 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%743, %744#0 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.subf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %746 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%745 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.exp %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %747 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, 0)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"]} ins(%746 : tensor<1x8x1x1xf32>) outs(%221 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %748 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%746, %747 : tensor<1x8x1x1xf32>, tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.divf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %749 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, 0)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%748 : tensor<1x8x1x1xf32>) outs(%213 : tensor<1x8x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x1xf32> | |
| %collapsed_258 = tensor.collapse_shape %749 [[0, 1], [2], [3]] : tensor<1x8x1x1xf32> into tensor<8x1x1xf32> | |
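| // Weight the values with the normalized scores: 8x1x1 scores batch_matmul 8x1x64 values -> 8x1x64, then transposed back to 1x1x8x64. | |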
| %750 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%736 : tensor<1x1x8x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %751 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%750 : tensor<1x8x1x64xf32>) outs(%203 : tensor<1x8x1x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x8x1x64xf32> | |
| %collapsed_259 = tensor.collapse_shape %751 [[0, 1], [2], [3]] : tensor<1x8x1x64xf32> into tensor<8x1x64xf32> | |
| %752 = linalg.batch_matmul ins(%collapsed_258, %collapsed_259 : tensor<8x1x1xf32>, tensor<8x1x64xf32>) outs(%228 : tensor<8x1x64xf32>) -> tensor<8x1x64xf32> | |
| %expanded_260 = tensor.expand_shape %752 [[0, 1], [2], [3]] : tensor<8x1x64xf32> into tensor<1x8x1x64xf32> | |
| %753 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%expanded_260 : tensor<1x8x1x64xf32>) outs(%200 : tensor<1x1x8x64xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1x1x8x64xf32> | |
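| // Attention output projection: the heads are flattened to 1x512 and multiplied by the transposed 512x512 weight %147; the result is added back onto the residual stream %699. | |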
| %754 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%147 : tensor<512x512xf32>) outs(%165 : tensor<512x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x512xf32> | |
| %collapsed_261 = tensor.collapse_shape %753 [[0], [1, 2, 3]] : tensor<1x1x8x64xf32> into tensor<1x512xf32> | |
| %755 = linalg.matmul ins(%collapsed_261, %754 : tensor<1x512xf32>, tensor<512x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_262 = tensor.expand_shape %755 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %756 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%699, %expanded_262 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
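| // RMSNorm: square the activations, take the mean over the 512 features (divide by 512.0), add eps = 1e-5, rsqrt, rescale the input, and multiply by the per-channel weight %148. | |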
| %757 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%756 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %758 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%757 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %759 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%758 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %760 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%759 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %761 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%760 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %762 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%756, %761 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %763 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%762, %148 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
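| // Gated feed-forward (SwiGLU-style): gate projection 512->1536 with %149, SiLU activation (x * sigmoid(x)), up projection 512->1536 with %150, then the elementwise product of the two branches. | |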
| %764 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%149 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %collapsed_263 = tensor.collapse_shape %763 [[0], [1, 2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %765 = linalg.matmul ins(%collapsed_263, %764 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_264 = tensor.expand_shape %765 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %766 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%expanded_264 : tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.negf %in : f32 | |
| %789 = math.exp %788 : f32 | |
| %790 = arith.addf %789, %cst_1 : f32 | |
| %791 = arith.divf %cst_1, %790 : f32 | |
| linalg.yield %791 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %767 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%766, %expanded_264 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
| %768 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%150 : tensor<1536x512xf32>) outs(%241 : tensor<512x1536xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1536xf32> | |
| %769 = linalg.matmul ins(%collapsed_263, %768 : tensor<1x512xf32>, tensor<512x1536xf32>) outs(%244 : tensor<1x1536xf32>) -> tensor<1x1536xf32> | |
| %expanded_265 = tensor.expand_shape %769 [[0], [1, 2]] : tensor<1x1536xf32> into tensor<1x1x1536xf32> | |
| %770 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%767, %expanded_265 : tensor<1x1x1536xf32>, tensor<1x1x1536xf32>) outs(%246 : tensor<1x1x1536xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1536xf32> | |
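| // Down projection 1536->512 with %151, followed by a residual add onto %756. | |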
| %771 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%151 : tensor<512x1536xf32>) outs(%252 : tensor<1536x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<1536x512xf32> | |
| %collapsed_266 = tensor.collapse_shape %770 [[0], [1, 2]] : tensor<1x1x1536xf32> into tensor<1x1536xf32> | |
| %772 = linalg.matmul ins(%collapsed_266, %771 : tensor<1x1536xf32>, tensor<1536x512xf32>) outs(%168 : tensor<1x512xf32>) -> tensor<1x512xf32> | |
| %expanded_267 = tensor.expand_shape %772 [[0], [1, 2]] : tensor<1x512xf32> into tensor<1x1x512xf32> | |
| %773 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%756, %expanded_267 : tensor<1x1x512xf32>, tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.addf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
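| // Final RMSNorm over the residual stream (same square/mean/eps/rsqrt pattern as above), scaled by the per-channel weight %152. | |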
| %774 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%773 : tensor<1x1x512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.powf %in, %cst_3 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %775 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, 0)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%774 : tensor<1x1x512xf32>) outs(%158 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.addf %in, %out : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %776 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%775 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.divf %in, %cst_4 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %777 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%776 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = arith.truncf %cst_2 : f64 to f32 | |
| %789 = arith.addf %in, %788 : f32 | |
| linalg.yield %789 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %778 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%777 : tensor<1x1x1xf32>) outs(%157 : tensor<1x1x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| %788 = math.rsqrt %in : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x1xf32> | |
| %779 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (0, 0, 0)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%773, %778 : tensor<1x1x512xf32>, tensor<1x1x1xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
| %780 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (0, 0, d2)>, affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%779, %152 : tensor<1x1x512xf32>, tensor<512xf32>) outs(%154 : tensor<1x1x512xf32>) { | |
| ^bb0(%in: f32, %in_269: f32, %out: f32): | |
| %788 = arith.mulf %in, %in_269 : f32 | |
| linalg.yield %788 : f32 | |
| } -> tensor<1x1x512xf32> | |
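| // Output head: the 1x512 hidden state is multiplied by the transpose of the 1x512 weight %153, producing the 1x1 scalar that is returned along with the untouched cache %arg76. | |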
| %collapsed_268 = tensor.collapse_shape %780 [[0, 1], [2]] : tensor<1x1x512xf32> into tensor<1x512xf32> | |
| %781 = tensor.empty() : tensor<512x1xf32> | |
| %782 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>], iterator_types = ["parallel", "parallel"]} ins(%153 : tensor<1x512xf32>) outs(%781 : tensor<512x1xf32>) { | |
| ^bb0(%in: f32, %out: f32): | |
| linalg.yield %in : f32 | |
| } -> tensor<512x1xf32> | |
| %783 = tensor.empty() : tensor<1x1xf32> | |
| %784 = linalg.fill ins(%cst : f32) outs(%783 : tensor<1x1xf32>) -> tensor<1x1xf32> | |
| %785 = linalg.matmul ins(%collapsed_268, %782 : tensor<1x512xf32>, tensor<512x1xf32>) outs(%784 : tensor<1x1xf32>) -> tensor<1x1xf32> | |
| %786 = torch_c.from_builtin_tensor %785 : tensor<1x1xf32> -> !torch.vtensor<[1,1],f32> | |
| %787 = torch_c.to_builtin_tensor %786 : !torch.vtensor<[1,1],f32> -> tensor<1x1xf32> | |
| return %787, %arg76 : tensor<1x1xf32>, tensor<2048x32x2xf32> | |
| } | |