Created
July 19, 2025 17:05
-
-
Save navyxliu/e6c240b2d8b05a99317878b1df772d18 to your computer and use it in GitHub Desktop.
vectorize_scf_for.mlir
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Module holding a single function. An scf.for loop produces a scalar f32
// result that is then used as the divisor inside a 1x128 affine loop nest.
// NOTE(review): this looks like a reduced test case for the affine
// super-vectorizer (scalar defined by a non-affine op, used inside affine.for)
// -- confirm the intent with the author.
module { | |
// Eleven arguments: seven i32 values and four !tt.ptr<f16> pointers. In the
// visible body only %arg0, %arg1 (loop bound) and %arg3 (iter_arg init) are
// used; the remaining arguments, including all pointers, are unused.
func.func @local_sparse_attention_kernel_03_bf(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32, %arg7: !tt.ptr<f16>, %arg8: !tt.ptr<f16>, %arg9: !tt.ptr<f16>, %arg10: !tt.ptr<f16>) { | |
%c16_i32 = arith.constant 16 : i32 | |
%cst = arith.constant 0.000000e+00 : f32 | |
// 0xFF800000 is the IEEE-754 single-precision bit pattern for -infinity.
%cst_1 = arith.constant 0xFF800000 : f32 | |
%c0_i32 = arith.constant 0 : i32 | |
%c1_i32 = arith.constant 1 : i32 | |
// %alloc_5 is the f16 destination buffer written by the affine nest below.
%alloc_5 = memref.alloc() {alignment = 64 : i64} : memref<1x128xf16> | |
// %alloc_7 is the f32 source buffer. Nothing in this function stores to it,
// so the loads below read whatever the allocation happens to contain.
%alloc_7 = memref.alloc() {alignment = 64 : i64} : memref<1x128xf32> | |
// Upper bound of the scf.for loop: %arg0 + %arg1.
%32 = arith.addi %arg0, %arg1: i32 | |
// Loop from 0 to %32 with step 1 carrying four values. The body only yields
// the iter_args back unchanged, so every result equals its init value
// regardless of trip count; in particular %35#2 equals %cst (0.0).
%35:4 = scf.for %arg12 = %c0_i32 to %32 step %c1_i32 iter_args(%arg13 = %c16_i32, %arg14 = %cst_1, %arg15 = %cst, %arg16 = %arg3) -> (i32, f32, f32, i32) : i32 { | |
scf.yield %arg13, %arg14, %arg15, %arg16 : i32, f32, f32, i32 | |
} | |
// Element-wise pass over the 1x128 buffers:
//   %alloc_5[i, j] = truncf(%alloc_7[i, j] / %35#2)
// The divisor %35#2 is defined outside both affine loops (by the scf.for
// above) and does not depend on either induction variable.
affine.for %arg12 = 0 to 1 { | |
affine.for %arg13 = 0 to 128 { | |
%36 = affine.load %alloc_7[%arg12, %arg13] : memref<1x128xf32> | |
// Divide by the loop-carried scalar result of the scf.for.
%37 = arith.divf %36, %35#2 : f32 | |
// Narrow f32 -> f16 before storing into the f16 buffer.
%38 = arith.truncf %37 : f32 to f16 | |
affine.store %38, %alloc_5[%arg12, %arg13] : memref<1x128xf16> | |
} | |
} | |
// Neither allocation is deallocated or returned before the function exits.
return | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
mlir-opt -allow-unregistered-dialect --affine-super-vectorize="virtual-vector-size=128" --debug-only=early-vect ./vectorize_scf_for.mlir