Created
April 3, 2025 19:27
-
-
Save bjacob/d77a9992028680fc59c61071d11624ae to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
+ __global__ __launch_bounds__(256) static void run(const void *A_data, | |
+ const void *B_data, | |
+ void *C_data, int M_outer, int N_outer, | |
+ int K_outer) { | |
+ int total_tiles = M_outer * N_outer; | |
+ int cu = blockIdx.x; | |
+ int tile_start = total_tiles * cu / CUs; | |
+ int tile_end = total_tiles * (cu + 1) / CUs; | |
+ int m_outer = tile_start / N_outer; | |
+ int n_outer = tile_start - m_outer * N_outer; | |
+ for (int tile = tile_start; tile < tile_end; ++tile) { | |
+ runTile(A_data, B_data, C_data, M_outer, N_outer, K_outer, m_outer, n_outer); | |
+ if (++n_outer == N_outer) { | |
+ n_outer = 0; | |
+ ++m_outer; | |
+ } | |
+ } | |
+ } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment