Skip to content

Instantly share code, notes, and snippets.

@bjacob
Created April 3, 2025 19:27
Show Gist options
  • Save bjacob/d77a9992028680fc59c61071d11624ae to your computer and use it in GitHub Desktop.
Save bjacob/d77a9992028680fc59c61071d11624ae to your computer and use it in GitHub Desktop.
+ /// Kernel entry point: each block processes one contiguous slice of the
+ /// M_outer x N_outer grid of output tiles.
+ ///
+ /// Tiles are numbered in row-major order (tile = m_outer * N_outer + n_outer).
+ /// Block `cu` handles tiles in [total_tiles * cu / CUs,
+ /// total_tiles * (cu + 1) / CUs), so slice sizes differ by at most one tile.
+ ///
+ /// NOTE(review): this assumes the grid is launched with exactly `CUs` blocks
+ /// (presumably the compute-unit count) -- confirm at the launch site. A block
+ /// with blockIdx.x >= CUs would compute a range past total_tiles.
+ ///
+ /// @param A_data, B_data, C_data  Opaque buffers forwarded unchanged to
+ ///                                runTile (presumably the two inputs and the
+ ///                                output -- see runTile for the layout).
+ /// @param M_outer  Number of tile rows.
+ /// @param N_outer  Number of tile columns.
+ /// @param K_outer  Forwarded unchanged to runTile.
+ __global__ __launch_bounds__(256) static void run(const void *A_data,
+                                                   const void *B_data,
+                                                   void *C_data, int M_outer,
+                                                   int N_outer, int K_outer) {
+   // Assumes the total tile count itself fits in int, as the int-typed tile
+   // coordinates passed to runTile already require.
+   const int total_tiles = M_outer * N_outer;
+   const int cu = blockIdx.x;
+   // Do the multiply/divide in 64-bit: total_tiles * (cu + 1) can exceed
+   // INT_MAX even when total_tiles fits in int, and signed overflow is UB.
+   // Each quotient is <= total_tiles here, so narrowing back to int is safe.
+   const int tile_start =
+       static_cast<int>(static_cast<long long>(total_tiles) * cu / CUs);
+   const int tile_end =
+       static_cast<int>(static_cast<long long>(total_tiles) * (cu + 1) / CUs);
+   // Decompose the first linear tile index into (row, column) once, then
+   // step the coordinates incrementally to avoid a division per tile.
+   int m_outer = tile_start / N_outer;
+   int n_outer = tile_start - m_outer * N_outer;
+   for (int tile = tile_start; tile < tile_end; ++tile) {
+     runTile(A_data, B_data, C_data, M_outer, N_outer, K_outer, m_outer,
+             n_outer);
+     // Advance in row-major order, wrapping to the next tile row.
+     if (++n_outer == N_outer) {
+       n_outer = 0;
+       ++m_outer;
+     }
+   }
+ }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment