Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save Munksgaard/a73d94a323d34394d1785115cb8db2b4 to your computer and use it in GitHub Desktop.
Save Munksgaard/a73d94a323d34394d1785115cb8db2b4 to your computer and use it in GitHub Desktop.
-- Logical AND of the two boolean flags fits_14842 and suff_intra_par_17136
-- (both are bound outside the visible part of this dump).
let {bool intra_suff_and_fits_17137} =
logand(fits_14842, suff_intra_par_17136)
-- res_17138 : [j_m_i_13929][j_m_i_13929][16i32][16i32]f32@@res_mem_19455->{base: [j_m_i_13929, j_m_i_13929, 16i32, 16i32]; contiguous: True; LMADs: [{offset: 0i32; strides: [mul32 (256i32) (j_m_i_13929), 256i32, 16i32, 1i32]; rotates: [0i32, 0i32, 0i32, 0i32]; shape: [j_m_i_13929, j_m_i_13929, 16i32, 16i32]; permutation: [0, 1, 2, 3]; monotonicity: [Inc, Inc, Inc, Inc]}]}
let {mem res_mem_19455;
[j_m_i_13929][j_m_i_13929][16i32][16i32]f32 res_17138} =
-- Branch returns: {[j_m_i_13929][j_m_i_13929][16i32][16i32]f32@?0->{base: [j_m_i_13929,
-- j_m_i_13929,
-- 16i32,
-- 16i32];
-- contiguous: True;
-- LMADs: [{offset: 0i32;
-- strides: [mul32 (256i32) (j_m_i_13929), 256i32, 16i32, 1i32];
-- rotates: [0i32, 0i32, 0i32, 0i32];
-- shape: [j_m_i_13929, j_m_i_13929, 16i32, 16i32];
-- permutation: [0, 1, 2, 3];
-- monotonicity: [Inc, Inc, Inc, Inc]}]}}
if suff_outer_par_17127
then {
-- Tile/group geometry for the tiled kernel that follows.
-- The tile size comes from get_size; the group size is tile_size squared.
let {i32 tile_size_18609} =
get_size(tile_size_18608, tile_size)
let {i32 group_size_18610} =
mul32(tile_size_18609, tile_size_18609)
-- num_groups_x = (16 + (tile_size - 1)) squot tile_size, i.e. 16 divided by
-- the tile size rounded up (assuming tile_size is positive -- TODO confirm).
let {i32 y_18611} =
sub32(tile_size_18609, 1i32)
let {i32 x_18612} = add32(16i32, y_18611)
let {i32 num_groups_x_18613} =
squot32(x_18612, tile_size_18609)
-- num_groups_top = num_groups_x * (j_m_i * (j_m_i * num_groups_x));
-- it is passed as #groups to the segmap_group binding res_17139.
let {i32 y_18617} =
mul32(j_m_i_13929, num_groups_x_18613)
let {i32 y_18618} =
mul32(j_m_i_13929, y_18617)
let {i32 num_groups_top_18619} =
mul32(num_groups_x_18613, y_18618)
-- Quotient and remainder of 16 by the tile size; cond_18751 is true exactly
-- when the remainder is zero (the tile size divides 16 evenly).
let {i32 num_whole_tiles_18621} =
squot32(16i32, tile_size_18609)
let {i32 residual_input_18750} =
srem32(16i32, tile_size_18609)
let {bool cond_18751} =
eq_i32(residual_input_18750, 0i32)
-- mem_19414 is the memory block that res_17139 lives in (per its memory
-- annotation). NOTE(review): bytes_19490 is not bound anywhere in the visible
-- part of this dump -- presumably defined earlier; confirm.
let {mem mem_19414} =
alloc(bytes_19490)
-- bytes_19368 = 4 * group_size, computed in i64 after sign-extension; it is
-- the size passed to the alloc of mem_19371 inside the segmap_group.
-- (The factor 4 is presumably the byte width of f32 -- TODO confirm.)
let {i64 binop_x_19370} =
sext i32 group_size_18610 to i64
let {i64 bytes_19368} =
mul64(4i64, binop_x_19370)
-- bytes_19372 = 4 * tile_size * tile_size, with the product taken in i64.
-- It is not used in the visible part of this dump.
let {i64 binop_x_19373} =
sext i32 tile_size_18609 to i64
let {i64 binop_x_19375} =
mul64(binop_x_19373, binop_x_19373)
let {i64 bytes_19372} =
mul64(4i64, binop_x_19375)
-- res_17139 : [j_m_i_13929][j_m_i_13929][16i32][16i32]f32@@mem_19414->{base: [j_m_i_13929, j_m_i_13929, 16i32, 16i32]; contiguous: True; LMADs: [{offset: 0i32; strides: [mul32 (256i32) (j_m_i_13929), 256i32, 16i32, 1i32]; rotates: [0i32, 0i32, 0i32, 0i32]; shape: [j_m_i_13929, j_m_i_13929, 16i32, 16i32]; permutation: [0, 1, 2, 3]; monotonicity: [Inc, Inc, Inc, Inc]}]}
let {[j_m_i_13929][j_m_i_13929][16i32][16i32]f32 res_17139} =
segmap_group
(#groups=num_groups_top_18619; groupsize=group_size_18610)
(gtid_16258 < j_m_i_13929,
gtid_16259 < j_m_i_13929,
gid_x_18606 < num_groups_x_18613,
gid_y_18607 < num_groups_x_18613) (~gid_flat_18620) : {f32} {
let {mem@[]f32 mem_19371} =
alloc(bytes_19368, @[]f32)
-- mergeinit_18646 : [tile_size_18609][tile_size_18609]f32@@mem_19371->{base: [tile_size_18609, tile_size_18609]; contiguous: True; LMADs: [{offset: 0i32; strides: [tile_size_18609, 1i32]; rotates: [0i32, 0i32]; shape: [tile_size_18609, tile_size_18609]; permutation: [0, 1]; monotonicity: [Inc, Inc]}]}
-- Builds a tile_size x tile_size f32 array in which every (ltid_x, ltid_y)
-- index returns 0.0f32, i.e. a zero-filled tile.
-- NOTE(review): judging by the name this is presumably the initial value of a
-- loop merge parameter; the loop itself is not visible here -- confirm.
let {[tile_size_18609][tile_size_18609]f32 mergeinit_18646} =
segmap_thread
(#groups=num_groups_top_18619; groupsize=group_size_18610)
(ltid_x_18637 < tile_size_18609,
ltid_y_18638 < tile_size_18609) (~ltid_flat_18639) : {f32} {
return {returns (private) 0.0f32}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment