Created
May 5, 2020 13:19
-
-
Save Munksgaard/a73d94a323d34394d1785115cb8db2b4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Combines the two boolean flags fits_14842 and suff_intra_par_17136 with
-- logand. Both operands are bound outside this excerpt, so their meaning
-- cannot be confirmed from here.
let {bool intra_suff_and_fits_17137} = | |
logand(fits_14842, suff_intra_par_17136) | |
-- res_17138 : [j_m_i_13929][j_m_i_13929][16i32][16i32]f32@@res_mem_19455->{base: [j_m_i_13929, j_m_i_13929, 16i32, 16i32]; contiguous: True; LMADs: [{offset: 0i32; strides: [mul32 (256i32) (j_m_i_13929), 256i32, 16i32, 1i32]; rotates: [0i32, 0i32, 0i32, | |
-- 0i32]; shape: [j_m_i_13929, j_m_i_13929, 16i32, 16i32]; permutation: [0, 1, 2, 3]; monotonicity: [Inc, Inc, Inc, Inc]}]} | |
let {mem res_mem_19455; | |
[j_m_i_13929][j_m_i_13929][16i32][16i32]f32 res_17138} = | |
-- Branch returns: {[j_m_i_13929][j_m_i_13929][16i32][16i32]f32@?0->{base: [j_m_i_13929, | |
-- j_m_i_13929, | |
-- 16i32, | |
-- 16i32]; | |
-- contiguous: True; | |
-- LMADs: [{offset: 0i32; | |
-- strides: [mul32 (256i32) (j_m_i_13929), 256i32, 16i32, 1i32]; | |
-- rotates: [0i32, 0i32, 0i32, 0i32]; | |
-- shape: [j_m_i_13929, j_m_i_13929, 16i32, 16i32]; | |
-- permutation: [0, 1, 2, 3]; | |
-- monotonicity: [Inc, Inc, Inc, Inc]}]}} | |
if suff_outer_par_17127 | |
then { | |
-- Scalar setup for the tiled kernel that follows: tile size, group size,
-- number of groups, and byte counts for the per-group tile buffer.
--
-- tile_size_18609 comes from get_size with key tile_size_18608 and size
-- class tile_size (presumably a tunable parameter; confirm).
let {i32 tile_size_18609} = | |
get_size(tile_size_18608, tile_size) | |
-- group_size = tile_size * tile_size.
let {i32 group_size_18610} = | |
mul32(tile_size_18609, tile_size_18609) | |
-- num_groups_x = (16 + (tile_size - 1)) squot tile_size, i.e. 16 divided by
-- tile_size rounded up, assuming tile_size is positive.
let {i32 y_18611} = | |
sub32(tile_size_18609, 1i32) | |
let {i32 x_18612} = add32(16i32, y_18611) | |
let {i32 num_groups_x_18613} = | |
squot32(x_18612, tile_size_18609) | |
-- num_groups_top = num_groups_x * (j_m_i * (j_m_i * num_groups_x)),
-- built up as three 32-bit multiplications.
let {i32 y_18617} = | |
mul32(j_m_i_13929, num_groups_x_18613) | |
let {i32 y_18618} = | |
mul32(j_m_i_13929, y_18617) | |
let {i32 num_groups_top_18619} = | |
mul32(num_groups_x_18613, y_18618) | |
-- Signed quotient and remainder of 16 by tile_size; cond_18751 holds when
-- the remainder is zero, i.e. tile_size divides 16 evenly.
let {i32 num_whole_tiles_18621} = | |
squot32(16i32, tile_size_18609) | |
let {i32 residual_input_18750} = | |
srem32(16i32, tile_size_18609) | |
let {bool cond_18751} = | |
eq_i32(residual_input_18750, 0i32) | |
-- Allocates mem_19414 with bytes_19490 bytes. bytes_19490 is not bound in
-- this excerpt; it is presumably defined earlier (confirm).
let {mem mem_19414} = | |
alloc(bytes_19490) | |
-- bytes_19368 = 4 * sext(group_size): the 32-bit product is widened to i64
-- and then scaled by 4 (presumably the byte size of an f32; confirm).
let {i64 binop_x_19370} = | |
sext i32 group_size_18610 to i64 | |
let {i64 bytes_19368} = | |
mul64(4i64, binop_x_19370) | |
-- bytes_19372 = 4 * sext(tile_size) * sext(tile_size): the same quantity as
-- bytes_19368, except the squaring is done after widening to i64.
let {i64 binop_x_19373} = | |
sext i32 tile_size_18609 to i64 | |
let {i64 binop_x_19375} = | |
mul64(binop_x_19373, binop_x_19373) | |
let {i64 bytes_19372} = | |
mul64(4i64, binop_x_19375) | |
-- res_17139 : [j_m_i_13929][j_m_i_13929][16i32][16i32]f32@@mem_19414->{base: [j_m_i_13929, j_m_i_13929, 16i32, 16i32]; contiguous: True; LMADs: [{offset: 0i32; strides: [mul32 (256i32) (j_m_i_13929), 256i32, 16i32, 1i32]; rotates: [0i32, 0i32, 0i32, | |
-- 0i32]; shape: [j_m_i_13929, j_m_i_13929, 16i32, 16i32]; permutation: [0, 1, 2, 3]; monotonicity: [Inc, Inc, Inc, Inc]}]} | |
let {[j_m_i_13929][j_m_i_13929][16i32][16i32]f32 res_17139} = | |
segmap_group | |
(#groups=num_groups_top_18619; groupsize=group_size_18610) | |
(gtid_16258 < j_m_i_13929, | |
gtid_16259 < j_m_i_13929, | |
gid_x_18606 < num_groups_x_18613, | |
gid_y_18607 < num_groups_x_18613) (~gid_flat_18620) : {f32} { | |
let {mem@[]f32 mem_19371} = | |
alloc(bytes_19368, @[]f32) | |
-- mergeinit_18646 : [tile_size_18609][tile_size_18609]f32@@mem_19371->{base: [tile_size_18609, tile_size_18609]; contiguous: True; LMADs: [{offset: 0i32; strides: [tile_size_18609, 1i32]; rotates: [0i32, 0i32]; shape: [tile_size_18609, tile_size_18609]; permutation: [0, 1]; monotonicity: [Inc, Inc]}]} | |
let {[tile_size_18609][tile_size_18609]f32 mergeinit_18646} = | |
segmap_thread | |
(#groups=num_groups_top_18619; groupsize=group_size_18610) | |
(ltid_x_18637 < tile_size_18609, | |
ltid_y_18638 < tile_size_18609) (~ltid_flat_18639) : {f32} { | |
return {returns (private) 0.0f32} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment