|
struct StackElement { |
|
PietGroupRef group; |
|
uint index; |
|
float2 offset; // Maybe pack as short2? |
|
} |
|
|
|
// Pseudocode sketch: iterative walk over the group tree that starts at `root`,
// using an explicit stack instead of recursion. Steps that have no concrete
// code yet are left as plain-English pseudocode lines.
//
//   scene  buffer read by the PietGroup_* / Bbox_read / PietItem_tag accessors
//   root   reference to the group where the traversal starts
//
// Each pass of the main loop examines one batch of up to subgroup_size items of
// the current group, one item per thread:
//   1. every thread tests its item's bbox against threadgroup_bounds;
//   2. item refs are written for the hits that precede the first group item
//      hit in the batch;
//   3. if a group item was hit, the resume point in the current group is pushed
//      and traversal descends into that group; otherwise the batch index
//      advances by subgroup_size.
// When the current group is exhausted the stack is popped; the walk ends when
// the stack is empty.
//
// NOTE(review): thread_ix, subgroup_size, tile_height and threadgroup_bounds are
// not declared in this sketch; presumably they come from the execution
// environment -- confirm.
kernel1(Buf scene, PietGroupRef root) {
    StackElement stack[MAX_STACK];
    uint stack_ix = 0;

    // Traversal state for the group currently being scanned.
    PietGroupRef group = root;
    uint n = PietGroup_n(scene, group);
    BboxRef bboxes = PietGroup_bboxes(scene, group);
    ItemRef items = PietGroup_items(scene, group);
    float2 offset = PietGroup_offset(scene, group);
    uint index = 0;

    threadgroup_bounds = bounds of a tilegroup x threadgroup region;

    while (1) {
        if (index < n) {
            uint this_ix = index + thread_ix;
            // NOTE(review): bbox is accessed through .top/.bottom below, so the
            // declared type probably has to be whatever Bbox_read returns --
            // confirm.
            short2 bbox = 0;
            bool hit = false;
            bool is_group = false;
            // Starts at zero so that threads which contribute no rows (no hit,
            // or at/after the first group) feed an empty mask into the
            // transpose below.
            uint bitmask = 0; // Note: should be 64 bits if subgroup size == 64 (AMD)

            if (this_ix < n) {
                // Read up to one subgroup worth of item nodes.
                bbox = Bbox_read(scene, bboxes + this_ix * BBOX_SIZE);
                hit = bbox_intersects(bbox, threadgroup_bounds);
                // The item tag is only fetched for items that actually hit.
                if (hit && PietItem_tag(scene, items + this_ix * PIET_ITEM_SIZE) == Group) {
                    is_group = true;
                }
            }

            vote_t group_ballot = subgroup_ballot(is_group);
            uint first_group = count_trailing_zeros(group_ballot);

            if (hit && thread_ix < first_group) {
                // Tile-row range covered by this item, relative to the top of
                // threadgroup_bounds.
                uint ymin = max(0, (bbox.top - threadgroup_bounds.top) / tile_height);
                uint ymax = min(subgroup_size, (bbox.bottom - threadgroup_bounds.top + tile_height - 1) / tile_height);
                // Set bits ymin..ymax-1. The ymax == 32 case avoids shifting by
                // the full width of the mask (assuming a 32-bit uint); the
                // unsigned subtraction then wraps to bits ymin..31.
                bitmask = (ymax == 32 ? 0 : (1u << ymax)) - (1u << ymin);
            }

            // Write the item refs for all items up to the first group
            use subgroup magic to transpose bitmask: bit i on thread j becomes bit j on thread i
            while (bitmask != 0) {
                uint item_ix = index + count_trailing_zeros(bitmask);
                // NOTE(review): ItemRef is used here as a struct with
                // item/offset fields, but `items` above is an ItemRef used as a
                // plain offset -- confirm the intended types.
                ItemRef itemref;
                itemref.item = items + item_ix * PIET_ITEM_SIZE;
                itemref.offset = offset;
                write itemref to output
                bitmask = bitmask & (bitmask - 1); // clears bottom bit
            }

            if (first_group < subgroup_size) {
                // A group item was hit in this batch: descend into it.
                PietGroupRef new_group = items + (index + first_group) * PIET_ITEM_SIZE;
                // Resume the current group at the first hit after that group item.
                vote_t hit_ballot = subgroup_ballot(hit && thread_ix > first_group);
                uint next_ix = index + count_trailing_zeros(hit_ballot);
                if (next_ix < n) {
                    // Push stack
                    // NOTE(review): stack_ix is not checked against MAX_STACK;
                    // confirm that nesting depth is bounded elsewhere.
                    StackElement el;
                    el.group = group;
                    el.index = next_ix;
                    el.offset = offset;
                    stack[stack_ix] = el;
                    stack_ix++;
                }
                group = new_group;
                n = PietGroup_n(scene, group);
                bboxes = PietGroup_bboxes(scene, group);
                items = PietGroup_items(scene, group);
                // The child's offset is added to the offset accumulated so far.
                offset += PietGroup_offset(scene, group);
                index = 0;
            } else {
                index += subgroup_size;
            }
        } else {
            // processed all items in this group; pop the stack
            if (stack_ix == 0) {
                break;
            }
            stack_ix--;
            group = stack[stack_ix].group;
            index = stack[stack_ix].index;
            offset = stack[stack_ix].offset;
            n = PietGroup_n(scene, group);
            bboxes = PietGroup_bboxes(scene, group);
            items = PietGroup_items(scene, group);
        }
    }
}