Skip to content

Instantly share code, notes, and snippets.

@raphlinus
Last active February 8, 2020 20:19
Show Gist options
  • Save raphlinus/4bfe21737c018866c7ccde80712d84f2 to your computer and use it in GitHub Desktop.
Save raphlinus/4bfe21737c018866c7ccde80712d84f2 to your computer and use it in GitHub Desktop.
Pseudocode of the fancy (subgroup) version of piet-gpu's kernel 1 (simplified)
// Saved traversal state for a group whose scan was suspended so that kernel1
// could descend into one of its child groups. Pushed just before the descent
// and restored when the child group has been fully processed.
struct StackElement {
    PietGroupRef group; // group to resume scanning when this entry is popped
    uint index;         // index within that group of the next item to examine
    float2 offset;      // offset that was in effect for that group. Maybe pack as short2?
};
// Kernel 1 (simplified pseudocode, subgroup version).
//
// Scans the items of the group `root` in `scene`, one subgroup-sized batch at a
// time, testing each item's bounding box against this threadgroup's region.
// Hit items that precede the first hit group in a batch have their item refs
// written to the output. When a hit item is itself a group, the kernel descends
// into it immediately, pushing the position to resume at (if any items remain)
// onto an explicit stack. The kernel finishes when the current group is
// exhausted and the stack is empty.
kernel1(Buf scene, PietGroupRef root) {
    StackElement stack[MAX_STACK];
    uint stack_ix = 0;
    // Group currently being scanned; pushed/popped as `group` below.
    PietGroupRef group = root;
    uint n = PietGroup_n(scene, root);
    BboxRef bboxes = PietGroup_bboxes(scene, root);
    ItemRef items = PietGroup_items(scene, root);
    float2 offset = PietGroup_offset(scene, root);
    uint index = 0;
    threadgroup_bounds = bounds of a tilegroup x threadgroup region;
    while (1) {
        if (index < n) {
            // Each thread of the subgroup looks at one item of the current batch.
            uint this_ix = index + thread_ix;
            // NOTE(review): four components assumed because .top/.bottom are read
            // below; confirm against the actual return type of Bbox_read.
            short4 bbox = 0;
            bool hit = false;
            bool is_group = false;
            // Zero by default so threads that contribute no item feed an empty
            // mask into the transpose below.
            uint bitmask = 0; // Note: should be 64 bits if subgroup size == 64 (AMD)
            if (this_ix < n) {
                // Read up to one subgroup worth of item nodes.
                bbox = Bbox_read(scene, bboxes + this_ix * BBOX_SIZE);
                hit = bbox_intersects(bbox, threadgroup_bounds);
                if (hit) {
                    // Only items that intersect the region are inspected for their tag.
                    if (PietItem_tag(scene, items + this_ix * PIET_ITEM_SIZE) == Group) {
                        is_group = true;
                    }
                }
            }
            vote_t group_ballot = subgroup_ballot(is_group);
            // Lane of the first hit group in this batch.
            // NOTE(review): relies on count_trailing_zeros(0) being >= subgroup_size
            // when no lane voted; confirm for the target API.
            uint first_group = count_trailing_zeros(group_ballot);
            if (hit && thread_ix < first_group) {
                // Range of tile rows [ymin, ymax) inside the region covered by this item.
                uint ymin = max(0, (bbox.top - threadgroup_bounds.top) / tile_height);
                uint ymax = min(subgroup_size, (bbox.bottom - threadgroup_bounds.top + tile_height - 1) / tile_height);
                // Bits ymin..ymax-1 set. The ymax == 32 case avoids shifting a 32-bit
                // value by its full width. NOTE(review): the literal 32 must track the
                // width of bitmask if it is widened for 64-wide subgroups.
                bitmask = (ymax == 32 ? 0 : (1 << ymax)) - (1 << ymin);
            }
            // Write the item refs for all items up to the first group
            use subgroup magic to transpose bitmask: bit i on thread j becomes bit j on thread i
            // After the transpose each set bit names an item of this batch.
            while (bitmask != 0) {
                uint item_ix = index + count_trailing_zeros(bitmask);
                ItemRef itemref;
                itemref.item = items + item_ix * PIET_ITEM_SIZE;
                itemref.offset = offset;
                write itemref to output
                bitmask = bitmask & (bitmask - 1); // clears bottom bit
            }
            if (first_group < subgroup_size) {
                // A hit group was found: descend into it.
                PietGroupRef new_group = items + (index + first_group) * PIET_ITEM_SIZE;
                // First hit item after the group is where the parent scan resumes.
                // NOTE(review): with no such hit this relies on count_trailing_zeros(0)
                // being >= subgroup_size so the resume point is the next batch; confirm.
                vote_t hit_ballot = subgroup_ballot(hit && thread_ix > first_group);
                uint next_ix = index + count_trailing_zeros(hit_ballot);
                if (next_ix < n) {
                    // Push stack
                    // NOTE(review): no check against MAX_STACK; presumably nesting
                    // depth is bounded elsewhere - confirm.
                    StackElement el;
                    el.group = group;
                    el.index = next_ix;
                    el.offset = offset;
                    stack[stack_ix] = el;
                    stack_ix++;
                }
                // If nothing remains in the parent, nothing is pushed and the
                // parent is simply abandoned.
                group = new_group;
                n = PietGroup_n(scene, group);
                bboxes = PietGroup_bboxes(scene, group);
                items = PietGroup_items(scene, group);
                // Child offsets accumulate on top of the parent's offset.
                offset += PietGroup_offset(scene, group);
                index = 0;
            } else {
                // No group in this batch: advance to the next batch of items.
                index += subgroup_size;
            }
        } else {
            // processed all items in this group; pop the stack
            if (stack_ix == 0) {
                break;
            }
            stack_ix--;
            group = stack[stack_ix].group;
            index = stack[stack_ix].index;
            offset = stack[stack_ix].offset;
            // Item count and array refs are not stored on the stack; reload them.
            n = PietGroup_n(scene, group);
            bboxes = PietGroup_bboxes(scene, group);
            items = PietGroup_items(scene, group);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment