Skip to content

Instantly share code, notes, and snippets.

@raphlinus
Created February 12, 2020 04:19
Show Gist options
  • Save raphlinus/d83d26d382196904c969cce22e40e722 to your computer and use it in GitHub Desktop.
Save raphlinus/d83d26d382196904c969cce22e40e722 to your computer and use it in GitHub Desktop.
A Python scratch file used in support of working out piet-gpu kernels
def ctz(x):
    """Count the trailing zero bits of ``x``, treated as a 32-bit word.

    Only the low 32 bits of ``x`` are examined, the same window that
    ``clz`` and ``popcnt`` look at.

    Args:
        x: Integer whose low 32 bits are inspected.

    Returns:
        The index (0-31) of the lowest set bit, or 32 when the low 32 bits
        are all zero.
    """
    x &= 0xffffffff
    if x == 0:
        return 32
    # x & -x isolates the lowest set bit; its bit length is that index + 1.
    return (x & -x).bit_length() - 1
def clz(x):
    """Count the leading zero bits of ``x``, treated as a 32-bit word.

    Bits above position 31 are ignored.

    Args:
        x: Integer whose low 32 bits are inspected.

    Returns:
        The number of zero bits above the highest set bit within the low
        32 bits (0-31), or 32 when those bits are all zero.
    """
    # bit_length() of the masked word is (index of highest set bit) + 1,
    # and 0 for an all-zero word, so this covers the zero case too.
    return 32 - (x & 0xffffffff).bit_length()
def popcnt(x):
    """Count the set bits in the low 32 bits of ``x``.

    Bits above position 31 are ignored.

    Args:
        x: Integer whose low 32 bits are inspected.

    Returns:
        The number of set bits among bit positions 0-31 (0-32).
    """
    return bin(x & 0xffffffff).count("1")
def subgroup_inclusive_add(xs):
    """Return the inclusive prefix sums of ``xs``.

    Element ``i`` of the result is ``xs[0] + ... + xs[i]``.

    Args:
        xs: Iterable of numbers, one per lane.

    Returns:
        A new list of running totals, the same length as ``xs``; empty when
        ``xs`` is empty.
    """
    # Named ``running`` rather than ``sum`` so the builtin is not shadowed.
    running = 0
    result = []
    for x in xs:
        running += x
        result.append(running)
    return result
def subgroup_or(xs):
    """Return the bitwise OR of every value in ``xs``.

    Args:
        xs: Iterable of integers, one per lane.

    Returns:
        The OR of all elements, or 0 when ``xs`` is empty.
    """
    result = 0
    for x in xs:
        result |= x
    return result
def subgroup_ballot(bs):
    """Pack a sequence of truthy/falsy values into a bitmask.

    Args:
        bs: Iterable of flags, one per lane; lane ``i`` maps to bit ``i``.

    Returns:
        An integer with bit ``i`` set exactly when ``bs[i]`` is truthy; 0
        when ``bs`` is empty.
    """
    result = 0
    for i, b in enumerate(bs):
        if b:
            result |= 1 << i
    return result
def iter_backdrop_mask(is_last):
    """Print the run of bit positions delimited by each set bit of ``is_last``.

    The "first" mask covers bit 0 up to and including the lowest set bit of
    ``is_last``. Then, for every set bit ``k`` in ascending order, a "next"
    mask covers the bits from ``k + 1`` up to and including the following
    set bit, or up to bit 31 when there is no higher set bit.

    Every mask is clamped to 32 bits before printing, so ``is_last == 0``
    prints ``0xffffffff`` for the first mask rather than a negative value.

    Args:
        is_last: Bitmask whose set bits mark the end of each run.

    Returns:
        None. The masks are written to stdout in hexadecimal.
    """
    m = is_last
    # XOR with (value - 1) flips the lowest set bit and everything below it.
    mask = is_last ^ (is_last - 1)
    print("first:", hex(mask & 0xffffffff))
    while m:
        k = ctz(m)
        # Subtracting 2**(k+1) borrows through to the next set bit above k;
        # the XOR exposes exactly the bits the borrow touched.
        mask = is_last ^ (is_last - (2 << k))
        print("next:", hex(mask & 0xffffffff))
        # Clear the lowest set bit to advance to the next one.
        m &= m - 1
def format_bin(x):
    """Format a bitmask for display.

    Despite the name, the value is rendered in hexadecimal.

    Args:
        x: Integer to format.

    Returns:
        The ``hex()`` string for ``x``, e.g. ``'0xff'``.
    """
    return hex(x)
#iter_backdrop_mask(0xffff)
def is_last_helper(k0, tix, ns):
    """Return the window bit for lane ``tix`` relative to offset ``k0``.

    The window spans ``len(ns)`` slots starting at ``k0``. When
    ``ns[tix] - 1`` falls inside it, the bit at that position (relative to
    ``k0``) is returned.

    Args:
        k0: Index of the first slot in the current window.
        tix: Lane index into ``ns``.
        ns: Per-lane values; ``simulate_kernel2`` passes inclusive prefix
            sums here. Its length is taken as the subgroup size.

    Returns:
        ``1 << (ns[tix] - k0 - 1)`` when that bit index lies in
        ``[0, len(ns))``, otherwise 0.
    """
    subgroup_size = len(ns)
    bit = ns[tix] - (k0 + 1)
    if 0 <= bit < subgroup_size:
        return 1 << bit
    return 0
def simulate_kernel2(subgroup_size, ns):
    """Simulate expanding per-input counts into output slots, and print it.

    ``ns`` is processed in chunks of ``subgroup_size`` inputs; a short final
    chunk is padded with zeros. Within a chunk, input ``t`` owns ``n[t]``
    consecutive output slots. The output slots are then walked in windows of
    ``subgroup_size``, and for each slot in the window two values are
    derived and printed:

    * ``i_inv``: how many inputs finished in an earlier slot, i.e. the index
      within the chunk of the input that owns the slot.
    * ``j_inv``: the slot's offset within that input's run of slots.

    The bit arithmetic uses 32-bit helpers and a hard-coded shift of
    ``32 - tix``, so this presumably expects ``subgroup_size <= 32``.

    Args:
        subgroup_size: Number of lanes per chunk and slots per window.
        ns: Sequence of non-negative output counts, one per input.

    Returns:
        None. All intermediate values are written to stdout.
    """
    for i0 in range(0, len(ns), subgroup_size):
        # Load one chunk of counts, padding past the end of ns with zeros.
        n = [
            ns[i0 + tix] if i0 + tix < len(ns) else 0
            for tix in range(subgroup_size)
        ]
        prefix_n = subgroup_inclusive_add(n)
        print('n:', n)
        print('prefix_n:', prefix_n)
        sum_n = prefix_n[-1]
        # Carried across windows: inputs already finished, and the offset at
        # which the first slot of the next window continues.
        ilast = 0
        jlast = 0
        for k0 in range(0, sum_n, subgroup_size):
            # Bit p is set when slot k0 + p is the final slot of some input.
            is_last = subgroup_or(
                [is_last_helper(k0, tix, prefix_n) for tix in range(subgroup_size)]
            )
            print('is_last:', format_bin(is_last))
            # Shifting left by (32 - tix) leaves only bits below tix inside
            # the 32-bit word popcnt inspects. tix == 0 is special-cased
            # rather than shifting by a full 32.
            delta_i = [
                0 if tix == 0 else popcnt(is_last << (32 - tix))
                for tix in range(subgroup_size)
            ]
            i_inv = [ilast + delta_i[tix] for tix in range(subgroup_size)]
            print('i_inv:', i_inv)
            # With no finished input below tix in this window, the run
            # continues from the previous window. Otherwise clz gives the
            # distance back to the nearest finished slot.
            j_inv = [
                jlast + tix if delta_i[tix] == 0 else clz(is_last << (32 - tix))
                for tix in range(subgroup_size)
            ]
            print('j_inv:', j_inv)
            ilast += popcnt(is_last)
            jlast = j_inv[-1] + 1
# Example run: subgroup size 8 with nine inputs, so the second chunk holds one
# real count followed by zero padding.
simulate_kernel2(8, [2, 3, 5, 1, 1, 1, 1, 1, 1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment