Created
February 12, 2020 04:19
-
-
Save raphlinus/d83d26d382196904c969cce22e40e722 to your computer and use it in GitHub Desktop.
A Python scratch file used in support of working out piet-gpu kernels
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ctz(x):
    """Count trailing zero bits of *x*, treated as a 32-bit word.

    Returns 32 when the low 32 bits of *x* are all zero, matching the
    32-bit convention used by ``clz`` and ``popcnt`` in this file.
    """
    x &= 0xffffffff  # only the low 32 bits take part, as in clz/popcnt
    if x == 0:
        return 32
    # x & -x isolates the lowest set bit; its bit_length is its index + 1.
    return (x & -x).bit_length() - 1
def clz(x):
    """Count leading zero bits of *x*, treated as a 32-bit word.

    Bits above bit 31 are ignored; returns 32 when the low 32 bits are
    all zero.
    """
    # bit_length of the masked word is (index of highest set bit + 1),
    # or 0 when no bit is set, so the subtraction covers both cases.
    return 32 - (x & 0xffffffff).bit_length()
def popcnt(x):
    """Return the number of set bits among the low 32 bits of *x*.

    Bits above bit 31 are ignored, so a value that was shifted left past
    32 bits is effectively truncated to a 32-bit word before counting.
    """
    return bin(x & 0xffffffff).count("1")
def subgroup_inclusive_add(xs):
    """Return the inclusive prefix sums of *xs* as a list.

    Element ``i`` of the result is ``xs[0] + ... + xs[i]``; an empty input
    yields an empty list.
    """
    from itertools import accumulate

    return list(accumulate(xs))
def subgroup_or(xs):
    """Return the bitwise OR of every value in *xs* (0 for empty input)."""
    from functools import reduce
    from operator import or_

    return reduce(or_, xs, 0)
def subgroup_ballot(bs):
    """Pack the truthiness of each element of *bs* into a bitmask.

    Bit ``i`` of the result is set exactly when ``bs[i]`` is truthy.
    """
    # Each index contributes a distinct bit, so summing equals OR-ing.
    return sum(1 << i for i, b in enumerate(bs) if b)
def iter_backdrop_mask(is_last):
    """Print the masks derived from each set bit of *is_last*.

    First prints ``is_last ^ (is_last - 1)`` labelled "first:". Then, for
    every set bit ``k`` of *is_last* from lowest to highest, prints
    ``is_last ^ (is_last - (2 << k))`` labelled "next:". All masks are
    truncated to 32 bits before printing so that the "first:" line is
    shown the same way as the "next:" lines (e.g. ``is_last == 0`` prints
    ``0xffffffff`` rather than ``-0x1``).
    """
    word_mask = 0xffffffff
    first = is_last ^ (is_last - 1)
    print("first:", hex(first & word_mask))
    remaining = is_last
    while remaining:
        k = ctz(remaining)
        mask = is_last ^ (is_last - (2 << k))
        print("next:", hex(mask & word_mask))
        remaining &= remaining - 1  # clear the lowest set bit
def format_bin(x):
    """Format the bitmask *x* for printing.

    NOTE: despite the name, the value is rendered in hexadecimal via
    ``hex``; the output format is kept as-is.
    """
    return hex(x)
# iter_backdrop_mask(0xffff)
def is_last_helper(k0, tix, ns):
    """Return a one-hot bit for ``ns[tix]`` if it lands in the current window.

    The window covers the values ``k0 + 1`` through ``k0 + len(ns)``
    inclusive. When ``ns[tix]`` lies inside it, the result has bit
    ``ns[tix] - k0 - 1`` set; otherwise the result is 0.
    """
    subgroup_size = len(ns)
    offset = ns[tix] - (k0 + 1)  # position of the value within the window
    if 0 <= offset < subgroup_size:
        return 1 << offset
    return 0
def simulate_kernel2(subgroup_size, ns):
    """Simulate the kernel over *ns* in chunks of *subgroup_size*, printing each step.

    For every chunk of *ns* (the last one padded with zeros) this prints the
    chunk and its inclusive prefix sums, then walks the summed count in
    windows of *subgroup_size*. For each window it prints the ``is_last``
    bitmask plus the per-lane ``i_inv`` and ``j_inv`` lists derived from it.
    Nothing is returned; the output is purely diagnostic.
    """
    for i0 in range(0, len(ns), subgroup_size):
        # Lanes past the end of ns contribute a count of zero.
        n = [ns[i0 + tix] if i0 + tix < len(ns) else 0 for tix in range(subgroup_size)]
        prefix_n = subgroup_inclusive_add(n)
        print('n:', n)
        print('prefix_n:', prefix_n)
        sum_n = prefix_n[-1]
        ilast = 0
        jlast = 0
        for k0 in range(0, sum_n, subgroup_size):
            # Bit p is set when some lane's prefix sum equals k0 + p + 1.
            is_last = subgroup_or(
                [is_last_helper(k0, tix, prefix_n) for tix in range(subgroup_size)]
            )
            print('is_last:', format_bin(is_last))
            # popcnt only inspects bits 0..31, so shifting left by (32 - tix)
            # leaves exactly the bits of is_last below position tix to count.
            delta_i = [
                0 if tix == 0 else popcnt(is_last << (32 - tix))
                for tix in range(subgroup_size)
            ]
            i_inv = [ilast + delta_i[tix] for tix in range(subgroup_size)]
            print('i_inv:', i_inv)
            # With no set bit below tix, continue counting from jlast;
            # otherwise clz gives the distance past the nearest lower set bit.
            j_inv = [
                jlast + tix if delta_i[tix] == 0 else clz(is_last << (32 - tix))
                for tix in range(subgroup_size)
            ]
            print('j_inv:', j_inv)
            ilast += popcnt(is_last)
            jlast = j_inv[-1] + 1
# Example run: subgroup size 8 over nine per-element counts.
simulate_kernel2(8, [2, 3, 5, 1, 1, 1, 1, 1, 1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment