Skip to content

Instantly share code, notes, and snippets.

@chrisforbes
Created May 1, 2013 10:25
Show Gist options
  • Save chrisforbes/5494607 to your computer and use it in GitHub Desktop.
Save chrisforbes/5494607 to your computer and use it in GitHub Desktop.
New clip shader design
# Python impl of the new gen4/5 clip shader algorithm,
# to explore it in a slightly easier form than GEN assembly.
# just some simple 4-wide vector instructions
def swiz4(x, a, b, c, d):
    """Swizzle: return the 4-tuple (x[a], x[b], x[c], x[d])."""
    return tuple(x[lane] for lane in (a, b, c, d))
def add4(x, y):
    """Return the component-wise sum of the first four elements of x and y."""
    return tuple(x[lane] + y[lane] for lane in range(4))
def neg4(x):
    """Return a 4-tuple holding the negation of each of x's first four elements."""
    return tuple(-x[lane] for lane in range(4))
def mul4s(s, x):
    """Scale the first four elements of x by the scalar s; return a 4-tuple."""
    return tuple(s * x[lane] for lane in range(4))
def lerp(t, x, y):
    """Linearly blend x and y: returns x when t == 1 and y when t == 0."""
    weight_x = t
    weight_y = 1 - t
    return weight_x * x + weight_y * y
def lerpv(t, xs, ys):
    """Apply lerp(t, x, y) to each pair drawn from xs and ys.

    Returns a tuple as long as the shorter of the two inputs.
    """
    blended = []
    for x, y in zip(xs, ys):
        blended.append(lerp(t, x, y))
    return tuple(blended)
# set up vector of distances:
# - 6 fixed planes: -w < {x,y,z} < w, from hpos.
# - distances in 'cd' vertex attrib if it exists
# we don't care about anything else that's in the vertex
# until the very end.
def setup_vert(b, v):
    """Build the flat per-vertex tuple used by the clipper.

    b -- tuple placed at the front of the result (the caller passes the
         vertex's barycentric coordinates).
    v -- vertex dict; must contain 'hpos' (indexed up to element 3) and may
         contain 'cd', a tuple of extra clip distances.

    Returns b + (w-x, w-y, w-z) + (w+x, w+y, w+z) + cd, where x, y, z, w
    are the components of v['hpos'].
    """
    hpos = v['hpos']
    wwww = swiz4(hpos, 3, 3, 3, 3)  # .wwww
    upper = add4(wwww, neg4(hpos))[:3]  # w - {x,y,z}
    lower = add4(wwww, hpos)[:3]  # w + {x,y,z}
    user = v.get('cd', tuple())
    return b + upper + lower + user
# for each vertex, set up barycentric coords and clip distances
def setup(verts):
    """Run setup_vert on the first three vertices of verts.

    Vertex i is given the barycentric coordinate tuple whose i-th component
    is 1.0 and whose other components are 0.0. Returns a list of the three
    resulting tuples.
    """
    corners = (
        (1.0, 0.0, 0.0),
        (0.0, 1.0, 0.0),
        (0.0, 0.0, 1.0),
    )
    # Index verts explicitly (rather than zip) so that a short input still
    # raises IndexError.
    return [setup_vert(corner, verts[idx]) for idx, corner in enumerate(corners)]
# compute outcodes. in the hardware, some of these are produced by
# the pre-clip shader (VS/GS); others are generated by the hardware
# for the fixed planes.
def outcodes(v):
    """Return the outcode bitmask for one set-up vertex tuple.

    v -- sequence whose elements from index 3 onward are clip distances;
         the first three elements are skipped.

    Bit i of the result is set when v[i] < 0 for i >= 3. Bits 0-2 are
    never set.
    """
    result = 0
    # enumerate() instead of xrange() so this runs on Python 2 and 3 alike.
    for i, dist in enumerate(v[3:], 3):  # ignore the bottom bits.
        if dist < 0:
            result |= 1 << i
    return result
# one-sided sutherland-hodgman, applying one plane.
def clip1(verts, i):
    """Clip a polygon against one plane (one-sided Sutherland-Hodgman).

    verts -- sequence of vertex tuples forming a closed polygon; element i
             of each tuple is that vertex's distance to the plane, and a
             negative distance means the vertex is outside.
    i     -- index of the distance to clip against.

    Generator: yields the clipped polygon's vertices in order. For every
    edge whose endpoints lie on opposite sides of the plane, a vertex
    interpolated (across all elements) to distance zero is yielded; every
    original vertex with distance >= 0 is yielded unchanged. An empty
    input yields nothing.
    """
    if not verts:
        return
    # The first edge examined runs from the last vertex to the first one.
    prev_vert = verts[-1]
    for cur_vert in verts:
        t0 = prev_vert[i]
        t1 = cur_vert[i]
        if (t0 < 0) != (t1 < 0):
            # float() keeps this a true division on Python 2 even when the
            # distances happen to be ints.
            yield lerpv(float(t1) / (t1 - t0), prev_vert, cur_vert)
        if t1 >= 0:
            yield cur_vert
        prev_vert = cur_vert
# clip against each plane in turn, until we are done or have
# clipped away the whole thing. `oc` is the bitwise-OR of the
# vertex-level outcodes. on the hardware, we get this delivered in the
# CLIP thread payload.
def clip(verts, oc):
    """Clip a polygon against every plane flagged in oc.

    verts -- list of set-up vertex tuples; elements from index 3 onward
             are clip distances.
    oc    -- bitmask; bit i (i >= 3) requests clipping against distance i.
             Bits below 3 are ignored.

    Returns the clipped vertex list. The input list itself is returned when
    no flagged plane applies. Returns [] when the input is empty or when
    clipping leaves fewer than three vertices. Prints a line for each plane
    that is clipped against.
    """
    if not verts:
        # Nothing to clip; also avoids indexing verts[0] below.
        return []
    numattrs = len(verts[0])
    for i in range(3, numattrs):
        if oc & (1 << i):
            print('need to clip against dist %d' % i)
            verts = list(clip1(verts, i))
            if len(verts) < 3:
                # Clipped away the whole thing.
                return []
    return verts
# interpolate vertex attributes at the barycentric coordinates in the
# first 3 elements of `b`. attributes which want `flat` interpolation
# are just copied from the provoking vertex instead.
def emit_bary(b, pv, verts):
    """Print the attributes of one output vertex.

    b     -- sequence whose first three elements are the weights applied to
             verts[0], verts[1] and verts[2].
    pv    -- provoking vertex dict; its keys determine which attributes are
             emitted, and attributes named in the module-level `flat` set
             are copied from it unchanged.
    verts -- the three original vertex dicts; every non-flat attribute must
             be present in each and have at least four components.

    The resulting attribute dict is printed; nothing is returned.
    """
    out = {}
    for k in pv:
        if k in flat:
            out[k] = pv[k]
        else:
            # Weighted sum b[0]*verts[0][k] + b[1]*verts[1][k] + b[2]*verts[2][k].
            weighted = [mul4s(b[n], verts[n][k]) for n in range(3)]
            out[k] = add4(add4(weighted[0], weighted[1]), weighted[2])
    # Parenthesized single-argument print works on both Python 2 and 3.
    print(out)
# here's a sample triangle which needs clipping.
verts = [
    # x = -2 with w = 1
    {
        'hpos': (-2.0, 0.0, 0.0, 1.0),
        'c0': (1, 0, 0, 1),
    },
    # x = +2 with w = 1
    {
        'hpos': (2.0, 0.0, 0.0, 1.0),
        'c0': (0, 0, 1, 0),
    },
    {
        'hpos': (0.0, 1.0, 0.0, 1.0),
        'c0': (0, 1, 0, 0),
    },
]
# Names of the attributes which want flat interpolation (copied from the
# provoking vertex). Anything NOT mentioned here gets barycentric
# interpolation in clip space.
flat = {'c0'}
def main():
    """Run the clip algorithm on the sample triangle and print each stage.

    Prints the three per-vertex outcodes. If all three share a set bit it
    prints 'TR, all done.' and stops. Otherwise it prints and emits the
    pre-clip vertices, clips against the union of the outcodes, then prints
    and emits the post-clip vertices.
    """
    pv = verts[0]  # provoking vertex -- flat attribs copied from here.
    iverts = setup(verts)
    # Build a real list: on Python 3 map() returns an iterator, which cannot
    # be indexed below.
    oc = [outcodes(iv) for iv in iverts]
    print('outcodes: 0x%x 0x%x 0x%x' % (oc[0], oc[1], oc[2]))
    if oc[0] & oc[1] & oc[2]:
        # TR = oc[0]&oc[1]&oc[2] != 0. all verts are outside the same plane.
        # no thread is spawned.
        print('TR, all done.')
        return
    # on the hardware, TA = oc[0]|oc[1]|oc[2] == 0,
    # and no thread is spawned. dont bother emulating this.
    print('pre-clip:\n%s\n' % iverts)
    for v in iverts:
        emit_bary(v, pv, verts)
    overts = clip(iverts, oc[0] | oc[1] | oc[2])
    print('post-clip:\n%s\n' % overts)
    for v in overts:
        emit_bary(v, pv, verts)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment