Perfect Spatial Hashing
# forays into
Perfect Spatial Hashing (Lefebvre & Hoppe)
http://hhoppe.com/perfecthash.pdf

how it works:

There are two parts: a slow encoding step and a fast decoding step.

Encoding
--------
1. You have an array of n sparse points in a domain of u pixels
   or voxels along each of d dimensions, that is, a domain of size u^d.
2. Your target hashmap size will be m^d where m = ceil(pow(n*1.01,1/d)).
3. You will also need an offset map, whose size starts at r^d where
   r = ceil(pow(n/(2*d),1/d)) -- we might need to gradually increase
   the resolution by 1 until the fitting stops failing. For example,
   for n = 1000 points in d = 2 dimensions, m = ceil(sqrt(1010)) = 32,
   and r starts at ceil(sqrt(250)) = 16.
4. Most of the work that follows is about creating the offset map. Once
   the offset map is built, the hashmap is trivial to populate.
5. Add each sparse point to a per-pixel linked list the size of the
   offset map (r^d) -- since our offset map size r is much smaller than
   the point range u, we simply take p % r to get the offset map
   coordinate.
   You end up with a texture that maps each offset coordinate to a list
   of all points that share it, as well as a count of how many do.
6. In order to be able to continue, we need to verify that for each
   offset coordinate, the points that share it will not collide on the
   main map. For this, we create a temporary map of size m^d, initialized
   to -1, and for each offset coordinate's set of points, we write the
   offset coordinate index to p % m on the temporary map -- but first
   we check whether it has already been written; if it has, then at least
   two points will always collide, regardless of how we offset them,
   and we cannot continue. Instead, we must increase r by 1 and restart
   our work from step (4). If no points from the same offset coordinate
   collide, we can continue.
7. Sort the per-pixel linked list created in (5) so that the offset
   coordinates shared by the most points are processed first, and the
   offsets frequented by a single point come second to last (empty
   offsets sort to the very end and are skipped).
8. What comes now is a packing task. For each offset coordinate and the
   points p % r that map to it, we now have to find a d-dimensional
   offset h in the range (0..m) along each dimension so that
   (p + h) % m leads to a unique slot on the hashmap that collides
   with no other point. This is why we start with the offset coordinates
   that have the most points, as they are the most difficult to place.
   Before we start, we must initialize the hashmap with an "undefined"
   coordinate.
9. Now, for each offset coordinate, we start by randomizing h
   and verify that all points mapping to this offset hit a
   hashmap coordinate that is undefined. If they do not, then we can
   either randomize h again or step h in such a manner that we
   search the whole possible offset range. Especially for offset
   coordinates processed later, it is possible that we can't find a
   single suitable spot. In this case our offset map is too small and
   we must increase r by 1 and restart our work from step (4). (We can
   also try to restart (9) with new random offsets and hope that we're
   luckier this time.) If no point from the local set collides, we
   accept h as a good offset for this hashmap, write our original point
   coordinates to the hashmap, and write the offset we found to the
   offset map.
The paper notes a better strategy for gradually growing r until the
fitting succeeds, namely binary search: we first double r until the
fitting succeeds, and then search for the smallest working size
between r/2 and r.
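
As a compact illustration of steps 2-9, here is a hypothetical sketch of
the encoding pass in Python (assuming d = 2 and points given as (x, y)
integer tuples; the names build_tables, try_fit and fit_one are mine, and
this version grows r linearly instead of using the binary search
described above):

import math, random

def build_tables(points, d=2, max_tries=5):
    n = len(points)
    m = math.ceil((n * 1.01) ** (1.0 / d))     # step 2: hash table edge
    r = math.ceil((n / (2 * d)) ** (1.0 / d))  # step 3: initial offset table edge
    while True:
        result = try_fit(points, m, r, max_tries)
        if result is not None:
            H, phi = result
            return m, r, H, phi
        r += 1                                  # offset table too small, grow it

def try_fit(points, m, r, max_tries):
    # step 5: bucket points by their offset-map cell (p % r)
    buckets = {}
    for x, y in points:
        buckets.setdefault((x % r, y % r), []).append((x, y))
    # step 6: points sharing a cell must not already collide under p % m
    for pts in buckets.values():
        if len({(x % m, y % m) for x, y in pts}) < len(pts):
            return None                         # unavoidable collision
    # step 7: process the fullest cells first
    order = sorted(buckets, key=lambda c: -len(buckets[c]))
    for _ in range(max_tries):                  # step 9's "try again" path
        H, phi = {}, {}                         # hashmap and offset map
        for cell in order:
            h = fit_one(buckets[cell], H, m)    # steps 8/9
            if h is None:
                break
            phi[cell] = h
            for x, y in buckets[cell]:
                H[((x + h[0]) % m, (y + h[1]) % m)] = (x, y)
        else:
            return H, phi
    return None

def fit_one(pts, H, m):
    # step 9: random starting offset, then scan all m*m candidates
    bx, by = random.randrange(m), random.randrange(m)
    for k in range(m * m):
        h = ((bx + k) % m, (by + k // m) % m)
        if all(((x + h[0]) % m, (y + h[1]) % m) not in H for x, y in pts):
            return h
    return None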
Decoding
--------
1. For any query point p from the whole domain u^d, fetch the offset
   parameter h from the offset map at (p % r).
2. From the hashmap, check the point at ((p + h) % m). If the
   coordinate we wrote in (9) matches our query point p, we have
   a hit; otherwise the hashmap does not contain this point.
3. That's it. Analogous to the coordinate map, you can now also query
   a data map at the same coordinate ((p + h) % m) to obtain the
   actual data you require.
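
Continuing the hypothetical Python sketch from above (m, r, H and phi as
returned by build_tables), the lookup is just two fetches and a comparison:

def lookup(p, m, r, H, phi):
    # step 1: fetch the offset for this point's offset-map cell
    h = phi.get((p[0] % r, p[1] % r), (0, 0))
    # step 2: probe the hashmap and compare the stored coordinate
    slot = ((p[0] + h[0]) % m, (p[1] + h[1]) % m)
    return H.get(slot) == p  # True iff p is in the sparse set

A data map indexed by the same slot would hold the per-point payload, as
step (3) notes.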
In any case, check out the paper, as it features a bunch of extensions,
as well as additional packing, optimization and application ideas.
using import glm
using import glsl
using import Array
using import ..tukan.gl
using import ..tukan.bitmap
using import ..tukan.packing
using import ..tukan.random
using import .testfragment
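
# edge length of a d-dimensional table with about n*bias entries,
    i.e. round((n*bias)^(1/d))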
fn cube-size (n d bias)
    let bias =
        if (none? bias) 1.0:f64
        else bias
    u32
        (pow ((n as f64) * bias) (/ (d as f64))) + 0.5:f64
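
# reject offset table sizes where the two hash functions would correlate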
fn good-size? (m r)
    and
        (m % r) != 1
        (m % r) != (r - 1)

fn injective? (u m r rho)
    or
        u <= (m * r)
        rho >= (/ r)

let bmp =
    load-bitmap
        .. module-dir "/data/duangle_logo_edge.png"
        components = 1

let samples =
    local
        Array
            tuple
                # coord
                u32
                # next index
                i32

let w h = (unpack bmp.size)
for x y in (multirange w h)
    let v = (deref ('fetch bmp x y))
    if (v > 127)
        'append samples
            tupleof
                pack-morton2x16 (uvec2 x y)
                -1
assert (w == h)
# how many times we try to fit offsets for a single table size before trying
    a bigger offset table; higher takes longer but gives better results,
    particularly for very sparse data.
let MAX_FIT_ATTEMPTS = 5

# domain size
let u = (w as u32)
# number of dimensions
let d = 2
# number of samples
let n = ((countof samples) as u32)
# data density
let rho = (n / u)
# size of hash table
let m = (cube-size n d 1.01:f64)
# allocate main hashtable
let H =
    malloc-array u32 (m * m)

let sigma = (1.0:f64 / ((2 * d) as f64))

let phi-buckets =
    local
        Array
            tuple
                # count
                i32
                # first position index
                i32
                # coordinate
                u32

print "u=" u "n=" n "m=" m
let rstart = (cube-size (n as f64 * sigma) d)

# size of offset table
let attempt-offset-table (r0 r1) = rstart (m - 1)
let r = ((r0 + r1) // 2)
print "attempting offset table size" r "in test range" r0 r1

# if this fails, we need to pick a different r
#if (not (good-size? m r))
    print "not a good size"
    attempt-offset-table (r + 1)
#if (not (injective? u m r rho))
    print "not injective"
    attempt-offset-table (r + 1)
'clear phi-buckets
let phi =
    malloc-array (array u32 2) (r * r)

fn cleanup ()
    free phi

label offset-table-too-small ()
    cleanup;
    attempt-offset-table (r + 1) r1
label offset-table-too-big ()
    cleanup;
    attempt-offset-table r0 r

for x y in (multirange r r)
    'append phi-buckets
        tupleof 0 -1
            pack-morton2x16 (uvec2 x y)
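
# row-major linear index of (x, y) in a grid of width s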
fn make-offset (x y s)
    y * s + x

# build per-pixel linked list of colliding entries in buckets
for i val in (enumerate samples)
    let p next = (unpack val)
    let x y = (unpack (unpack-morton2x16 (deref p)))
    let offset = (make-offset (x % r) (y % r) r)
    let bucket = (phi-buckets @ offset)
    bucket @ 0 += 1
    next = bucket @ 1
    bucket @ 1 = i

'sort phi-buckets
    fn (x)
        - (x @ 0)

fn iter-bucket (samples idx)
    Generator
        label (fret fdone idx)
            if (idx == -1)
                fdone;
            else
                let pos idx = (unpack (samples @ idx))
                fret (deref idx) pos
        deref idx

# clear main map
for i in (range (m * m))
    H @ i = -1:u32
# ensure that entries within each bucket do not collide on the main hashmap,
    or we have no chance of avoiding a collision.
for i k in (enumerate phi-buckets)
    let count idx = (unpack k)
    for pos in (iter-bucket samples idx)
        let x y = (unpack (unpack-morton2x16 (deref pos)))
        let offset = (make-offset (x % m) (y % m) m)
        let dst = (H @ offset)
        let used = (i as u32)
        if (dst == used)
            print "points collide in same group"
            offset-table-too-small;
        dst = used

let rnd = (local (Random 32))
'seed rnd

let fit-all (fit-attempts) = (unconst 0)
#print "trying fit" fit-attempts
if (fit-attempts == MAX_FIT_ATTEMPTS)
    print "too many fit attempts"
    offset-table-too-small;
let max-attempts =
    (m * m) as i32
# clear main map
for i in (range (m * m))
    H @ i = -1:u32
for i k in (enumerate phi-buckets)
    let count idx phipos = (unpack k)
    if (count == 0)
        continue;
    let base-offsetx base-offsety =
        ('range rnd 0 (m as i32)) as u32
        ('range rnd 0 (m as i32)) as u32
    loop (attempt) = 0
    if (attempt == max-attempts)
        #print "fit" fit-attempts "failed for offset" i
        # try from the top, with new random offsets
        fit-all (fit-attempts + 1)
    let offsetx =
        (attempt as u32) % m
    let offsety =
        (attempt as u32 - offsetx) // m
    let offsetx offsety =
        (base-offsetx + offsetx) % m
        (base-offsety + offsety) % m
    let iterator = (iter-bucket samples idx)
    for pos in iterator
        let x y = (unpack (unpack-morton2x16 (deref pos)))
        let offset = (make-offset ((x + offsetx) % m) ((y + offsety) % m) m)
        if ((H @ offset) != -1:u32)
            # try a different offset
            repeat (attempt + 1)
    # if we got here, no point collided with an existing one
    # tag the destination position
    for pos in iterator
        let x y = (unpack (unpack-morton2x16 (deref pos)))
        let offset = (make-offset ((x + offsetx) % m) ((y + offsety) % m) m)
        H @ offset = (deref pos)
    let px py = (unpack (unpack-morton2x16 (deref phipos)))
    # and write the entry to phi
    phi @ (make-offset px py r) =
        arrayof u32
            offsetx as u32
            offsety as u32
if (r0 < r1)
    print "succeeded, but table size not optimal"
    offset-table-too-big;
else
    print "success: optimal size for r =" r

render-fragment-shader
    fn ()
        let test_texture =
            static 'copy
                'setup (glCreateTexture GL_TEXTURE_2D)
                    bitmap =
                        load-bitmap
                            .. module-dir "/data/duangle_logo_edge.png"
                            components = 1
        let sparse_data =
            static 'copy
                'setup (glCreateTexture GL_TEXTURE_2D)
                    size = (ivec2 m)
                    format = GL_R32UI
        glTextureSubImage2D sparse_data 0 0 0 (i32 m) (i32 m) GL_RED_INTEGER GL_UNSIGNED_INT H
        glTextureParameteri sparse_data GL_TEXTURE_MIN_FILTER GL_NEAREST
        glTextureParameteri sparse_data GL_TEXTURE_MAG_FILTER GL_NEAREST
        let sparse_offsets =
            static 'copy
                'setup (glCreateTexture GL_TEXTURE_2D)
                    size = (ivec2 r)
                    format = GL_RG32UI
        glTextureSubImage2D sparse_offsets 0 0 0 (i32 r) (i32 r) GL_RG_INTEGER GL_UNSIGNED_INT phi
        glTextureParameteri sparse_offsets GL_TEXTURE_MIN_FILTER GL_NEAREST
        glTextureParameteri sparse_offsets GL_TEXTURE_MAG_FILTER GL_NEAREST

        xvar uniform smp : sampler2D
            location = 2
        xvar uniform smp-data : usampler2D
            location = 3
        xvar uniform smp-offset : usampler2D
            location = 4

        fn per-frame-setup ()
            glBindTextureUnit 0 sparse_data
            glBindTextureUnit 1 sparse_offsets
            glBindTextureUnit 2 test_texture
            glUniform smp-data 0
            glUniform smp-offset 1
            glUniform smp 2

        fn shader (uv)
            let m = (uvec2 (textureSize smp-data 0))
            let r = (uvec2 (textureSize smp-offset 0))
            let p = (uvec2 (uv * 256.0))
            let key = (pack-morton2x16 p)
            let h1 = ((texelFetch smp-offset (ivec2 (p % r)) 0) . xy)
            let h0 = ((texelFetch smp-data (ivec2 ((p + h1) % m)) 0) . x)
            +
                texelFetch smp (ivec2 p) 0
                ? (h0 == key)
                    vec4 1.0
                    vec4 0.0

        return
            per-frame-setup
            shader
    #debug = true
    size = (ivec2 256)