Created
March 30, 2010 22:54
-
-
Save kulp/349716 to your computer and use it in GitHub Desktop.
Generates CPP macros for mapping arbitrary integer sets to small integer sets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use List::Util qw(max); | |
my $name = "b"; | |
# for each significant bit position, where the sequence through the bit | |
# positions is chosen such that each bit chosen divides the remaining | |
# search space as nearly evenly as possible, construct an array that | |
# looks like this: | |
# | |
# [ B, [ L, R ] ] | |
# | |
# where B is the number of the current bitposition | |
# L is either a number or another array like this one | |
# R is defined similarly to but independently from L | |
# | |
# where L and R are constructed recursively. L is the "left" side of the | |
# resulting tree structure from this node downward, and R is the "right" | |
# side. The left side is the side that corresponds to bit position B in | |
# the input string being zero; the right side corresponds to a one. | |
# | |
# For example, consider as input the list of integers (1, 5, 6, 9). These | |
# numbers expressed in binary are | |
# | |
# 3210 (bit position) | |
# ____ | |
# 0001 | |
# 0101 | |
# 0110 | |
# 1001 | |
# | |
# The bit-position that causes the most even split is position 2. The first | |
# (unelaborated) level of the structure would look like this : | |
# | |
# [ 2, [ L, R ] ] | |
# | |
# Taking out position 2 leaves us with two lists : | |
# | |
# 310 (bit position) | |
# ___ | |
# 001 (from 1) | |
# 101 (from 9) | |
# --- | |
# 001 (from 5) | |
# 010 (from 6) | |
# | |
# Now from the first list, we can divide into two lists based on bit position 3. | |
# After we do so, there is no further differentiation (all remaining values are | |
# equal in other bit positions), so the recursion ends. A similar operation | |
# occurs for the second list, but in bit position 1. The structure looks like | |
# this : | |
# | |
# [ 2, [ | |
# [ 3, [ 1, 9 ] ] | |
# [ 1, [ 5, 6 ] ] | |
# ] ] | |
# | |
# The resulting tree structure can now be converted into a set of nested | |
# ternary expressions over the bits of an input number. In this case, the | |
# ternary expression would be (x & 4 ? x & 1 ? 3 : 2 : x & 8 ? 1 : 0). The end | |
# result is that the input values are hashed without collision into the range | |
# [0, N) where N is the number of input values, without holes. This is | |
# particular useful for creating sparse maps using simple arrays in C. | |
# macro() expresses the name of a hash output | |
#sub macro { "INDEX($_[0])" } # wrapped in a macro | |
sub macro { shift } # bare | |
# indexer() expresses access to a particular element of $name | |
#sub indexer { "$name\[$_[0]\]" } # an array indexer | |
#sub indexer { "($name) & 1 << $_[0]" } # a bitmask indexer | |
# a clever bitmask indexer that seeks to minimize its length | |
sub indexer { | |
my $expr = "1 << $_[0]"; | |
my $len = length $expr; | |
my $val = eval $expr; | |
"$name & " . ($len < length $val ? $expr : $val) | |
} | |
# Takes a list of input numbers; returns a structure as described above | |
sub build_structure { | |
my ($i, $bits, @nums) = @_; | |
my @masked = map { | |
my $mask = 1 << $_; | |
[ [ grep { !($mask & $_) } @nums ], [ grep { ($mask & $_) } @nums ] ] | |
} 0 .. $bits - 1; | |
my @spread = map { | |
my $mask = 1 << $_; | |
[ $_ => [ (scalar @{ $masked[$_][0] }), (scalar @{ $masked[$_][1] }) ] ] | |
} 0 .. $bits - 1; | |
my @interesting = sort { | |
abs($a->[1][0] - $a->[1][1]) - abs($b->[1][0] - $b->[1][1]) | |
} @spread; | |
my ($bitpos, $split) = @{ $interesting[0] }; | |
my ($left, $right); | |
($i, $left ) = $split->[0] > 1 | |
? build_structure($i, $bits, @{ $masked[$bitpos][0] }) | |
: ($i + 1, macro($i)); | |
($i, $right) = $split->[1] > 1 | |
? build_structure($i, $bits, @{ $masked[$bitpos][1] }) | |
: ($i + 1, macro($i)); | |
my $outer = [ $bitpos, $left, $right ]; | |
return ($i => $outer); | |
} | |
# Takes a structure built by build_structure(); returns a ternary | |
# expression as a string, using $name | |
sub build_expr { | |
my ($struct) = @_; | |
my ($bitpos, $left, $right) = @$struct; | |
my $leftstr = ref $left ? build_expr($left ) : $left; | |
my $rightstr = ref $right ? build_expr($right) : $right; | |
# "right" and "left" are swapped here because truth comes before | |
# falsehood in a ternary expression, but afterward in the original | |
# structure layout | |
return indexer($bitpos) . " ? $rightstr : $leftstr"; | |
} | |
sub get_expr { | |
my (@lines) = @_; | |
my $bits = int(1 + log(max @lines) / log(2)); | |
my ($index, $struct) = build_structure(0, $bits, @lines); | |
(my $string = build_expr($struct)) =~ s/\s+//g; | |
return $string; | |
} | |
# If called as a script, print out the expression generated from the lines of | |
# standard input. Otherwise, don't do anything until called. | |
if (!caller) { | |
my @lines = map { chomp; $_ } <>; | |
print get_expr(@lines), "\n"; | |
} | |
1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment