kulp · March 30, 2010 22:54
diff --git a/tree_macro.pl b/tree_macro.pl
 #!/usr/bin/env perl
 use strict;
 use List::Util qw(max);

 my $name = "b";

 # for each significant bit position, where the sequence through the bit
 # positions is chosen such that each bit chosen divides the remaining
 # search space as nearly evenly as possible, construct an array that
 # looks like this:
 #
 # [ B, [ L, R ] ]
 #
 # where     B is the number of the current bitposition
 #           L is either a number or another array like this one
 #           R is defined similarly to but independently from L
 #
 # where L and R are constructed recursively. L is the "left" side of the
 # resulting tree structure from this node downward, and R is the "right"
 # side. The left side is the side that corresponds to bit position B in
 # the input string being zero; the right side corresponds to a one.
 #
 # For example, consider as input the list of integers (1, 5, 6, 9). These
 # numbers expressed in binary are
 #
 #   3210 (bit position)
 #   ____
 #   0001
 #   0101
 #   0110
 #   1001
 #
 # The bit-position that causes the most even split is position 2. The first
 # (unelaborated) level of the structure would look like this :
 #
 # [ 2, [ L, R ] ]
 #
 # Taking out position 2 leaves us with two lists :
 #
 #   310 (bit position)
 #   ___
 #   001 (from 1)
 #   101 (from 9)
 #   ---
 #   001 (from 5)
 #   010 (from 6)
 #
 # Now from the first list, we can divide into two lists based on bit position 3.
 # After we do so, there is no further differentiation (all remaining values are
 # equal in other bit positions), so the recursion ends. A similar operation
 # occurs for the second list, but in bit position 1. The structure looks like
 # this :
 #
 # [ 2, [
 #     [ 3, [ 1, 9 ] ]
 #     [ 1, [ 5, 6 ] ]
 # ] ]
 #
 # The resulting tree structure can now be converted into a set of nested
 # ternary expressions over the bits of an input number. In this case, the
 # ternary expression would be (x & 4 ? x & 1 ? 3 : 2 : x & 8 ? 1 : 0). The end
 # result is that the input values are hashed without collision into the range
 # [0, N) where N is the number of input values, without holes. This is
 # particular useful for creating sparse maps using simple arrays in C.

 # macro() expresses the name of a hash output
 #sub macro { "INDEX($_[0])" }    # wrapped in a macro
 sub macro { shift }             # bare

 # indexer() expresses access to a particular element of $name
 #sub indexer { "$name\[$_[0]\]" }        # an array indexer
 #sub indexer { "($name) & 1 << $_[0]" }    # a bitmask indexer
 # a clever bitmask indexer that seeks to minimize its length
 sub indexer {
    my $expr = "1 << $_[0]";
    my $len  = length $expr;
    my $val  = eval $expr;
    "$name & " . ($len < length $val ? $expr : $val)
 }

 # Takes a list of input numbers; returns a structure as described above
 sub build_structure {
    my ($i, $bits, @nums) = @_;

    my @masked = map {
        my $mask = 1 << $_;
        [ [ grep { !($mask & $_) } @nums ], [ grep { ($mask & $_) } @nums ] ]
    } 0 .. $bits - 1;

    my @spread = map {
        my $mask = 1 << $_;
        [ $_ => [ (scalar @{ $masked[$_][0] }), (scalar @{ $masked[$_][1] }) ] ]
    } 0 .. $bits - 1;

    my @interesting = sort {
        abs($a->[1][0] - $a->[1][1]) - abs($b->[1][0] - $b->[1][1])
    } @spread;
    my ($bitpos, $split) = @{ $interesting[0] };
    my ($left, $right);

    ($i, $left ) = $split->[0] > 1
        ? build_structure($i, $bits, @{ $masked[$bitpos][0] })
        : ($i + 1, macro($i));
    ($i, $right) = $split->[1] > 1
        ? build_structure($i, $bits, @{ $masked[$bitpos][1] })
        : ($i + 1, macro($i));

    my $outer = [ $bitpos, $left, $right ];
    return ($i => $outer);
 }

 # Takes a structure built by build_structure(); returns a ternary
 # expression as a string, using $name
 sub build_expr {
    my ($struct) = @_;
    my ($bitpos, $left, $right) = @$struct;
    my $leftstr  = ref $left  ? build_expr($left ) : $left;
    my $rightstr = ref $right ? build_expr($right) : $right;
    # "right" and "left" are swapped here because truth comes before
    # falsehood in a ternary expression, but afterward in the original
    # structure layout
    return indexer($bitpos) . " ? $rightstr : $leftstr";
 }

 sub get_expr {
    my (@lines) = @_;
    my $bits = int(1 + log(max @lines) / log(2));
    my ($index, $struct) = build_structure(0, $bits, @lines);
    (my $string = build_expr($struct)) =~ s/\s+//g;
    return $string;
 }

 # If called as a script, print out the expression generated from the lines of
 # standard input. Otherwise, don't do anything until called.
 if (!caller) {
    my @lines = map { chomp; $_ } <>;
    print get_expr(@lines), "\n";
 }

 1;
	#!/usr/bin/env perl
	use strict;
	use List::Util qw(max);

	my $name = "b";

	# for each significant bit position, where the sequence through the bit
	# positions is chosen such that each bit chosen divides the remaining
	# search space as nearly evenly as possible, construct an array that
	# looks like this:
	#
	# [ B, [ L, R ] ]
	#
	# where B is the number of the current bitposition
	# L is either a number or another array like this one
	# R is defined similarly to but independently from L
	#
	# where L and R are constructed recursively. L is the "left" side of the
	# resulting tree structure from this node downward, and R is the "right"
	# side. The left side is the side that corresponds to bit position B in
	# the input string being zero; the right side corresponds to a one.
	#
	# For example, consider as input the list of integers (1, 5, 6, 9). These
	# numbers expressed in binary are
	#
	# 3210 (bit position)
	# ____
	# 0001
	# 0101
	# 0110
	# 1001
	#
	# The bit-position that causes the most even split is position 2. The first
	# (unelaborated) level of the structure would look like this :
	#
	# [ 2, [ L, R ] ]
	#
	# Taking out position 2 leaves us with two lists :
	#
	# 310 (bit position)
	# ___
	# 001 (from 1)
	# 101 (from 9)
	# ---
	# 001 (from 5)
	# 010 (from 6)
	#
	# Now from the first list, we can divide into two lists based on bit position 3.
	# After we do so, there is no further differentiation (all remaining values are
	# equal in other bit positions), so the recursion ends. A similar operation
	# occurs for the second list, but in bit position 1. The structure looks like
	# this :
	#
	# [ 2, [
	# [ 3, [ 1, 9 ] ]
	# [ 1, [ 5, 6 ] ]
	# ] ]
	#
	# The resulting tree structure can now be converted into a set of nested
	# ternary expressions over the bits of an input number. In this case, the
	# ternary expression would be (x & 4 ? x & 1 ? 3 : 2 : x & 8 ? 1 : 0). The end
	# result is that the input values are hashed without collision into the range
	# [0, N) where N is the number of input values, without holes. This is
	# particular useful for creating sparse maps using simple arrays in C.

	# macro() expresses the name of a hash output
	#sub macro { "INDEX($_[0])" } # wrapped in a macro
	sub macro { shift } # bare

	# indexer() expresses access to a particular element of $name
	#sub indexer { "$name\[$_[0]\]" } # an array indexer
	#sub indexer { "($name) & 1 << $_[0]" } # a bitmask indexer
	# a clever bitmask indexer that seeks to minimize its length
	sub indexer {
	my $expr = "1 << $_[0]";
	my $len = length $expr;
	my $val = eval $expr;
	"$name & " . ($len < length $val ? $expr : $val)
	}

	# Takes a list of input numbers; returns a structure as described above
	sub build_structure {
	my ($i, $bits, @nums) = @_;

	my @masked = map {
	my $mask = 1 << $_;
	[ [ grep { !($mask & $_) } @nums ], [ grep { ($mask & $_) } @nums ] ]
	} 0 .. $bits - 1;

	my @spread = map {
	my $mask = 1 << $_;
	[ $_ => [ (scalar @{ $masked[$_][0] }), (scalar @{ $masked[$_][1] }) ] ]
	} 0 .. $bits - 1;

	my @interesting = sort {
	abs($a->[1][0] - $a->[1][1]) - abs($b->[1][0] - $b->[1][1])
	} @spread;
	my ($bitpos, $split) = @{ $interesting[0] };
	my ($left, $right);

	($i, $left ) = $split->[0] > 1
	? build_structure($i, $bits, @{ $masked[$bitpos][0] })
	: ($i + 1, macro($i));
	($i, $right) = $split->[1] > 1
	? build_structure($i, $bits, @{ $masked[$bitpos][1] })
	: ($i + 1, macro($i));

	my $outer = [ $bitpos, $left, $right ];
	return ($i => $outer);
	}

	# Takes a structure built by build_structure(); returns a ternary
	# expression as a string, using $name
	sub build_expr {
	my ($struct) = @_;
	my ($bitpos, $left, $right) = @$struct;
	my $leftstr = ref $left ? build_expr($left ) : $left;
	my $rightstr = ref $right ? build_expr($right) : $right;
	# "right" and "left" are swapped here because truth comes before
	# falsehood in a ternary expression, but afterward in the original
	# structure layout
	return indexer($bitpos) . " ? $rightstr : $leftstr";
	}

	sub get_expr {
	my (@lines) = @_;
	my $bits = int(1 + log(max @lines) / log(2));
	my ($index, $struct) = build_structure(0, $bits, @lines);
	(my $string = build_expr($struct)) =~ s/\s+//g;
	return $string;
	}

	# If called as a script, print out the expression generated from the lines of
	# standard input. Otherwise, don't do anything until called.
	if (!caller) {
	my @lines = map { chomp; $_ } <>;
	print get_expr(@lines), "\n";
	}

	1;