Created
August 13, 2015 12:27
-
-
Save bdw/f727aa99aece2ff7554d to your computer and use it in GitHub Desktop.
ruleset-generation.pl
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use Data::Dumper; | |
use strict; | |
use warnings; | |
use sexpr; | |
sub sortn { | |
sort { $a <=> $b } @_; | |
} | |
sub uniq { | |
my %h; | |
$h{$_}++ for @_; | |
return keys %h; | |
} | |
# Collect rules from the grammar | |
my (@rules, @names, @paths, @curpath); | |
sub add_rule { | |
my ($fragment, $terminal, $cost, @trace) = @_; | |
my $list = []; | |
# replace all sublist with pseudorules | |
for (my $i = 0; $i < @$fragment; $i++) { | |
my $item = $fragment->[$i]; | |
if (ref($item) eq 'ARRAY') { | |
# create pseudorule | |
my $label = sprintf('L%dP%d', scalar @rules, scalar @trace); | |
# divide costs | |
$cost /= 2; | |
add_rule($item, $label, $cost, @trace, $i); | |
push @$list, $label; | |
} else { | |
push @curpath, @trace, $i, -1 if $i > 0; | |
push @$list, $item; | |
} | |
} | |
push @curpath, @trace, -1 if @$fragment == 1 && @trace > 0; | |
# NB - only top-level fragments are associated with tiles. | |
my $rulenr = scalar @rules; | |
push @rules, [$list, $terminal, $cost]; | |
return $rulenr; | |
} | |
my $input = \*DATA; | |
my $parser = sexpr->parser($input); | |
while (my $tree = $parser->read) { | |
my ($keyword, $name, $fragment, $terminal, $cost) = @$tree; | |
if ($keyword eq 'tile:') { | |
@curpath = (); | |
my $rulenr = add_rule($fragment, $terminal, $cost); | |
$names[$rulenr] = $name; | |
$paths[$rulenr] = [@curpath, -1]; | |
} | |
} | |
close $input; | |
# initialize nonterminal sets | |
my (%nonterminal_sets, %trie); | |
$nonterminal_sets{$_->[1]} = [$_->[1]] for @rules; | |
my ($added, $deleted, $i); | |
# override hash-key-join character | |
local $; = ","; | |
do { | |
$i++; | |
# lookup table from nonterminals to nonterminalsetnames | |
my %lookup; | |
while (my ($k, $v) = each %nonterminal_sets) { | |
$lookup{$_}{$k} = 1 for @$v; | |
} | |
$lookup{$_} = [keys %{$lookup{$_}}] for keys %lookup; | |
# reinitialize trie | |
%trie = (); | |
# build it based on the terminal-to-terminalset map | |
for (my $rule_nr = 0; $rule_nr < @rules; $rule_nr++) { | |
my ($head, $n1, $n2) = @{$rules[$rule_nr][0]}; | |
if (defined $n2) { | |
for my $nt_k1 (@{$lookup{$n1}}) { | |
for my $nt_k2 (@{$lookup{$n2}}) { | |
$trie{$head, $nt_k1, $nt_k2}{$rule_nr} = $rules[$rule_nr][1]; | |
} | |
} | |
} elsif (defined $n1) { | |
for my $nt_k1 (@{$lookup{$n1}}) { | |
$trie{$head, $nt_k1, -1}{$rule_nr} = $rules[$rule_nr][1]; | |
} | |
} else { | |
$trie{$head,-1, -1}{$rule_nr} = $rules[$rule_nr][1]; | |
} | |
} | |
# generate new nonterminal-sets | |
my %new_nts; | |
for my $generated (values %trie) { | |
my @nt_set_gen = sort(uniq(values %$generated)); | |
my $nt_k = join(':', @nt_set_gen); | |
$new_nts{$nt_k} = [@nt_set_gen]; | |
} | |
# Calculate changes | |
$deleted = 0; | |
for my $k (keys %nonterminal_sets) { | |
$deleted++ unless exists $new_nts{$k}; | |
} | |
$added = scalar(keys %new_nts) - scalar(keys %nonterminal_sets) + $deleted; | |
print "Added $added and deleted $deleted\n"; | |
%nonterminal_sets = %new_nts; | |
} while ($added || $deleted); | |
print "Required $i iterations\n"; | |
# Rulesets can now be read off from the trie | |
my (@rulesets, %inversed); | |
for my $v (values %trie) { | |
my @rules = sortn(keys %$v); | |
my $name = join $;, @rules; | |
next if exists $inversed{$name}; | |
my $ruleset_nr = scalar @rulesets; | |
push @rulesets, [@rules]; | |
$inversed{$name} = $ruleset_nr; | |
} | |
# print them for me to see | |
for my $rs (@rulesets) { | |
my $key = join $;, @$rs; | |
print "$key: "; | |
my @expr = map { sexpr::encode($_) } map { $rules[$_][0] } @$rs; | |
print join("; ", @expr); | |
print "\n"; | |
} | |
__DATA__ | |
(tile: t1 (t) n 1) | |
(tile: s1 (s) n 1) | |
(tile: a1 (a n) n 1) | |
(tile: at1 (a (t)) n 1) | |
(tile: as1 (a (s)) n 1) | |
(tile: b1 (b n n) n 1) | |
(tile: bs1 (b n (s)) n 1) | |
(tile: bt1 (b n (t)) n 1) | |
(tile: bst1 (b (s) (t)) n 1) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package sexpr; | |
use strict; | |
# declare keyword syntax regex | |
my $keyword = qr/^[&\$^,]?[\w\.\[\]_\*]+[!:]?/; | |
sub parser { | |
my ($class, $input) = @_; | |
return bless { | |
input => $input, | |
buffer => '', | |
macros => {}, | |
}, $class; | |
} | |
sub read { | |
my $self = shift; | |
my $file = $self->{input}; | |
my $expr = $self->{buffer}; | |
my ($open, $close) = (0, 0); | |
while (!eof($file)) { | |
my $line = <$file>; | |
next if $line =~ m/^#|^\s*$/; | |
$expr .= $line; | |
$open = $expr =~ tr/(//; | |
$close = $expr =~ tr/)//; | |
last if ($open > 0) && ($open == $close); | |
} | |
die "End of input with unclosed template" if $open > $close; | |
my ($tree, $rest) = $self->parse($expr); | |
$self->{buffer} = $rest; | |
return $tree; | |
} | |
sub parse { | |
my ($self, $expr) = @_; | |
my $tree = []; | |
# consume initial opening parenthesis | |
return (undef, $expr) unless $expr =~ m/^\s*\(/; | |
$expr = substr($expr, $+[0]); | |
while ($expr) { | |
# remove initial space | |
$expr =~ s/^\s*//; | |
if (substr($expr, 0, 1) eq '(') { | |
# descend on opening parenthesis | |
my ($child, $rest) = $self->parse($expr); | |
$expr = $rest; | |
push @$tree, $child; | |
} elsif (substr($expr, 0, 1) eq ')') { | |
# ascend on closing parenthesis | |
$expr = substr $expr, 1; | |
last; | |
} elsif ($expr =~ m/$keyword/) { | |
# consume keyword | |
push @$tree, substr($expr, $-[0], $+[0] - $-[0]); | |
$expr = substr $expr, $+[0]; | |
} else { | |
die "Could not parse $expr"; | |
} | |
} | |
if (@$tree && substr($tree->[0], 0, 1) eq '^') { | |
if (defined $self->{macros}->{$tree->[0]}) { | |
$tree = apply_macro($self->{macros}->{$tree->[0]}, $tree); | |
} else { | |
die "Attempted to invoke undefined macro $tree->[0]"; | |
} | |
} | |
return ($tree, $expr); | |
} | |
sub decl_macro { | |
my ($self, $name, $macro) = @_; | |
die "Macro name '$name' must start with ^ symbol" unless substr($name,0,1) eq '^'; | |
die "Redeclaration of macro $name" if defined $self->{'macros'}->{$name}; | |
$self->{macros}->{$name} = $macro; | |
} | |
sub apply_macro { | |
my ($macro, $tree) = @_; | |
my $params = $macro->[0]; | |
my $args = [@$tree[1..$#$tree]]; | |
die "Incorrect number of args, got ".@$args." expected ".@$params unless @$args == @$params; | |
my %bind; | |
@bind{@$params} = @$args; | |
return fill_macro($macro->[1], \%bind); | |
} | |
sub fill_macro { | |
my ($macro, $bind) = @_; | |
my $result = []; | |
for (my $i = 0; $i < @$macro; $i++) { | |
if (ref($macro->[$i]) eq 'ARRAY') { | |
push @$result, fill_macro($macro->[$i], $bind); | |
} elsif (substr($macro->[$i], 0, 1) eq ',') { | |
if (defined $bind->{$macro->[$i]}) { | |
push @$result, $bind->{$macro->[$i]}; | |
} else { | |
die "Unmatched macro substitution: $macro->[$i]"; | |
} | |
} else { | |
push @$result, $macro->[$i]; | |
} | |
} | |
return $result; | |
} | |
sub encode { | |
my $list = shift; | |
my $out = '('; | |
for my $item (@$list) { | |
if (ref($item) eq 'ARRAY') { | |
$out .= encode($item); | |
} else { | |
$out .= "$item"; | |
} | |
$out .= " "; | |
} | |
$out .= ')'; | |
return $out; | |
} | |
1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment