Created
January 19, 2015 04:43
-
-
Save arodland/cefdd4212e043f45fa2f to your computer and use it in GitHub Desktop.
Working parser for a simple length-prefixed language
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use Marpa::R2; | |
use Data::Dumper; | |
# array: 'A' count ':' element+ | |
# string: 'S' length ':' byte+ | |
# element: array | string | |
my $grammar = Marpa::R2::Scanless::G->new({ | |
source => \q{ | |
:default ::= action => [values] | |
:start ::= document | |
document ::= array | string | |
array ::= 'A' <array count> ':' elements <end of array> action => array | |
elements ::= element+ | |
element ::= string action => ::first | |
element ::= array action => ::first | |
event 'predicted element' = predicted element | |
event 'completed element' = completed element | |
string ::= 'S' <string length> ':' bytes action => string | |
<array count> ~ [\d]+ | |
:lexeme ~ <array count> pause => after | |
<string length> ~ [\d]+ | |
:lexeme ~ <string length> pause => after | |
bytes ~ [\x00-\xff] | |
:lexeme ~ bytes pause => before | |
<end of array> ~ [^\d\D] | |
}, | |
}); | |
my $slr = Marpa::R2::Scanless::R->new({ | |
grammar => $grammar, | |
semantics_package => 'main', | |
}); | |
sub string { | |
return $_[4]; | |
} | |
sub array { | |
return $_[4]; | |
} | |
my $input = 'A2:A2:S5:helloS5:worldS1:!'; | |
# This one will cause a parse error, as it should | |
# (3 elements in a 2-element array) | |
# my $input = 'A2:S5:helloS5:worldS5:extra'; | |
my ($string_length, @array_count); | |
INPUT: for( | |
my $pos = $slr->read( \$input ); | |
$pos < length($input); | |
$pos = $slr->resume($pos) | |
) { | |
my $lexeme = $slr->pause_lexeme; | |
if ($lexeme) { | |
my ($lexeme_start, $lexeme_length) = $slr->pause_span; | |
# warn "Paused at $lexeme\n"; | |
if ($lexeme eq 'string length') { | |
$string_length = 0 + $slr->literal($lexeme_start, $lexeme_length); | |
$pos = $lexeme_start + $lexeme_length; | |
warn "Set string length to $string_length\n"; | |
} elsif ($lexeme eq 'bytes') { | |
warn "Reading $string_length bytes\n"; | |
$slr->lexeme_read('bytes', $lexeme_start, $string_length); | |
$pos = $lexeme_start + $string_length; | |
undef $string_length; | |
} elsif ($lexeme eq 'array count') { | |
my $count = 0 + $slr->literal($lexeme_start, $lexeme_length); | |
push @array_count, $count; | |
warn "Beginning array of $count elements\n"; | |
$pos = $lexeme_start + $lexeme_length; | |
} | |
} | |
EVENTS: { | |
my %events; | |
$events{ $_->[0] } ++ for @{ $slr->events }; | |
if ($events{'completed element'}) { | |
$array_count[-1] --; | |
warn "got an element, $array_count[-1] left\n"; | |
} | |
if ($events{'predicted element'}) { | |
if ($array_count[-1] == 0) { | |
warn "array complete\n"; | |
pop @array_count; | |
$slr->lexeme_read('end of array', $slr->pos, 0); | |
redo EVENTS; # Finishing the array might have caused another "completed element" | |
} | |
} | |
} | |
} | |
my $val = $slr->value; | |
if ($$val) { | |
print Dumper($$val); | |
} else { | |
print "Error"; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment