Skip to content

Instantly share code, notes, and snippets.

@arodland
Created January 19, 2015 04:43
Show Gist options
  • Save arodland/cefdd4212e043f45fa2f to your computer and use it in GitHub Desktop.
Save arodland/cefdd4212e043f45fa2f to your computer and use it in GitHub Desktop.
Working parser for a simple length-prefixed language
#!/usr/bin/perl
use strict;
use warnings;
use Marpa::R2;
use Data::Dumper;
# array: 'A' count ':' element+
# string: 'S' length ':' byte+
# element: array | string
my $grammar = Marpa::R2::Scanless::G->new({
source => \q{
:default ::= action => [values]
:start ::= document
document ::= array | string
array ::= 'A' <array count> ':' elements <end of array> action => array
elements ::= element+
element ::= string action => ::first
element ::= array action => ::first
event 'predicted element' = predicted element
event 'completed element' = completed element
string ::= 'S' <string length> ':' bytes action => string
<array count> ~ [\d]+
:lexeme ~ <array count> pause => after
<string length> ~ [\d]+
:lexeme ~ <string length> pause => after
bytes ~ [\x00-\xff]
:lexeme ~ bytes pause => before
<end of array> ~ [^\d\D]
},
});
my $slr = Marpa::R2::Scanless::R->new({
grammar => $grammar,
semantics_package => 'main',
});
sub string {
return $_[4];
}
sub array {
return $_[4];
}
my $input = 'A2:A2:S5:helloS5:worldS1:!';
# This one will cause a parse error, as it should
# (3 elements in a 2-element array)
# my $input = 'A2:S5:helloS5:worldS5:extra';
my ($string_length, @array_count);
INPUT: for(
my $pos = $slr->read( \$input );
$pos < length($input);
$pos = $slr->resume($pos)
) {
my $lexeme = $slr->pause_lexeme;
if ($lexeme) {
my ($lexeme_start, $lexeme_length) = $slr->pause_span;
# warn "Paused at $lexeme\n";
if ($lexeme eq 'string length') {
$string_length = 0 + $slr->literal($lexeme_start, $lexeme_length);
$pos = $lexeme_start + $lexeme_length;
warn "Set string length to $string_length\n";
} elsif ($lexeme eq 'bytes') {
warn "Reading $string_length bytes\n";
$slr->lexeme_read('bytes', $lexeme_start, $string_length);
$pos = $lexeme_start + $string_length;
undef $string_length;
} elsif ($lexeme eq 'array count') {
my $count = 0 + $slr->literal($lexeme_start, $lexeme_length);
push @array_count, $count;
warn "Beginning array of $count elements\n";
$pos = $lexeme_start + $lexeme_length;
}
}
EVENTS: {
my %events;
$events{ $_->[0] } ++ for @{ $slr->events };
if ($events{'completed element'}) {
$array_count[-1] --;
warn "got an element, $array_count[-1] left\n";
}
if ($events{'predicted element'}) {
if ($array_count[-1] == 0) {
warn "array complete\n";
pop @array_count;
$slr->lexeme_read('end of array', $slr->pos, 0);
redo EVENTS; # Finishing the array might have caused another "completed element"
}
}
}
}
my $val = $slr->value;
if ($$val) {
print Dumper($$val);
} else {
print "Error";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment