Created
July 11, 2017 08:27
-
-
Save bdw/ea18e04d86d38b3484387143985d5b41 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings FATAL => 'all'; | |
use Data::Dumper; | |
my $tokenize = qr/ | |
\A | |
(?<open>\() | | |
(?<close>\)) | | |
(?<space>\s+) | | |
(?<comment>\#.+) | | |
(?<string>\".*?") | | |
(?<word>[^\s\(\)\#]+) | |
/x; | |
sub parser { | |
my ($class, $input) = @_; | |
return bless { | |
input => $input, | |
buffer => '', | |
token => undef, | |
match => undef, | |
macros => {}, | |
}, $class; | |
} | |
sub empty { | |
my $self = shift; | |
length($self->{buffer}) == 0 and eof($self->{input}); | |
} | |
sub current { | |
my $self = shift; | |
unless (length($self->{buffer}) or eof($self->{input})) { | |
$self->{buffer} = readline($self->{input}); | |
} | |
$self->{buffer}; | |
} | |
sub token { | |
my $self = shift; | |
my $line = $self->current; | |
# cache token | |
return @$self{'token','match'} if $self->{token}; | |
return unless length($line); | |
return unless $line =~ $tokenize; | |
@$self{'token','match'} = %+; | |
} | |
sub _shift { | |
my ($self) = @_; | |
confess "Can't shift" unless $self->{token}; | |
my $length = length($self->{match}); | |
@$self{'token','match'} = (undef,undef); | |
substr($self->{buffer}, 0, $length, ''); | |
} | |
sub expect { | |
my ($self, $expect) = @_; | |
my ($token, $match) = $self->token; | |
die "Got $token but expected $expect" unless $expect eq $token; | |
$self->_shift; | |
} | |
sub peek { | |
my ($self, $expect) = @_; | |
my ($token, $match) = $self->token or return; | |
return $match if $token eq $expect; | |
} | |
sub skip { | |
my ($self, @possible) = @_; | |
my %check = map { $_ => 1 } @possible; | |
while (my ($token, $match) = $self->token) { | |
last unless $check{$token}; | |
$self->_shift; | |
} | |
} | |
sub parse { | |
my $self = shift; | |
$self->skip('comment', 'space'); | |
return if $self->empty; | |
$self->expect('open'); | |
my @expr; | |
until ($self->peek('close')) { | |
die "Could not continue reading" if $self->empty; | |
my ($token, $what) = $self->token or | |
die "Could not read a token"; | |
if ($token eq 'word' or $token eq 'string') { | |
push @expr, $self->_shift; | |
} elsif ($token eq 'open') { | |
push @expr, $self->parse; | |
} else { | |
$self->_shift; | |
} | |
} | |
$self->_shift; | |
return \@expr; | |
} | |
sub test { | |
my $parser = __PACKAGE__->parser(\*DATA); | |
eval { | |
while (my $list = $parser->parse) { | |
print Dumper($list); | |
} | |
1; | |
} or do { | |
printf "Could not parse: %s\n", $@; | |
print Dumper($parser); | |
} | |
} | |
test unless caller(); | |
__DATA__ | |
# a comment | |
(+ 3 3) | |
(a list structure) | |
(a (nested (list))) | |
(a # commment | |
(within () # a list | |
but more! #data | |
) | |
) | |
() | |
(+) (-) | |
(foo: bar! (quix quam)) | |
(&foo ^bar :baz) ("Foo bar") | |
(foo # bar "baz") | |
"bla bla bla" "qoux") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment