-
-
Save mishin/3ecd201730f983d6bc69 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use strict; | |
use warnings; | |
use feature ':5.10'; | |
use Marpa::XS; | |
use Data::Dumper; | |
# | |
# a simple CSS snippet for testing | |
# | |
my $text = q{ | |
@charset "ebcdic"; | |
// ignored comment -- not CSS compatible, however | |
@import blabla("asdf.css"); | |
div.class { | |
font-size: 13px; | |
font: "Helvetica Regular"; | |
} | |
/* processed comment */ | |
}; | |
my %symbol_for_char = ( | |
'{' => 'OPEN_CURLY', '}' => 'CLOSE_CURLY', | |
'(' => 'OPEN_PAREN', ')' => 'CLOSE_PAREN', | |
'[' => 'OPEN_BRACKET', ']' => 'CLOSE_BRACKET', | |
';' => 'SEMICOLON', ':' => 'COLON', | |
',' => 'COMMA', '.' => 'DOT', | |
'*' => 'SPLASH', '#' => 'HASH', | |
'/' => 'SLASH', '>' => 'GT', | |
'+' => 'PLUS', | |
); | |
# | |
# Phase 1 -- sequentially scan terminals | |
# | |
my @tokens; | |
sub t { | |
push @tokens, [ @_ ]; | |
# must return empty string, because we might be inside a replacement | |
return ''; | |
} | |
while (length($text)) { | |
no warnings; | |
$text =~ s{\A \s* // .*? ^}{}xms and next; | |
$text =~ s{\A \s* (/\* .*? \*/) \s*}{ t COMMENT => $1 }exms and next; | |
$text =~ s{\A \s* \@([a-z]+) \s*}{ t "AT_\U$1" }exms and next; | |
$text =~ s{\A \s* !important \s*}{ t 'IMPORTANT' }exms and next; | |
$text =~ s{\A \s* (\#[0-9a-fA-F]{3}(?:[0-9a-fA-F]{3})?) \s*}{ t HEXCOLOR => $1 }exms and next; | |
$text =~ s{\A \s* (['"]) ((?:[^\\\1] | \\.)*?) \1 \s*}{ t STRING => $2 }exms and next; | |
$text =~ s{\A \s* ([~|]?=) \s*}{ t ATTR_CMP => $1 }exms and next; | |
$text =~ s{\A \s+ }{ t 'SPACE' }exms and next; | |
$text =~ s{\A \s* (-?[_a-zA-Z][-_a-zA-Z0-9]*)}{t IDENT => $1}exms and next; | |
$text =~ s{\A \s* (\.\d+ | \d+ (?:\.\d*)?) \s* (%|em|ex|px|cm|mm|pt|pc|deg|rad|grad|ms|s|hz|khz)? \s*}{ t NUMBER => "$1$2" }exms and next; | |
my $char = substr($text,0,1,''); | |
if (exists($symbol_for_char{$char})) { | |
t $symbol_for_char{$char}; | |
} else { | |
t CHAR => $char; | |
} | |
} | |
# | |
# phase 2 -- define grammar | |
# | |
my $grammar = Marpa::XS::Grammar->new({ | |
start => 'css', | |
actions => 'main', | |
default_action => 'do_default', | |
terminals => [qw( | |
OPEN_CURLY CLOSE_CURLY OPEN_PAREN CLOSE_PAREN OPEN_BRACKET CLOSE_BRACKET | |
SPACE SEMICOLON COLON COMMA SPLASH DOT HASH ATTR_CMP GT PLUS | |
STRING IDENT HEXCOLOR NUMBER | |
AT_CHARSET AT_IMPORT AT_MEDIA AT_PAGE IMPORTANT | |
)], | |
rules => [ | |
{ | |
lhs => 'css', | |
rhs => [qw(optional_space charset | |
optional_space imports | |
optional_space css_content | |
optional_space)], | |
}, | |
{ lhs => 'charset' }, | |
{ lhs => 'charset', rhs => [qw(AT_CHARSET STRING SEMICOLON)] }, | |
{ lhs => 'imports' }, | |
{ | |
lhs => 'imports', | |
rhs => [qw(AT_IMPORT expression SEMICOLON)], | |
}, | |
{ lhs => 'css_content', rhs => ['ruleset_media_page'], min => 0 }, | |
{ lhs => 'ruleset_media_page', rhs => [qw(ruleset)], }, | |
{ lhs => 'ruleset_media_page', rhs => [qw(media)] }, | |
{ lhs => 'ruleset_media_page', rhs => [qw(page)] }, | |
{ | |
lhs => 'ruleset', | |
rhs => [qw(optional_space selectors | |
optional_space OPEN_CURLY | |
optional_space declarations | |
optional_space CLOSE_CURLY)], | |
}, | |
{ lhs => 'declarations', rhs => [qw(declaration optional_space SEMICOLON declarations)], }, | |
{ lhs => 'declarations', rhs => [qw(declaration)], min => 0 }, | |
{ | |
lhs => 'declaration', | |
rhs => [qw(optional_space property | |
optional_space COLON | |
optional_space expression | |
optional_space prio)] }, | |
{ lhs => 'prio', rhs => [qw(IMPORTANT)] }, | |
{ lhs => 'prio' }, | |
{ lhs => 'selectors', rhs => [qw(selector COMMA selectors)] }, | |
{ lhs => 'selectors', rhs => [qw(selector)] }, | |
{ lhs => 'selector', rhs => [qw(element specializers)] }, | |
{ lhs => 'selector', rhs => [qw(specializer)], min => 1 }, | |
{ lhs => 'selector', rhs => [qw(selector divider selector)] }, | |
{ lhs => 'element', rhs => [qw(IDENT)] }, | |
{ lhs => 'element', rhs => [qw(SPLASH)] }, | |
{ lhs => 'divider', rhs => [qw(SPACE)] }, | |
{ lhs => 'divider', rhs => [qw(GT)] }, | |
{ lhs => 'divider', rhs => [qw(PLUS)] }, | |
{ lhs => 'specializers', rhs => [qw(specializer)], min => 0 }, | |
{ lhs => 'specializer', rhs => [qw(DOT IDENT)] }, | |
{ lhs => 'specializer', rhs => [qw(HASH IDENT)] }, | |
{ lhs => 'specializer', rhs => [qw(COLON ident_or_function)] }, | |
{ lhs => 'specializer', rhs => [qw(OPEN_BRACKET attribute CLOSE_BRACKET)] }, | |
{ lhs => 'attribute', rhs => [qw(IDENT)] }, | |
{ lhs => 'attribute', rhs => [qw(IDENT ATTR_CMP ident_or_string)] }, | |
{ lhs => 'property', rhs => [qw(expression)] }, | |
{ lhs => 'media', rhs => [qw(AT_MEDIA idents OPEN_CURLY ruleset CLOSE_CURLY)] }, | |
{ lhs => 'page', rhs => [qw(AT_PAGE pseudo_page OPEN_CURLY declarations CLOSE_CURLY)] }, | |
{ lhs => 'pseudo_page', rhs => [qw(COLON idents)] }, | |
{ lhs => 'pseudo_page' }, | |
{ lhs => 'expression', rhs => ['STRING'] }, | |
{ lhs => 'expression', rhs => ['NUMBER'] }, | |
{ lhs => 'expression', rhs => ['IDENT'] }, | |
{ lhs => 'expression', rhs => ['HEXCOLOR'] }, | |
{ lhs => 'expression', rhs => ['function_call'] }, | |
{ lhs => 'expression', rhs => [qw(OPEN_PAREN expression CLOSE_PAREN)] }, | |
{ lhs => 'ident_or_string', rhs => [qw(IDENT)] }, | |
{ lhs => 'ident_or_string', rhs => [qw(STRING)] }, | |
{ lhs => 'ident_or_function', rhs => [qw(IDENT)] }, | |
{ lhs => 'ident_or_function', rhs => [qw(function_call)] }, | |
{ lhs => 'idents', rhs => [qw(IDENT)] }, | |
{ lhs => 'idents', rhs => [qw(IDENT COMMA idents)] }, | |
{ lhs => 'function_call', rhs => [qw(IDENT OPEN_PAREN expression CLOSE_PAREN)] }, | |
{ lhs => 'optional_space', rhs => [qw(SPACE)], min => 0 }, | |
], | |
}); | |
$grammar->precompute; | |
# | |
# phase 3 -- parse our tokens | |
# | |
my $rec = Marpa::XS::Recognizer->new( { grammar => $grammar } ); | |
foreach my $token (@tokens) { | |
if ($token->[0] eq 'COMMENT') { | |
# process comments in a different way | |
} elsif (defined $rec->read( @$token )) { | |
say "reading Token: @$token"; | |
} else { | |
die "Error reading Token: @$token"; | |
}; | |
} | |
say Data::Dumper->Dump([$rec->value], ['value']); | |
# | |
# some actions for testing | |
# | |
sub charset { | |
say Data::Dumper->Dump([ [@_] ],[ 'Charset' ]); | |
return; | |
} | |
sub imports { | |
say Data::Dumper->Dump([ [@_] ],[ 'Imports' ]); | |
return; | |
} | |
sub function_call { | |
say Data::Dumper->Dump([ [@_] ],[ 'Function_Call' ]); | |
return "$_[1]($_[3])"; | |
} | |
sub expression { | |
say Data::Dumper->Dump([ [@_] ],[ 'Expression' ]); | |
return $_[1]; | |
} | |
# default action if specific action is not defined | |
sub do_default { | |
say Data::Dumper->Dump([ [@_] ],[ 'default' ]); | |
return; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment