Created
January 9, 2013 19:25
-
-
Save ruz/4496039 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use 5.014; | |
use strict; | |
use warnings; | |
use Marpa::R2; | |
use MarpaX::Repa::Lexer; | |
use MarpaX::Repa::Actions; | |
my %RE; | |
$RE{'WSP'} = qr{[ \t]}; | |
$RE{'NON-ASCII'} = qr/ | |
([\xC2-\xDF] | \xE0 [\xA0-\xBF] | \xED [\x80-\x9F]) [\x80-\xBF] | |
| ([\xE1-\xEC] | [\xEE-\xEF] | \xF0 [\x90-\xBF] | \xF4 [\x80-\x8F]) [\x80-\xBF]{2} | |
| ([\xF1-\xF3]) [\x80-\xBF]{3} | |
/x; | |
$RE{'TEXT-CHAR'} = qr/ | |
\\[\\n,] | |
| $RE{'WSP'} | |
| $RE{'NON-ASCII'} | |
| [\x21-\x2B\x2D-\x5B\x5D-\x7E] | |
/x; | |
$RE{'SAFE-CHAR'} = qr/ | |
[!\x23-\x2B\x2D-\x39\x3C-\x7E] | |
| $RE{'WSP'} | |
| $RE{'NON-ASCII'} | |
/x; | |
$RE{'QSAFE-CHAR'} = qr{[!\x23-\x7E]|$RE{'WSP'}|$RE{'NON-ASCII'}}; | |
my $lexer = MarpaX::Repa::Lexer->new( | |
debug => 1, | |
recognizer => recognizer(), | |
tokens => { | |
DOT => { match => '.', store => 'undef' }, | |
COLON => { match => ':', store => 'undef' }, | |
SEMICOLON => { match => ';', store => 'undef' }, | |
EQUAL => { match => '=', store => 'undef' }, | |
COMMA => { match => ',', store => 'undef' }, | |
BEGIN => { match => 'BEGIN:VCARD', store => 'undef' }, | |
END => { match => 'END:VCARD', store => 'undef' }, | |
VERSION => { | |
match => qr{VERSION:([1-4]\.[0-9]+)}, | |
store => sub { return \(split /:/, ${$_[1]})[1] }, | |
}, | |
CRLF => { match => qr{\x0D?\x0A}, store => 'undef' }, # loose | |
'A-D-D' => { match => qr{[A-Za-z0-9-]+}, store => 'scalar' }, | |
'text' => { match => qr{$RE{'TEXT-CHAR'}+}, store => 'scalar' }, | |
'safe' => { match => qr{$RE{'SAFE-CHAR'}+}, store => 'scalar' }, | |
'dquoted' => { match => qr{"$RE{'QSAFE-CHAR'}*"}, store => 'scalar' }, | |
}, | |
); | |
my $rec = $lexer->recognize(\*DATA); | |
$rec->end_input; | |
use Data::Dumper; | |
print Data::Dumper::Dumper( $rec->value ); | |
sub recognizer { | |
use Marpa::R2::Grammar; | |
my $grammar = Marpa::R2::Grammar->new( { | |
start => 'vCard', | |
actions => 'Parse::vCard::Actions::v4', | |
default_action => 'do_what_I_mean', | |
rules => [ | |
[ vCard => [qw/BEGIN CRLF VERSION CRLF content END CRLF/], 'do_vcard' ], | |
{ lhs => 'content', rhs => [qw/content-line/], min => 1, action => 'do_list' }, | |
[ 'content-line' => [qw/content-name params COLON value CRLF/], 'do_content_line' ], | |
[ 'content-name' => [qw/name/], 'do_content_line_name' ], | |
[ 'content-name' => [qw/group DOT name/], 'do_content_line_name' ], | |
[ group => [qw/A-D-D/] ], | |
[ name => [qw/A-D-D/] ], | |
[ params => [], 'do_ignore' ], | |
[ params => [qw/SEMICOLON param-list/], 'do_scalar_or_list' ], | |
{ | |
lhs => 'param-list', rhs => [qw/param/], | |
min => 1, separator => 'SEMICOLON', proper => 1, | |
action => 'do_flat_to_list', | |
}, | |
[ param => [qw/any-param/], 'do_scalar_or_list' ], | |
[ 'any-param' => [qw/any-param-name EQUAL param-values/], 'do_scalar_or_list' ], | |
[ 'any-param-name' => [qw(iana-token)], 'do_scalar_or_list' ], | |
[ 'any-param-name' => [qw(x-name)], 'do_scalar_or_list' ], | |
[ 'any-param-name' => [qw(A-D-D)], 'do_scalar_or_list' ], | |
{ | |
lhs => 'param-values', rhs => [qw/param-value COMMA param-values/], | |
action => 'do_scalar_or_list', | |
}, | |
{ | |
lhs => 'param-values', rhs => [qw/param-value/], action => 'do_scalar_or_list', | |
}, | |
[ 'param-value' => [qw()], 'do_ignore' ], | |
[ 'param-value' => [qw(safe)], 'do_scalar_or_list' ], | |
[ 'param-value' => [qw(dquoted)], 'do_scalar_or_list' ], | |
[ value => [qw()] ], | |
[ value => [qw(text)], 'do_scalar_or_list' ], | |
[ value => [qw(value-list)], 'do_scalar_or_list' ], | |
[ value => [qw(boolean)], 'do_scalar_or_list' ], | |
[ value => [qw(URI)], 'do_scalar_or_list' ], | |
[ value => [qw(utc-offset)], 'do_scalar_or_list' ], | |
[ value => [qw(Language-Tag)], 'do_scalar_or_list' ], | |
[ value => [qw(iana-valuespec)], 'do_scalar_or_list' ], | |
{ | |
lhs => 'value-list', rhs => [qw(value-listable)], | |
min => 1, separator => 'COMMA', proper => 1, | |
}, | |
[ 'value-listable' => [qw(text)] ], | |
[ 'value-listable' => [qw(date)] ], | |
[ 'value-listable' => [qw(time)] ], | |
[ 'value-listable' => [qw(date-time)] ], | |
[ 'value-listable' => [qw(date-and-or-time)] ], | |
[ 'value-listable' => [qw(timestamp)] ], | |
[ 'value-listable' => [qw(integer)] ], | |
[ 'value-listable' => [qw(float)] ], | |
], | |
}); | |
$grammar->precompute; | |
use Marpa::R2::Recognizer; | |
return Marpa::R2::Recognizer->new( { grammar => $grammar } ); | |
} | |
package Parse::vCard::Actions::v4; | |
use MarpaX::Repa::Actions '-base'; | |
sub do_vcard { | |
shift; | |
@_ = grep defined, @_; | |
my @res = ( | |
{ name => 'VERSION', value => shift @_ }, | |
@{ shift @_ }, | |
); | |
foreach my $e ( @res ) { | |
delete $e->{$_} foreach grep !defined $e->{$_} || !length $e->{$_}, keys %$e; | |
} | |
return \@res; | |
} | |
sub do_content_line { | |
shift; | |
@_ = grep defined, @_; | |
return { %{shift @_}, params => shift, value => shift }; | |
} | |
sub do_content_line_name { | |
shift; | |
return { name => pop->{'value'}, group => (shift||{})->{'value'} }; | |
} | |
package main; | |
__DATA__ | |
BEGIN:VCARD | |
VERSION:4.0 | |
UID:urn:uuid:4fbe8971-0bc3-424c-9c26-36c3e1eff6b1 | |
FN:J. Doe | |
N:Doe;J.;;; | |
EMAIL;PID=1.1:[email protected] | |
EMAIL;PID=2.1:[email protected] | |
EMAIL;PID=2.2:[email protected] | |
TEL;PID=1.1;VALUE=uri:tel:+1-555-555-5555 | |
TEL;PID=2.1,2.2;VALUE=uri:tel:+1-666-666-6666 | |
CLIENTPIDMAP:1;urn:uuid:53e374d9-337e-4727-8803-a1e9c14e0556 | |
CLIENTPIDMAP:2;urn:uuid:1f762d2b-03c4-4a83-9a03-75ff658a6eee | |
END:VCARD | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment