Skip to content

Instantly share code, notes, and snippets.

@ruz
Created January 9, 2013 19:25
Show Gist options
  • Save ruz/4496039 to your computer and use it in GitHub Desktop.
Save ruz/4496039 to your computer and use it in GitHub Desktop.
use 5.014;
use strict;
use warnings;
use Marpa::R2;
use MarpaX::Repa::Lexer;
use MarpaX::Repa::Actions;
my %RE;
$RE{'WSP'} = qr{[ \t]};
$RE{'NON-ASCII'} = qr/
([\xC2-\xDF] | \xE0 [\xA0-\xBF] | \xED [\x80-\x9F]) [\x80-\xBF]
| ([\xE1-\xEC] | [\xEE-\xEF] | \xF0 [\x90-\xBF] | \xF4 [\x80-\x8F]) [\x80-\xBF]{2}
| ([\xF1-\xF3]) [\x80-\xBF]{3}
/x;
$RE{'TEXT-CHAR'} = qr/
\\[\\n,]
| $RE{'WSP'}
| $RE{'NON-ASCII'}
| [\x21-\x2B\x2D-\x5B\x5D-\x7E]
/x;
$RE{'SAFE-CHAR'} = qr/
[!\x23-\x2B\x2D-\x39\x3C-\x7E]
| $RE{'WSP'}
| $RE{'NON-ASCII'}
/x;
$RE{'QSAFE-CHAR'} = qr{[!\x23-\x7E]|$RE{'WSP'}|$RE{'NON-ASCII'}};
my $lexer = MarpaX::Repa::Lexer->new(
debug => 1,
recognizer => recognizer(),
tokens => {
DOT => { match => '.', store => 'undef' },
COLON => { match => ':', store => 'undef' },
SEMICOLON => { match => ';', store => 'undef' },
EQUAL => { match => '=', store => 'undef' },
COMMA => { match => ',', store => 'undef' },
BEGIN => { match => 'BEGIN:VCARD', store => 'undef' },
END => { match => 'END:VCARD', store => 'undef' },
VERSION => {
match => qr{VERSION:([1-4]\.[0-9]+)},
store => sub { return \(split /:/, ${$_[1]})[1] },
},
CRLF => { match => qr{\x0D?\x0A}, store => 'undef' }, # loose
'A-D-D' => { match => qr{[A-Za-z0-9-]+}, store => 'scalar' },
'text' => { match => qr{$RE{'TEXT-CHAR'}+}, store => 'scalar' },
'safe' => { match => qr{$RE{'SAFE-CHAR'}+}, store => 'scalar' },
'dquoted' => { match => qr{"$RE{'QSAFE-CHAR'}*"}, store => 'scalar' },
},
);
my $rec = $lexer->recognize(\*DATA);
$rec->end_input;
use Data::Dumper;
print Data::Dumper::Dumper( $rec->value );
sub recognizer {
use Marpa::R2::Grammar;
my $grammar = Marpa::R2::Grammar->new( {
start => 'vCard',
actions => 'Parse::vCard::Actions::v4',
default_action => 'do_what_I_mean',
rules => [
[ vCard => [qw/BEGIN CRLF VERSION CRLF content END CRLF/], 'do_vcard' ],
{ lhs => 'content', rhs => [qw/content-line/], min => 1, action => 'do_list' },
[ 'content-line' => [qw/content-name params COLON value CRLF/], 'do_content_line' ],
[ 'content-name' => [qw/name/], 'do_content_line_name' ],
[ 'content-name' => [qw/group DOT name/], 'do_content_line_name' ],
[ group => [qw/A-D-D/] ],
[ name => [qw/A-D-D/] ],
[ params => [], 'do_ignore' ],
[ params => [qw/SEMICOLON param-list/], 'do_scalar_or_list' ],
{
lhs => 'param-list', rhs => [qw/param/],
min => 1, separator => 'SEMICOLON', proper => 1,
action => 'do_flat_to_list',
},
[ param => [qw/any-param/], 'do_scalar_or_list' ],
[ 'any-param' => [qw/any-param-name EQUAL param-values/], 'do_scalar_or_list' ],
[ 'any-param-name' => [qw(iana-token)], 'do_scalar_or_list' ],
[ 'any-param-name' => [qw(x-name)], 'do_scalar_or_list' ],
[ 'any-param-name' => [qw(A-D-D)], 'do_scalar_or_list' ],
{
lhs => 'param-values', rhs => [qw/param-value COMMA param-values/],
action => 'do_scalar_or_list',
},
{
lhs => 'param-values', rhs => [qw/param-value/], action => 'do_scalar_or_list',
},
[ 'param-value' => [qw()], 'do_ignore' ],
[ 'param-value' => [qw(safe)], 'do_scalar_or_list' ],
[ 'param-value' => [qw(dquoted)], 'do_scalar_or_list' ],
[ value => [qw()] ],
[ value => [qw(text)], 'do_scalar_or_list' ],
[ value => [qw(value-list)], 'do_scalar_or_list' ],
[ value => [qw(boolean)], 'do_scalar_or_list' ],
[ value => [qw(URI)], 'do_scalar_or_list' ],
[ value => [qw(utc-offset)], 'do_scalar_or_list' ],
[ value => [qw(Language-Tag)], 'do_scalar_or_list' ],
[ value => [qw(iana-valuespec)], 'do_scalar_or_list' ],
{
lhs => 'value-list', rhs => [qw(value-listable)],
min => 1, separator => 'COMMA', proper => 1,
},
[ 'value-listable' => [qw(text)] ],
[ 'value-listable' => [qw(date)] ],
[ 'value-listable' => [qw(time)] ],
[ 'value-listable' => [qw(date-time)] ],
[ 'value-listable' => [qw(date-and-or-time)] ],
[ 'value-listable' => [qw(timestamp)] ],
[ 'value-listable' => [qw(integer)] ],
[ 'value-listable' => [qw(float)] ],
],
});
$grammar->precompute;
use Marpa::R2::Recognizer;
return Marpa::R2::Recognizer->new( { grammar => $grammar } );
}
package Parse::vCard::Actions::v4;
use MarpaX::Repa::Actions '-base';
sub do_vcard {
shift;
@_ = grep defined, @_;
my @res = (
{ name => 'VERSION', value => shift @_ },
@{ shift @_ },
);
foreach my $e ( @res ) {
delete $e->{$_} foreach grep !defined $e->{$_} || !length $e->{$_}, keys %$e;
}
return \@res;
}
sub do_content_line {
shift;
@_ = grep defined, @_;
return { %{shift @_}, params => shift, value => shift };
}
sub do_content_line_name {
shift;
return { name => pop->{'value'}, group => (shift||{})->{'value'} };
}
package main;
__DATA__
BEGIN:VCARD
VERSION:4.0
UID:urn:uuid:4fbe8971-0bc3-424c-9c26-36c3e1eff6b1
FN:J. Doe
N:Doe;J.;;;
EMAIL;PID=1.1:[email protected]
EMAIL;PID=2.1:[email protected]
EMAIL;PID=2.2:[email protected]
TEL;PID=1.1;VALUE=uri:tel:+1-555-555-5555
TEL;PID=2.1,2.2;VALUE=uri:tel:+1-666-666-6666
CLIENTPIDMAP:1;urn:uuid:53e374d9-337e-4727-8803-a1e9c14e0556
CLIENTPIDMAP:2;urn:uuid:1f762d2b-03c4-4a83-9a03-75ff658a6eee
END:VCARD
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment