Created
January 6, 2014 21:46
-
-
Save ruz/8290356 to your computer and use it in GitHub Desktop.
Problem with marpa's scanless interface
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rules L0: | |
G0 R0 'BEGIN:VCARD' ::= [B] [E] [G] [I] [N] [\:] [V] [C] [A] [R] [D] | |
G0 R1 'VERSION:4.0' ::= [V] [E] [R] [S] [I] [O] [N] [\:] [4] [\.] [0] | |
G0 R2 'END:VCARD' ::= [E] [N] [D] [\:] [V] [C] [A] [R] [D] | |
G0 R3 ':' ::= [\:] | |
G0 R4 '.' ::= [\.] | |
G0 R5 group ::= A_D_D | |
G0 R6 name ::= A_D_D | |
G0 R7 ';' ::= [\;] | |
G0 R8 '=' ::= [\=] | |
G0 R9 any_param_name ::= A_D_D | |
G0 R10 boolean ::= [T] [R] [U] [E] | |
G0 R11 boolean ::= [F] [A] [L] [S] [E] | |
G0 R12 integer ::= SIGNED_DIGITS | |
G0 R13 float ::= SIGNED_DIGITS | |
G0 R14 float ::= SIGNED_DIGITS [\.] DIGITS | |
G0 R15 utc_offset ::= SIGN DIGITx2 | |
G0 R16 utc_offset ::= SIGN DIGITx4 | |
G0 R17 URI ::= NOT_IMPLEMENTED | |
G0 R18 Language_Tag ::= NOT_IMPLEMENTED | |
G0 R19 iana_valuespec ::= NOT_IMPLEMENTED | |
G0 R20 date ::= NOT_IMPLEMENTED | |
G0 R21 date_time ::= NOT_IMPLEMENTED | |
G0 R22 date_and_or_time ::= NOT_IMPLEMENTED | |
G0 R23 time ::= NOT_IMPLEMENTED | |
G0 R24 timestamp ::= NOT_IMPLEMENTED | |
G0 R25 DIGITS ::= DIGIT + | |
G0 R26 DIGIT ::= [0-9] | |
G0 R27 DIGITx2 ::= DIGIT DIGIT | |
G0 R28 DIGITx4 ::= DIGIT DIGIT DIGIT DIGIT | |
G0 R29 SIGNED_DIGITS ::= DIGITS | |
G0 R30 SIGNED_DIGITS ::= SIGN DIGITS | |
G0 R31 SIGN ::= [\+] | |
G0 R32 SIGN ::= [\-] | |
G0 R33 NOT_IMPLEMENTED ::= [\x00] | |
G0 R34 A_D_D ::= [A-Za-z0-9-] + | |
G0 R35 text ::= TEXT_CHAR + | |
G0 R36 safe ::= SAFE_CHAR + | |
G0 R37 dquoted ::= [\"] [\"] | |
G0 R38 dquoted ::= [\"] QSAFE_CHARS [\"] | |
G0 R39 SEMICOLON ::= [\;] | |
G0 R40 COMMA ::= [\,] | |
G0 R41 TEXT_CHAR ::= [\\] [\\n,;:] | |
G0 R42 TEXT_CHAR ::= WSP | |
G0 R43 TEXT_CHAR ::= NON_ASCII | |
G0 R44 TEXT_CHAR ::= [\x21-\x2B\x2D-\x5B\x5D-\x7E] | |
G0 R45 SAFE_CHAR ::= [!\x23-\x2B\x2D-\x39\x3C-\x7E] | |
G0 R46 SAFE_CHAR ::= WSP | |
G0 R47 SAFE_CHAR ::= NON_ASCII | |
G0 R48 QSAFE_CHARS ::= QSAFE_CHAR + | |
G0 R49 QSAFE_CHAR ::= [!\x23-\x7E] | |
G0 R50 QSAFE_CHAR ::= WSP | |
G0 R51 QSAFE_CHAR ::= NON_ASCII | |
G0 R52 NON_ASCII ::= [\xC2-\xDF] [\x80-\xBF] | |
G0 R53 NON_ASCII ::= [\xE0] [\xA0-\xBF] [\x80-\xBF] | |
G0 R54 NON_ASCII ::= [\xED] [\x80-\x9F] [\x80-\xBF] | |
G0 R55 NON_ASCII ::= [\xE1-\xEC] [\x80-\xBF] [\x80-\xBF] | |
G0 R56 NON_ASCII ::= [\xEE-\xEF] [\x80-\xBF] [\x80-\xBF] | |
G0 R57 NON_ASCII ::= [\xF0] [\x90-\xBF] [\x80-\xBF] [\x80-\xBF] | |
G0 R58 NON_ASCII ::= [\xF4] [\x80-\x8F] [\x80-\xBF] [\x80-\xBF] | |
G0 R59 NON_ASCII ::= [\xF1-\xF3] [\x80-\xBF] [\x80-\xBF] [\x80-\xBF] | |
G0 R60 WSP ::= [ \t] | |
G0 R61 CRLF ::= [\x0D] [\x0A] | |
G0 R62 CRLF ::= [\x0A] | |
G0 R63 :start_lex ::= COMMA | |
G0 R64 :start_lex ::= CRLF | |
G0 R65 :start_lex ::= Language_Tag | |
G0 R66 :start_lex ::= SEMICOLON | |
G0 R67 :start_lex ::= URI | |
G0 R68 :start_lex ::= 'BEGIN:VCARD' | |
G0 R69 :start_lex ::= 'VERSION:4.0' | |
G0 R70 :start_lex ::= 'END:VCARD' | |
G0 R71 :start_lex ::= ':' | |
G0 R72 :start_lex ::= '.' | |
G0 R73 :start_lex ::= ';' | |
G0 R74 :start_lex ::= '=' | |
G0 R75 :start_lex ::= any_param_name | |
G0 R76 :start_lex ::= boolean | |
G0 R77 :start_lex ::= date | |
G0 R78 :start_lex ::= date_and_or_time | |
G0 R79 :start_lex ::= date_time | |
G0 R80 :start_lex ::= dquoted | |
G0 R81 :start_lex ::= float | |
G0 R82 :start_lex ::= group | |
G0 R83 :start_lex ::= iana_valuespec | |
G0 R84 :start_lex ::= integer | |
G0 R85 :start_lex ::= name | |
G0 R86 :start_lex ::= safe | |
G0 R87 :start_lex ::= text | |
G0 R88 :start_lex ::= time | |
G0 R89 :start_lex ::= timestamp | |
G0 R90 :start_lex ::= utc_offset | |
Setting trace_terminals option | |
Lexer "L0" rejected lexeme L1c1-11: text; value="BEGIN:VCARD" | |
Lexer "L0" accepted lexeme L1c1-11: 'BEGIN:VCARD'; value="BEGIN:VCARD" | |
Lexer "L0" accepted lexeme L1c12: CRLF; value=" | |
" | |
Lexer "L0" rejected lexeme L2c1-11: text; value="VERSION:4.0" | |
Lexer "L0" accepted lexeme L2c1-11: 'VERSION:4.0'; value="VERSION:4.0" | |
Lexer "L0" accepted lexeme L2c12: CRLF; value=" | |
" | |
Lexer "L0" rejected lexeme L3c1-49: text; value="UID:urn:uuid:4fbe8971-0bc3-424c-9c26-36c3e1eff6b1" | |
progress: | |
P0 @0-0 L1c1 vCards -> . vCard + | |
P1 @0-0 L1c1 vCard -> . 'BEGIN:VCARD' CRLF 'VERSION:4.0' CRLF content 'END:VCARD' | |
P33 @0-0 L1c1 :start -> . vCards | |
R1:1 @0-1 L1c1-11 vCard -> 'BEGIN:VCARD' . CRLF 'VERSION:4.0' CRLF content 'END:VCARD' | |
R1:2 @0-2 L1c1-12 vCard -> 'BEGIN:VCARD' CRLF . 'VERSION:4.0' CRLF content 'END:VCARD' | |
R1:3 @0-3 L1c1-L2c11 vCard -> 'BEGIN:VCARD' CRLF 'VERSION:4.0' . CRLF content 'END:VCARD' | |
R1:4 @0-4 L1c1-L2c12 vCard -> 'BEGIN:VCARD' CRLF 'VERSION:4.0' CRLF . content 'END:VCARD' | |
P2 @4-4 L2c12 content -> . content_line + | |
P3 @4-4 L2c12 content_line -> . content_name params ':' value CRLF | |
P4 @4-4 L2c12 content_name -> . name | |
P5 @4-4 L2c12 content_name -> . group '.' name | |
Error in SLIF parse: No lexemes accepted at line 3, column 1 | |
Lexer "L0" rejected 1 lexeme(s) | |
Rejected lexeme #1: text; value="UID:urn:uuid:4fbe8971-0bc3-424c-9c26-36c3e1eff6b1"; length = 49 | |
* String before error: BEGIN:VCARD\nVERSION:4.0\n | |
* The error was at line 3, column 1, and at character 0x0055 'U', ... | |
* here: UID:urn:uuid:4fbe8971-0bc3-424c-9c26-36c3e1eff6b1\n | |
Marpa::R2 exception at try-scanless.pl line 94. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use v5.10; | |
use strict; | |
use warnings; | |
use Marpa::R2; | |
my $syntax = <<'END'; | |
:default ::= action => ::first | |
:start ::= vCards | |
vCards ::= vCard+ separator => CRLF | |
vCard ::= 'BEGIN:VCARD' CRLF 'VERSION:4.0' CRLF content 'END:VCARD' | |
content ::= content_line+ | |
content_line ::= content_name params ':' value CRLF | |
content_name ::= name | group '.' name | |
group ~ A_D_D | |
name ~ A_D_D | |
params ::= ';' param_list | empty | |
param_list ::= param+ separator => SEMICOLON | |
param ::= any_param | |
any_param ::= any_param_name '=' param_values | |
any_param_name ~ A_D_D | |
param_values ::= param_value COMMA param_values | param_value | |
param_value ::= empty | safe | dquoted | |
value ::= text | value_list | boolean | URI | utc_offset | Language_Tag | iana_valuespec | |
value_list ::= value_listable+ separator => COMMA proper => 1 | |
value_listable ::= text | date | time | date_time | date_and_or_time | timestamp | integer | float | |
boolean ~ 'TRUE' | 'FALSE' | |
integer ~ SIGNED_DIGITS | |
float ~ SIGNED_DIGITS | SIGNED_DIGITS '.' DIGITS | |
utc_offset ~ SIGN DIGITx2 | SIGN DIGITx4 | |
URI ~ NOT_IMPLEMENTED | |
Language_Tag ~ NOT_IMPLEMENTED | |
iana_valuespec ~ NOT_IMPLEMENTED | |
date ~ NOT_IMPLEMENTED | |
date_time ~ NOT_IMPLEMENTED | |
date_and_or_time ~ NOT_IMPLEMENTED | |
time ~ NOT_IMPLEMENTED | |
timestamp ~ NOT_IMPLEMENTED | |
DIGITS ~ DIGIT+ | |
DIGIT ~ [0-9] | |
DIGITx2 ~ DIGIT DIGIT | |
DIGITx4 ~ DIGIT DIGIT DIGIT DIGIT | |
SIGNED_DIGITS ~ DIGITS | SIGN DIGITS | |
SIGN ~ '+' | '-' | |
NOT_IMPLEMENTED ~ [\x00] | |
empty ::= | |
A_D_D ~ [A-Za-z0-9-]+ | |
text ~ TEXT_CHAR+ | |
safe ~ SAFE_CHAR+ | |
dquoted ~ '""' | '"' QSAFE_CHARS '"' | |
SEMICOLON ~ ';' | |
COMMA ~ ',' | |
TEXT_CHAR ~ | |
[\\] [\\n,;:] | |
| WSP | |
| NON_ASCII | |
| [\x21-\x2B\x2D-\x5B\x5D-\x7E] | |
SAFE_CHAR ~ | |
[!\x23-\x2B\x2D-\x39\x3C-\x7E] | |
| WSP | |
| NON_ASCII | |
QSAFE_CHARS ~ QSAFE_CHAR+ | |
QSAFE_CHAR ~ [!\x23-\x7E] | WSP | NON_ASCII | |
NON_ASCII ~ | |
[\xC2-\xDF][\x80-\xBF] | |
| [\xE0] [\xA0-\xBF][\x80-\xBF] | |
| [\xED] [\x80-\x9F][\x80-\xBF] | |
| [\xE1-\xEC][\x80-\xBF][\x80-\xBF] | |
| [\xEE-\xEF][\x80-\xBF][\x80-\xBF] | |
| [\xF0] [\x90-\xBF][\x80-\xBF][\x80-\xBF] | |
| [\xF4] [\x80-\x8F][\x80-\xBF][\x80-\xBF] | |
| [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | |
WSP ~ [ \t] | |
CRLF ~ [\x0D] [\x0A] | [\x0A] | |
END | |
my $grammar = Marpa::R2::Scanless::G->new( { source => \$syntax } ); | |
say "rules L0:\n", $grammar->show_rules(1, 'G0'); | |
my $recce = Marpa::R2::Scanless::R->new( | |
{ grammar => $grammar, semantics_package => 'Parse::vCard::Actions::v4', trace_terminals => 1 } ); | |
my $input = do { local $/; <DATA> }; | |
eval { $recce->read( \$input ); 1 } | |
or do { say "\nprogress:\n", $recce->show_progress( 0, -1 ); die $@ }; | |
my $value_ref = $recce->value; | |
my $value = $value_ref ? ${$value_ref} : 'No Parse'; | |
package Parse::vCard::Actions::v4; | |
package main; | |
__DATA__ | |
BEGIN:VCARD | |
VERSION:4.0 | |
UID:urn:uuid:4fbe8971-0bc3-424c-9c26-36c3e1eff6b1 | |
FN:J. Doe | |
N:Doe;J.;;; | |
EMAIL;PID=1.1:[email protected] | |
EMAIL;PID=2.1:[email protected] | |
EMAIL;PID=2.2:[email protected] | |
TEL;PID=1.1;VALUE=uri:tel:+1-555-555-5555 | |
TEL;PID=2.1,2.2;VALUE=uri:tel:+1-666-666-6666 | |
CLIENTPIDMAP:1;urn:uuid:53e374d9-337e-4727-8803-a1e9c14e0556 | |
CLIENTPIDMAP:2;urn:uuid:1f762d2b-03c4-4a83-9a03-75ff658a6eee | |
END:VCARD |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment