-
-
Save pstuifzand/4447349 to your computer and use it in GitHub Desktop.
| package MarpaX::JSON; | |
| use strict; | |
| use Marpa::R2 2.039_000; | |
| sub new { | |
| my ($class) = @_; | |
| my $self = bless {}, $class; | |
| $self->{grammar} = Marpa::R2::Scanless::G->new( | |
| { | |
| action_object => 'MarpaX::JSON::Actions', | |
| default_action => 'do_first_arg', | |
| source => \(<<'END_OF_SOURCE'), | |
| :start ::= json | |
| json ::= object | |
| | array | |
| object ::= '{' '}' action => do_empty_object | |
| | '{' members '}' action => do_object | |
| members ::= pair+ separator => <comma> action => do_list | |
| pair ::= string ':' value action => do_pair | |
| value ::= string | |
| | object | |
| | number | |
| | array | |
| | 'true' action => do_true | |
| | 'false' action => do_true | |
| | 'null' action => do_null | |
| array ::= '[' ']' action => do_empty_array | |
| | '[' elements ']' action => do_array | |
| elements ::= value+ separator => <comma> action => do_list | |
| number ~ int | |
| | int frac | |
| | int exp | |
| | int frac exp | |
| int ~ digits | |
| | '-' digits | |
| digits ~ [\d]+ | |
| frac ~ '.' digits | |
| exp ~ e digits | |
| e ~ 'e' | |
| | 'e+' | |
| | 'e-' | |
| | 'E' | |
| | 'E+' | |
| | 'E-' | |
| string ::= lstring action => do_string | |
| lstring ~ quote in_string quote | |
| quote ~ ["] | |
| in_string ~ in_string_char* | |
| in_string_char ~ [^"\\] | |
| | '\' '"' | |
| | '\' 'b' | |
| | '\' 'f' | |
| | '\' 't' | |
| | '\' 'n' | |
| | '\' 'r' | |
| | '\' 'u' four_hex_digits | |
| | '\' '/' | |
| | '\\' | |
| four_hex_digits ~ hex_digit hex_digit hex_digit hex_digit | |
| hex_digit ~ [0-9a-fA-F] | |
| comma ~ ',' | |
| :discard ~ whitespace | |
| whitespace ~ [\s]+ | |
| END_OF_SOURCE | |
| } | |
| ); | |
| return $self; | |
| } | |
| sub parse { | |
| my ($self, $string) = @_; | |
| my $re = Marpa::R2::Scanless::R->new( { grammar => $self->{grammar} } ); | |
| $re->read(\$string); | |
| my $value_ref = $re->value(); | |
| return ${$value_ref}; | |
| } | |
| sub parse_json { | |
| my ($string) = @_; | |
| my $parser = MarpaX::JSON->new(); | |
| return $parser->parse($string); | |
| } | |
| package MarpaX::JSON::Actions; | |
| use strict; | |
| sub new { | |
| my ($class) = @_; | |
| return bless {}, $class; | |
| } | |
| sub do_first_arg { | |
| shift; | |
| return $_[0]; | |
| } | |
| sub do_empty_object { | |
| return {}; | |
| } | |
| sub do_object { | |
| shift; | |
| return { map { @$_ } @{$_[1]} }; | |
| } | |
| sub do_empty_array { | |
| return []; | |
| } | |
| sub do_array { | |
| shift; | |
| return $_[1]; | |
| } | |
| sub do_list { | |
| shift; | |
| return \@_; | |
| } | |
| sub do_pair { | |
| shift; | |
| return [ $_[0], $_[2] ]; | |
| } | |
| sub do_string { | |
| shift; | |
| my $s = $_[0]; | |
| $s =~ s/^"//; | |
| $s =~ s/"$//; | |
| $s =~ s/\\u([0-9A-Fa-f]{4})/chr(hex($1))/eg; | |
| $s =~ s/\\n/\n/g; | |
| $s =~ s/\\r/\r/g; | |
| $s =~ s/\\b/\b/g; | |
| $s =~ s/\\f/\f/g; | |
| $s =~ s/\\t/\t/g; | |
| $s =~ s/\\\\/\\/g; | |
| $s =~ s{\\/}{/}g; | |
| $s =~ s{\\"}{"}g; | |
| return $s; | |
| } | |
| sub do_true { | |
| shift; | |
| return $_[0] eq 'true'; | |
| } | |
| sub do_null { | |
| return undef; | |
| } | |
| sub do_join { | |
| shift; | |
| return join '', @_; | |
| } | |
| 1; |
| use Test::More; | |
| use Test::Exception; | |
| use lib 'lib'; | |
| use MarpaX::JSON; | |
| my $data = MarpaX::JSON::parse_json(q${"test":"1"}$); | |
| is($data->{test}, 1); | |
| $data = MarpaX::JSON::parse_json(q${"test":[1,2,3]}$); | |
| is_deeply($data->{test}, [1,2,3]); | |
| $data = MarpaX::JSON::parse_json(q${"test":true}$); | |
| is($data->{test}, 1); | |
| $data = MarpaX::JSON::parse_json(q${"test":false}$); | |
| is($data->{test}, ''); | |
| $data = MarpaX::JSON::parse_json(q${"test":null}$); | |
| is($data->{test}, undef); | |
| $data = MarpaX::JSON::parse_json(q${"test":null, "test2":"hello world"}$); | |
| is($data->{test}, undef); | |
| is($data->{test2}, "hello world"); | |
| $data = MarpaX::JSON::parse_json(q${"test":"1.25"}$); | |
| is($data->{test}, '1.25'); | |
| $data = MarpaX::JSON::parse_json(q${"test":"1.25e4"}$); | |
| is($data->{test}, '1.25e4'); | |
| $data = MarpaX::JSON::parse_json(q$[]$); | |
| is_deeply($data, []); | |
| $data = MarpaX::JSON::parse_json(<<'JSON'); | |
| [ | |
| { | |
| "precision": "zip", | |
| "Latitude": 37.7668, | |
| "Longitude": -122.3959, | |
| "Address": "", | |
| "City": "SAN FRANCISCO", | |
| "State": "CA", | |
| "Zip": "94107", | |
| "Country": "US" | |
| }, | |
| { | |
| "precision": "zip", | |
| "Latitude": 37.371991, | |
| "Longitude": -122.026020, | |
| "Address": "", | |
| "City": "SUNNYVALE", | |
| "State": "CA", | |
| "Zip": "94085", | |
| "Country": "US" | |
| } | |
| ] | |
| JSON | |
| is_deeply($data, [ | |
| { "precision"=>"zip", Latitude => "37.7668", Longitude=>"-122.3959", | |
| "Country" => "US", Zip => 94107, Address => '', | |
| City => "SAN FRANCISCO", State => 'CA' }, | |
| { "precision" => "zip", Longitude => "-122.026020", Address => "", | |
| City => "SUNNYVALE", Country => "US", Latitude => "37.371991", | |
| Zip => 94085, State => "CA" } | |
| ]); | |
| $data = MarpaX::JSON::parse_json(<<'JSON'); | |
| { | |
| "Image": { | |
| "Width": 800, | |
| "Height": 600, | |
| "Title": "View from 15th Floor", | |
| "Thumbnail": { | |
| "Url": "http://www.example.com/image/481989943", | |
| "Height": 125, | |
| "Width": "100" | |
| }, | |
| "IDs": [116, 943, 234, 38793] | |
| } | |
| } | |
| JSON | |
| is_deeply($data, { | |
| "Image" => { | |
| "Width" => 800, "Height" => 600, | |
| "Title" => "View from 15th Floor", | |
| "Thumbnail" => { | |
| "Url" => "http://www.example.com/image/481989943", | |
| "Height" => 125, | |
| "Width" => 100, | |
| }, | |
| "IDs" => [ 116, 943, 234, 38793 ], | |
| } | |
| }); | |
| $data = MarpaX::JSON::parse_json(<<'JSON'); | |
| { | |
| "source" : "<a href=\"http://janetter.net/\" rel=\"nofollow\">Janetter</a>", | |
| "entities" : { | |
| "user_mentions" : [ { | |
| "name" : "James Governor", | |
| "screen_name" : "moankchips", | |
| "indices" : [ 0, 10 ], | |
| "id_str" : "61233", | |
| "id" : 61233 | |
| } ], | |
| "media" : [ ], | |
| "hashtags" : [ ], | |
| "urls" : [ ] | |
| }, | |
| "in_reply_to_status_id_str" : "281400879465238529", | |
| "geo" : { | |
| }, | |
| "id_str" : "281405942321532929", | |
| "in_reply_to_user_id" : 61233, | |
| "text" : "@monkchips Ouch. Some regrets are harsher than others.", | |
| "id" : 281405942321532929, | |
| "in_reply_to_status_id" : 281400879465238529, | |
| "created_at" : "Wed Dec 19 14:29:39 +0000 2012", | |
| "in_reply_to_screen_name" : "monkchips", | |
| "in_reply_to_user_id_str" : "61233", | |
| "user" : { | |
| "name" : "Sarah Bourne", | |
| "screen_name" : "sarahebourne", | |
| "protected" : false, | |
| "id_str" : "16010789", | |
| "profile_image_url_https" : "https://si0.twimg.com/profile_images/638441870/Snapshot-of-sb_normal.jpg", | |
| "id" : 16010789, | |
| "verified" : false | |
| } | |
| } | |
| JSON | |
| $data = MarpaX::JSON::parse_json(<<'JSON'); | |
| { "test": "\u2603" } | |
| JSON | |
| is($data->{test}, "\x{2603}"); | |
| dies_ok { | |
| $data = MarpaX::JSON::parse_json(<<'JSON'); | |
| { "test": "éáóüöï" } | |
| JSON | |
| }, 'marpa scanless doesn\'t understand higher than 8-bit codepoints yet'; | |
| done_testing(); |
Also, since it needs 2.039_000 to work, it is probably best to change
use Marpa::R2;
to
use Marpa::R2 2.039_000;
A structural rule with a join is a bit of a red flag. I'll change it.
@uberbaud Thanks. And I guess you're totally right about being used in production.
~ 20-30x slower than JSON::PP for simple JSON strings. Not bad for a half-hour work :)
BTW, the parser doesn't seem to report errors (it just returns undef) for cases like: "[", "[[", "{", q/"a/.
I've done some work on this, which is in my fork of this gist. (I'd create a pull request, but I cannot figure out how to do that for a gist -- in any case, you'll want to edit my work.)
It's now 10 times as fast. Still not quite as fast as JSON::PP, but a lot closer. And it's now an even simpler solution. One big change is to do all parse-time logic in C, creating an AST, and then create the data structure by fixing up the AST.
"number" should be a G0 rule: