Last active
January 3, 2016 11:29
-
-
Save rns/8456567 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/perl | |
| # Copyright 2013 Jeffrey Kegler | |
| # This file is part of Marpa::R2. Marpa::R2 is free software: you can | |
| # redistribute it and/or modify it under the terms of the GNU Lesser | |
| # General Public License as published by the Free Software Foundation, | |
| # either version 3 of the License, or (at your option) any later version. | |
| # | |
| # Marpa::R2 is distributed in the hope that it will be useful, | |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| # Lesser General Public License for more details. | |
| # | |
| # You should have received a copy of the GNU Lesser | |
| # General Public License along with Marpa::R2. If not, see | |
| # http://www.gnu.org/licenses/. | |
| # Test using a JSON parser | |
| # Inspired by a parser written by Peter Stuifzand | |
| use 5.010; | |
| use strict; | |
| use warnings; | |
| use Test::More tests => 13; | |
| use English qw( -no_match_vars ); | |
| use Scalar::Util qw(blessed); | |
| use lib 'inc'; | |
| use Marpa::R2::Test; | |
| ## no critic (ErrorHandling::RequireCarping); | |
| use Marpa::R2; | |
| use Marpa::R2::ASF; | |
# Smoke tests: each one parses a small JSON document with
# MarpaX::JSON::parse_json() and checks a single value.  Every assertion
# carries a description so that a failure can be identified in the TAP output.
my $data = MarpaX::JSON::parse_json(q${"test":"1"}$);
is( $data->{test}, 1, 'string value "1"' );

{
    # The JSON text doubles as the test description.
    my $test = q${"test":[1,2,3]}$;
    $data = MarpaX::JSON::parse_json($test);
    is_deeply( $data->{test}, [ 1, 2, 3 ], $test );
}

# JSON true/false come back as Perl true (1) and false ('').
$data = MarpaX::JSON::parse_json(q${"test":true}$);
is( $data->{test}, 1, 'true' );
$data = MarpaX::JSON::parse_json(q${"test":false}$);
is( $data->{test}, '', 'false' );

# JSON null comes back as undef.
$data = MarpaX::JSON::parse_json(q${"test":null}$);
is( $data->{test}, undef, 'null' );
$data = MarpaX::JSON::parse_json(q${"test":null, "test2":"hello world"}$);
is( $data->{test},  undef,         'null followed by another member' );
is( $data->{test2}, "hello world", 'string member after a null member' );

# Number-like strings must come through unchanged.
$data = MarpaX::JSON::parse_json(q${"test":"1.25"}$);
is( $data->{test}, '1.25', '1.25' );
$data = MarpaX::JSON::parse_json(q${"test":"1.25e4"}$);
is( $data->{test}, '1.25e4', '1.25e4' );

# The empty array is a valid top-level document.
$data = MarpaX::JSON::parse_json(q$[]$);
is_deeply( $data, [], '[]' );
# An array holding two flat objects that mix string and numeric members.
# Numbers are compared as strings here, so "-122.026020" keeps its trailing zero.
{
    my $geo_json = <<'JSON';
[
{
"precision": "zip",
"Latitude": 37.7668,
"Longitude": -122.3959,
"Address": "",
"City": "SAN FRANCISCO",
"State": "CA",
"Zip": "94107",
"Country": "US"
},
{
"precision": "zip",
"Latitude": 37.371991,
"Longitude": -122.026020,
"Address": "",
"City": "SUNNYVALE",
"State": "CA",
"Zip": "94085",
"Country": "US"
}
]
JSON
    my %san_francisco = (
        "precision" => "zip",
        Latitude    => "37.7668",
        Longitude   => "-122.3959",
        "Country"   => "US",
        Zip         => 94107,
        Address     => '',
        City        => "SAN FRANCISCO",
        State       => 'CA',
    );
    my %sunnyvale = (
        "precision" => "zip",
        Longitude   => "-122.026020",
        Address     => "",
        City        => "SUNNYVALE",
        Country     => "US",
        Latitude    => "37.371991",
        Zip         => 94085,
        State       => "CA",
    );

    $data = MarpaX::JSON::parse_json($geo_json);
    is_deeply( $data, [ \%san_francisco, \%sunnyvale ], 'Geo data' );
}
# A nested document: an object containing an object and an array of numbers.
{
    my $image_json = <<'JSON';
{
"Image": {
"Width": 800,
"Height": 600,
"Title": "View from 15th Floor",
"Thumbnail": {
"Url": "http://www.example.com/image/481989943",
"Height": 125,
"Width": "100"
},
"IDs": [116, 943, 234, 38793]
}
}
JSON
    my %thumbnail = (
        "Url"    => "http://www.example.com/image/481989943",
        "Height" => 125,
        "Width"  => 100,
    );
    my %image = (
        "Width"     => 800,
        "Height"    => 600,
        "Title"     => "View from 15th Floor",
        "Thumbnail" => \%thumbnail,
        "IDs"       => [ 116, 943, 234, 38793 ],
    );

    $data = MarpaX::JSON::parse_json($image_json);
    is_deeply( $data, { "Image" => \%image }, 'is_deeply test' );
}
# A larger document with nested objects, empty arrays, an empty object and
# escaped quotes inside a string.  The result is deliberately not inspected:
# this is a "parses without dying" check only, and it does not count towards
# the fixed test plan declared at the top of the file.
my $big_test = <<'JSON';
{
"source" : "<a href=\"http://janetter.net/\" rel=\"nofollow\">Janetter</a>",
"entities" : {
"user_mentions" : [ {
"name" : "James Governor",
"screen_name" : "moankchips",
"indices" : [ 0, 10 ],
"id_str" : "61233",
"id" : 61233
} ],
"media" : [ ],
"hashtags" : [ ],
"urls" : [ ]
},
"in_reply_to_status_id_str" : "281400879465238529",
"geo" : {
},
"id_str" : "281405942321532929",
"in_reply_to_user_id" : 61233,
"text" : "@monkchips Ouch. Some regrets are harsher than others.",
"id" : 281405942321532929,
"in_reply_to_status_id" : 281400879465238529,
"created_at" : "Wed Dec 19 14:29:39 +0000 2012",
"in_reply_to_screen_name" : "monkchips",
"in_reply_to_user_id_str" : "61233",
"user" : {
"name" : "Sarah Bourne",
"screen_name" : "sarahebourne",
"protected" : false,
"id_str" : "16010789",
"profile_image_url_https" : "https://si0.twimg.com/profile_images/638441870/Snapshot-of-sb_normal.jpg",
"id" : 16010789,
"verified" : false
}
}
JSON
$data = MarpaX::JSON::parse_json($big_test);

# A \uXXXX escape inside a string must be decoded to the code point it names.
$data = MarpaX::JSON::parse_json(<<'JSON');
{ "test": "\u2603" }
JSON
is( $data->{test}, "\x{2603}", 'unicode escape \u2603 is decoded' );
| package MarpaX::JSON; | |
# Constructor.  Compiles the JSON grammar with Marpa::R2::Scanless::G and
# returns an object that keeps it under the 'grammar' key.  Takes no
# arguments besides the class name.
sub new {
    my $class = shift;

    # SLIF source for JSON.  It is handed to Marpa by reference.
    my $slif_source = <<'END_OF_SOURCE';
:start ::= json
json ::= object
| array
object ::= '{' members '}'
# comma is provided as a char class here, to ensure that char classes
# as separators are in the test suite.
members ::= pair* separator => [,]
pair ::= string ':' value
value ::= string
| object
| number
| array
| 'true'
| 'false'
| 'null'
array ::= '[' ']'
| '[' elements ']'
# comma is provided as a char class here, to ensure that char classes
# as separators are in the test suite.
elements ::= value+ separator => [,]
number ~ int
| int frac
| int exp
| int frac exp
int ~ digits
| '-' digits
digits ~ [\d]+
frac ~ '.' digits
exp ~ e digits
e ~ 'e'
| 'e+'
| 'e-'
| 'E'
| 'E+'
| 'E-'
string ::= lstring
:lexeme ~ lstring
lstring ~ quote in_string quote
quote ~ ["]
in_string ~ in_string_char*
in_string_char ~ [^"] | '\"'
:discard ~ whitespace
whitespace ~ [\s]+
END_OF_SOURCE

    my $grammar = Marpa::R2::Scanless::G->new( { source => \$slif_source } );

    return bless { grammar => $grammar }, $class;
}
# Grammar of the parse currently being traversed.  json_traverser() needs it
# to turn symbol ids into symbol names.  It is deliberately declared without
# an initializer: parse() assigns it before every traversal, and the test code
# at the top of the file runs before any file-level assignment here would.
my $g;

# parse($string)
#
# Parses the JSON text in $string with the grammar built by new() and returns
# the corresponding Perl data: a hashref for a JSON object or an arrayref for a
# JSON array.  JSON null becomes undef, true becomes a true value and false
# becomes a false value, at any nesting depth.  Errors raised by Marpa while
# reading the input propagate to the caller.
sub parse {
    my ( $self, $string ) = @_;

    my $re = Marpa::R2::Scanless::R->new( { grammar => $self->{grammar} } );
    $re->read( \$string );
    my $asf = Marpa::R2::ASF->new( { slr => $re } );

    # Handing the grammar to the traverser through the per-traverse hashref
    # reportedly did not work, so it is passed through the file lexical $g.
    $g = $self->{grammar};
    my $result = $asf->traverse( {}, \&json_traverser );

    # The start rule yields an arrayref whose only element is the document.
    return $result->[0];
} ## end sub parse

# json_traverser($glade, $scratch)
#
# Traverser callback for Marpa::R2::ASF::traverse(): converts one glade of the
# JSON parse into Perl data.  $scratch is the per-traverse object and is unused.
#
# The traverser may not return undef, and a decoded JSON string can look
# exactly like a literal token ('null', 'true', ',').  To keep these apart the
# intermediate results are wrapped:
#   token   -> plain string (the literal text)
#   string  -> reference to the decoded string
#   value   -> one-element arrayref ("box") holding the Perl value, which may
#              be undef
#   pair    -> [ $key, $value ], both unwrapped
# The wrappers are removed again by the 'pair' and 'elements' branches, so the
# data returned for 'json', 'object' and 'array' contains no wrappers.
sub json_traverser {
    my ( $glade, $scratch ) = @_;
    my $rule_id = $glade->rule_id();

    # A token is a single choice, and we know enough to return it
    if ( not defined $rule_id ) {
        return $glade->literal();
    }

    my $symbol_name = $g->symbol_name( $glade->symbol_id() );

    # Evaluates every child of the current glade, in order.
    my $children = sub {
        return map { $glade->rh_value($_) } 0 .. $glade->rh_length() - 1;
    };

    if ( $symbol_name eq 'json' ) {
        return $glade->rh_value(0);
    }
    elsif ( $symbol_name eq 'array' ) {

        # '[' ']' has the closing bracket, not an element list, at index 1.
        my $elements = $glade->rh_value(1);
        return ref $elements eq 'ARRAY' ? $elements : [];
    }
    elsif ( $symbol_name eq 'elements' ) {

        # Values arrive boxed (refs); separators are plain token strings.
        return [ map { $_->[0] } grep { ref $_ } $children->() ];
    }
    elsif ( $symbol_name eq 'object' ) {

        # Fall back to an empty hash when no member hash is found at index 1.
        my $members = $glade->rh_value(1);
        return ref $members eq 'HASH' ? $members : {};
    }
    elsif ( $symbol_name eq 'members' ) {

        # Pairs are arrayrefs; separators are plain token strings.
        return { map { @{$_} } grep { ref $_ } $children->() };
    }
    elsif ( $symbol_name eq 'pair' ) {
        my $key   = ${ $glade->rh_value(0) };      # unwrap the string ref
        my $value = $glade->rh_value(2)->[0];      # unbox the value
        return [ $key, $value ];
    }
    elsif ( $symbol_name eq 'value' ) {
        my $child = $glade->rh_value(0);

        # A JSON string: never confused with the literals below.
        return [ ${$child} ] if ref $child eq 'SCALAR';

        # An object or an array.
        return [$child] if ref $child;

        # What is left is a token literal: true, false, null or a number.
        if ( $child eq 'true' or $child eq 'false' ) {
            return [ $child eq 'true' ];
        }
        return [undef] if $child eq 'null';
        return [$child];
    }
    elsif ( $symbol_name eq 'string' ) {

        # Strip the surrounding quotes, then decode escapes if there are any.
        my $s = substr $glade->rh_value(0), 1, -1;
        $s = decode_string($s) if -1 != index $s, '\\';
        return \$s;
    }
    else {
        my @return_value = $children->();

        # Special case for the start rule
        return \@return_value if $symbol_name eq '[:start]';
        warn "uncaught $symbol_name!";
        return { $symbol_name => \@return_value };
    }
}
# parse_json($string)
#
# Plain function (not a method): builds a fresh MarpaX::JSON parser and
# returns whatever its parse() method returns for $string.
sub parse_json {
    my $json_text = shift;
    return MarpaX::JSON->new()->parse($json_text);
}
# decode_string($s)
#
# Decodes the backslash escapes of a JSON string body (the text between the
# quotes) and returns the decoded string.
#
# All escapes are handled in one left-to-right pass, so the output of one
# escape is never re-read as the start of another: the four characters \\n
# decode to a backslash followed by 'n', not to a backslash and a newline.
# A \uXXXX\uXXXX surrogate pair is combined into a single code point.
# Unrecognized escapes are left untouched.
sub decode_string {
    my ($s) = @_;

    # Kept inside the sub: the tests at the top of this file call the parser
    # before any file-level initialization further down would have run.
    my %char_for = (
        'n'  => "\n",
        'r'  => "\r",
        'b'  => "\b",
        'f'  => "\f",
        't'  => "\t",
        '\\' => '\\',
        '/'  => '/',
        '"'  => '"',
    );

    $s =~ s{
        \\
        (?:
            u ( [Dd][89ABab][0-9A-Fa-f]{2} )        # $1: high surrogate ...
            \\ u ( [Dd][C-Fc-f][0-9A-Fa-f]{2} )     # $2: ... and low surrogate
          | u ( [0-9A-Fa-f]{4} )                    # $3: any other \uXXXX
          | ( [nrbft\\/"] )                         # $4: single-character escape
        )
    }{
          defined $1
        ? chr( 0x10000 + ( ( hex($1) - 0xD800 ) << 10 ) + ( hex($2) - 0xDC00 ) )
        : defined $3 ? chr( hex $3 )
        :              $char_for{$4}
    }egxms;

    return $s;
} ## end sub decode_string
| 1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment