Skip to content

Instantly share code, notes, and snippets.

@rns
Last active January 3, 2016 11:29
Show Gist options
  • Select an option

  • Save rns/8456567 to your computer and use it in GitHub Desktop.

Select an option

Save rns/8456567 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
# Copyright 2013 Jeffrey Kegler
# This file is part of Marpa::R2. Marpa::R2 is free software: you can
# redistribute it and/or modify it under the terms of the GNU Lesser
# General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# Marpa::R2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser
# General Public License along with Marpa::R2. If not, see
# http://www.gnu.org/licenses/.
# Test using a JSON parser
# Inspired by a parser written by Peter Stuifzand
use 5.010;
use strict;
use warnings;
use Test::More tests => 13;
use English qw( -no_match_vars );
use Scalar::Util qw(blessed);
use lib 'inc';
use Marpa::R2::Test;
## no critic (ErrorHandling::RequireCarping);
use Marpa::R2;
use Marpa::R2::ASF;
# Scalars, JSON literals and simple containers.  Every check is named so
# that a failure in the TAP output identifies the input that broke.
my $data = MarpaX::JSON::parse_json(q${"test":"1"}$);
is( $data->{test}, 1, 'string "1"' );
{
    # The JSON text doubles as the test name, so parse the variable itself
    # rather than a second copy of the literal.
    my $test = q${"test":[1,2,3]}$;
    $data = MarpaX::JSON::parse_json($test);
    is_deeply( $data->{test}, [ 1, 2, 3 ], $test );
}
$data = MarpaX::JSON::parse_json(q${"test":true}$);
is( $data->{test}, 1, 'true' );
$data = MarpaX::JSON::parse_json(q${"test":false}$);
is( $data->{test}, '', 'false' );
$data = MarpaX::JSON::parse_json(q${"test":null}$);
is( $data->{test}, undef, 'null' );
$data = MarpaX::JSON::parse_json(q${"test":null, "test2":"hello world"}$);
is( $data->{test},  undef,         'null followed by another member' );
is( $data->{test2}, "hello world", 'string member after a null' );
$data = MarpaX::JSON::parse_json(q${"test":"1.25"}$);
is( $data->{test}, '1.25', '1.25' );
$data = MarpaX::JSON::parse_json(q${"test":"1.25e4"}$);
is( $data->{test}, '1.25e4', '1.25e4' );
$data = MarpaX::JSON::parse_json(q$[]$);
is_deeply( $data, [], '[]' );
# An array of two flat objects mixing string and numeric members.
{
    my $geo_json = <<'JSON';
[
{
"precision": "zip",
"Latitude": 37.7668,
"Longitude": -122.3959,
"Address": "",
"City": "SAN FRANCISCO",
"State": "CA",
"Zip": "94107",
"Country": "US"
},
{
"precision": "zip",
"Latitude": 37.371991,
"Longitude": -122.026020,
"Address": "",
"City": "SUNNYVALE",
"State": "CA",
"Zip": "94085",
"Country": "US"
}
]
JSON

    # Numbers are written as strings where the exact source text matters
    # (e.g. the trailing zero of -122.026020).
    my @expected_places = (
        {   precision => 'zip',
            Latitude  => '37.7668',
            Longitude => '-122.3959',
            Address   => '',
            City      => 'SAN FRANCISCO',
            State     => 'CA',
            Zip       => 94107,
            Country   => 'US',
        },
        {   precision => 'zip',
            Latitude  => '37.371991',
            Longitude => '-122.026020',
            Address   => '',
            City      => 'SUNNYVALE',
            State     => 'CA',
            Zip       => 94085,
            Country   => 'US',
        },
    );

    $data = MarpaX::JSON::parse_json($geo_json);
    is_deeply( $data, \@expected_places, 'Geo data' );
}
# Nested objects plus an array of numbers.
{
    my $image_json = <<'JSON';
{
"Image": {
"Width": 800,
"Height": 600,
"Title": "View from 15th Floor",
"Thumbnail": {
"Url": "http://www.example.com/image/481989943",
"Height": 125,
"Width": "100"
},
"IDs": [116, 943, 234, 38793]
}
}
JSON

    my %expected_thumbnail = (
        Url    => 'http://www.example.com/image/481989943',
        Height => 125,
        Width  => 100,
    );
    my %expected_image = (
        Width     => 800,
        Height    => 600,
        Title     => 'View from 15th Floor',
        Thumbnail => \%expected_thumbnail,
        IDs       => [ 116, 943, 234, 38793 ],
    );

    $data = MarpaX::JSON::parse_json($image_json);
    is_deeply( $data, { Image => \%expected_image }, 'is_deeply test' );
}
# A larger, realistic document containing escaped quotes, empty containers
# and big integers.
my $big_test = <<'JSON';
{
"source" : "<a href=\"http://janetter.net/\" rel=\"nofollow\">Janetter</a>",
"entities" : {
"user_mentions" : [ {
"name" : "James Governor",
"screen_name" : "moankchips",
"indices" : [ 0, 10 ],
"id_str" : "61233",
"id" : 61233
} ],
"media" : [ ],
"hashtags" : [ ],
"urls" : [ ]
},
"in_reply_to_status_id_str" : "281400879465238529",
"geo" : {
},
"id_str" : "281405942321532929",
"in_reply_to_user_id" : 61233,
"text" : "@monkchips Ouch. Some regrets are harsher than others.",
"id" : 281405942321532929,
"in_reply_to_status_id" : 281400879465238529,
"created_at" : "Wed Dec 19 14:29:39 +0000 2012",
"in_reply_to_screen_name" : "monkchips",
"in_reply_to_user_id_str" : "61233",
"user" : {
"name" : "Sarah Bourne",
"screen_name" : "sarahebourne",
"protected" : false,
"id_str" : "16010789",
"profile_image_url_https" : "https://si0.twimg.com/profile_images/638441870/Snapshot-of-sb_normal.jpg",
"id" : 16010789,
"verified" : false
}
}
JSON

# Smoke test only: the result is not inspected, so this passes as long as
# parsing does not die.  It is deliberately not counted in the test plan.
$data = MarpaX::JSON::parse_json($big_test);

# A \uXXXX escape must be decoded to the corresponding character.
$data = MarpaX::JSON::parse_json(<<'JSON');
{ "test": "\u2603" }
JSON
is( $data->{test}, "\x{2603}", 'unicode escape \u2603' );
package MarpaX::JSON;
# new( $class )
# Constructor.  Compiles the JSON grammar, written in the Marpa::R2 SLIF DSL,
# into a Marpa::R2::Scanless::G object and returns a blessed hash holding it
# under the key 'grammar'.
sub new {
    my ($class) = @_;

    # The DSL text is passed to the grammar constructor by reference.
    my $dsl = <<'END_OF_SOURCE';
:start ::= json
json ::= object
| array
object ::= '{' members '}'
# comma is provided as a char class here, to ensure that char classes
# as separators are in the test suite.
members ::= pair* separator => [,]
pair ::= string ':' value
value ::= string
| object
| number
| array
| 'true'
| 'false'
| 'null'
array ::= '[' ']'
| '[' elements ']'
# comma is provided as a char class here, to ensure that char classes
# as separators are in the test suite.
elements ::= value+ separator => [,]
number ~ int
| int frac
| int exp
| int frac exp
int ~ digits
| '-' digits
digits ~ [\d]+
frac ~ '.' digits
exp ~ e digits
e ~ 'e'
| 'e+'
| 'e-'
| 'E'
| 'E+'
| 'E-'
string ::= lstring
:lexeme ~ lstring
lstring ~ quote in_string quote
quote ~ ["]
in_string ~ in_string_char*
in_string_char ~ [^"] | '\"'
:discard ~ whitespace
whitespace ~ [\s]+
END_OF_SOURCE

    my $grammar = Marpa::R2::Scanless::G->new( { source => \$dsl } );
    return bless { grammar => $grammar }, $class;
}
# Grammar of the parse currently being traversed.  json_traverser() needs it
# to translate symbol ids into symbol names; parse() sets it before every
# traversal.  A file-scoped variable is used because handing the grammar over
# through the per-traverse object did not work when this was written.
my $g;

# parse( $self, $string )
# Parses the JSON text in $string with the grammar stored in $self and
# returns the corresponding Perl data: a hash ref for a JSON object or an
# array ref for a JSON array.  true/false/null become 1/''/undef at any
# nesting depth.  Dies if the input cannot be read or no ASF can be built.
sub parse {
    my ( $self, $string ) = @_;
    my $recce =
        Marpa::R2::Scanless::R->new( { grammar => $self->{grammar} } );
    $recce->read( \$string );
    my $asf = Marpa::R2::ASF->new( { slr => $recce } );

    # Fail with a clear message instead of "Can't call method on undef".
    die 'No ASF: the input could not be parsed as JSON' if not defined $asf;

    $g = $self->{grammar};
    my $result = $asf->traverse( {}, \&json_traverser );

    # The start rule yields an array ref with the JSON value as its only
    # element (see the '[:start]' case in json_traverser).
    return $result->[0];
} ## end sub parse

# json_traverser( $glade, $scratch )
# Traverser callback for Marpa::R2::ASF::traverse(); converts one glade of
# the JSON parse into Perl data.  $scratch (the per-traverse object) is
# not used.  Reads the file-scoped $g to resolve symbol names.
#
# Return conventions, by symbol:
#   token            - its literal text (a plain string)
#   string           - a reference to the decoded string
#   value            - a reference to the Perl value (possibly to undef)
#   pair             - [ $key, $value ], both unboxed
#   elements         - array ref of unboxed values
#   members          - hash ref built from the pairs
#   array/object/json - the array ref / hash ref itself
#
# Strings and values are returned boxed in a reference for two reasons:
# the traverser cannot return undef directly, and boxing keeps a JSON
# string such as "true", "null" or "," distinguishable from the keyword or
# separator token with the same text.
sub json_traverser {
    my ( $glade, $scratch ) = @_;
    my $rule_id = $glade->rule_id();

    # A token is a single choice, and we know enough to return it.
    return $glade->literal() if not defined $rule_id;

    my $symbol_name = $g->symbol_name( $glade->symbol_id() );

    if ( $symbol_name eq 'json' ) {
        return $glade->rh_value(0);
    }
    elsif ( $symbol_name eq 'array' ) {

        # For an empty array child 1 is the ']' token, not an elements list.
        my $elements = $glade->rh_value(1);
        return ref $elements ? $elements : [];
    }
    elsif ( $symbol_name eq 'elements' ) {

        # Values arrive boxed (references); separators are plain token
        # strings, so keeping only the references drops the commas without
        # also dropping a string element that happens to be ",".
        return [
            map  { ${$_} }
            grep { ref $_ }
            map  { $glade->rh_value($_) } 0 .. $glade->rh_length() - 1
        ];
    }
    elsif ( $symbol_name eq 'object' ) {
        return $glade->rh_value(1);
    }
    elsif ( $symbol_name eq 'members' ) {

        # Pairs are array refs; separators are plain token strings.
        return {
            map  { @{$_} }
            grep { ref $_ }
            map  { $glade->rh_value($_) } 0 .. $glade->rh_length() - 1
        };
    }
    elsif ( $symbol_name eq 'pair' ) {

        # Child 0 is the boxed key string, child 2 the boxed value.
        my $key   = $glade->rh_value(0);
        my $value = $glade->rh_value(2);
        return [ ${$key}, ${$value} ];
    }
    elsif ( $symbol_name eq 'value' ) {
        my $child = $glade->rh_value(0);

        # A string is already boxed by the 'string' case below.
        return $child if ref($child) eq 'SCALAR';

        # Arrays and objects come back as ARRAY/HASH refs: box them.
        return \$child if ref $child;

        # What is left is a token: a keyword or a number.
        if ( $child eq 'true' or $child eq 'false' ) {
            my $boolean = $child eq 'true';
            return \$boolean;
        }
        if ( $child eq 'null' ) {
            my $null;
            return \$null;
        }
        return \$child;
    }
    elsif ( $symbol_name eq 'string' ) {

        # Strip the surrounding quotes, then resolve escapes if any.
        my $s = substr $glade->rh_value(0), 1, -1;
        $s = decode_string($s) if -1 != index $s, '\\';
        return \$s;
    }
    else {
        my @return_value =
            map { $glade->rh_value($_) } 0 .. $glade->rh_length() - 1;

        # Special case for the start rule
        return \@return_value if $symbol_name eq '[:start]';
        warn "uncaught $symbol_name!";
        return { $symbol_name => \@return_value };
    }
}
# parse_json( $string )
# Functional convenience wrapper: constructs a fresh MarpaX::JSON parser and
# returns whatever its parse() method returns for the given JSON text.
sub parse_json {
    my ($json_text) = @_;
    return MarpaX::JSON->new()->parse($json_text);
}
# decode_string( $s )
# Resolves the JSON escape sequences in $s (the text between the quotes of
# a JSON string) and returns the decoded string.
#
# All escapes are handled in a single left-to-right pass.  Running one
# substitution per escape kind is wrong for an escaped backslash followed
# by an escape letter: the JSON text \\n means "backslash, n" but a
# separate pass for \n would see the second backslash plus the n and
# produce a newline.
#
# Recognised: \n \r \b \f \t \\ \/ \" and \uXXXX.  A \uXXXX high surrogate
# immediately followed by a \uXXXX low surrogate is combined into the single
# character it encodes.  Anything else after a backslash is left untouched.
sub decode_string {
    my ($s) = @_;

    # Kept inside the sub: this sub is called by code that runs before
    # execution would reach a file-level initialisation placed here.
    my %char_for = (
        'n'  => "\n",
        'r'  => "\r",
        'b'  => "\b",
        'f'  => "\f",
        't'  => "\t",
        '\\' => '\\',
        '/'  => '/',
        '"'  => '"',
    );

    $s =~ s{
        \\
        (?:
            u ( [Dd][89ABab][0-9A-Fa-f]{2} )      # capture 1: high surrogate
            \\u ( [Dd][C-Fc-f][0-9A-Fa-f]{2} )    # capture 2: low surrogate
          | u ( [0-9A-Fa-f]{4} )                  # capture 3: lone code unit
          | ( [nrbft\\/"] )                       # capture 4: simple escape
        )
    }{
          defined $1
        ? chr( 0x10000 + ( ( hex($1) - 0xD800 ) << 10 ) + ( hex($2) - 0xDC00 ) )
        : defined $3 ? chr( hex $3 )
        :              $char_for{$4}
    }egxms;

    return $s;
} ## end sub decode_string
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment