Created
September 10, 2012 20:43
-
-
Save tobyink/3693708 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --- orig.pm 2012-09-10 20:46:41.516229122 +0100 | |
| +++ mine.pm 2012-09-10 21:38:36.168228288 +0100 | |
| @@ -1,3 +1,47 @@ | |
| +package RDF::Trine::Parser::TurtleConstants; | |
| + | |
| +no thanks; | |
| +our @EXPORT; | |
| +BEGIN { | |
| + @EXPORT = qw( | |
| + LBRACKET | |
| + RBRACKET | |
| + LPAREN | |
| + RPAREN | |
| + DOT | |
| + SEMICOLON | |
| + COMMA | |
| + HATHAT | |
| + A | |
| + BOOLEAN | |
| + PREFIXNAME | |
| + IRI | |
| + BNODE | |
| + DOUBLE | |
| + DECIMAL | |
| + INTEGER | |
| + WS | |
| + COMMENT | |
| + STRING3D | |
| + STRING1D | |
| + BASE | |
| + PREFIX | |
| + LANG | |
| + decrypt_constant | |
| + ) | |
| +}; | |
| +use base 'Exporter'; | |
| +{ | |
| + my ($cx, %reverse) = 0; | |
| + use constant +{ | |
| + map { my $value = ++$cx; $reverse{$value} = $_; $_ => $value } | |
| + grep { $_ ne 'decrypt_constant' } | |
| + @EXPORT | |
| + }; | |
| + sub decrypt_constant { $reverse{+shift} } | |
| +}; | |
| + | |
| + | |
| package RDF::Trine::Parser::StreamTurtle; | |
| use utf8; | |
| @@ -6,6 +50,7 @@ | |
| use base qw(RDF::Trine::Parser); | |
| use RDF::Trine::Error qw(:try); | |
| use Data::Dumper; | |
| +use RDF::Trine::Parser::TurtleConstants; | |
| my $rdf = RDF::Trine::Namespace->new('http://www.w3.org/1999/02/22-rdf-syntax-ns#'); | |
| my $xsd = RDF::Trine::Namespace->new('http://www.w3.org/2001/XMLSchema#'); | |
| @@ -89,7 +134,8 @@ | |
| while (1) { | |
| my $t = $l->get_token; | |
| return unless ($t); | |
| - next if ($t->type eq 'WS' or $t->type eq 'COMMENT'); | |
| + my $type = $t->type; | |
| + next if ($type == WS or $type == COMMENT); | |
| return $t; | |
| } | |
| } | |
| @@ -101,7 +147,7 @@ | |
| my $t = $self->_next_nonws($l); | |
| return unless ($t); | |
| unless ($t->type eq $type) { | |
| - $self->throw_error("Expecting $type but got " . $t->type, $t, $l); | |
| + $self->throw_error(sprintf("Expecting %s but got %s", decrypt_constant($type), decrypt_constant($t->type)), $t, $l); | |
| } | |
| return $t; | |
| } | |
| @@ -112,35 +158,35 @@ | |
| my $t = shift; | |
| my $type = $t->type; | |
| given ($type) { | |
| - when ('WS') {} | |
| - when ('PREFIX') { | |
| - $t = $self->_get_token_type($l, 'PREFIXNAME'); | |
| + when (WS) {} | |
| + when (PREFIX) { | |
| + $t = $self->_get_token_type($l, PREFIXNAME); | |
| my $name = $t->value; | |
| - $t = $self->_get_token_type($l, 'IRI'); | |
| + $t = $self->_get_token_type($l, IRI); | |
| my $iri = $t->value; | |
| - $t = $self->_get_token_type($l, 'DOT'); | |
| + $t = $self->_get_token_type($l, DOT); | |
| $self->{map}->add_mapping( $name => $iri ); | |
| } | |
| - when ('BASE') { | |
| - $t = $self->_get_token_type($l, 'IRI'); | |
| + when (BASE) { | |
| + $t = $self->_get_token_type($l, IRI); | |
| my $iri = $t->value; | |
| - $t = $self->_get_token_type($l, 'DOT'); | |
| + $t = $self->_get_token_type($l, DOT); | |
| $self->{baseURI} = $iri; | |
| } | |
| default { | |
| # subject | |
| my $subj; | |
| - if ($t->type eq 'LBRACKET') { | |
| + if ($type == LBRACKET) { | |
| $subj = RDF::Trine::Node::Blank->new(); | |
| my $t = $self->_next_nonws($l); | |
| - if ($t->type ne 'RBRACKET') { | |
| + if ($t->type != RBRACKET) { | |
| $self->_unget_token($t); | |
| $self->_predicateObjectList( $l, $subj ); | |
| - $t = $self->_get_token_type($l, 'RBRACKET'); | |
| + $t = $self->_get_token_type($l, RBRACKET); | |
| } | |
| - } elsif ($t->type eq 'LPAREN') { | |
| + } elsif ($type == LPAREN) { | |
| my $t = $self->_next_nonws($l); | |
| - if ($t->type eq 'RPAREN') { | |
| + if ($t->type == RPAREN) { | |
| $subj = RDF::Trine::Node::Resource->new('http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'); | |
| } else { | |
| $subj = RDF::Trine::Node::Blank->new(); | |
| @@ -148,7 +194,7 @@ | |
| while (1) { | |
| my $t = $self->_next_nonws($l); | |
| - if ($t->type eq 'RPAREN') { | |
| + if ($t->type == RPAREN) { | |
| last; | |
| } else { | |
| push(@objects, $self->_object($l, $t)); | |
| @@ -156,8 +202,8 @@ | |
| } | |
| $self->_assert_list($subj, @objects); | |
| } | |
| - } elsif (not($t->type eq 'IRI' or $t->type eq 'PREFIXNAME' or $t->type eq 'BNODE')) { | |
| - $self->throw_error("Expecting resource or bnode but got " . $t->type, $t, $l); | |
| + } elsif (not($type==IRI or $type==PREFIXNAME or $type==BNODE)) { | |
| + $self->throw_error("Expecting resource or bnode but got " . decrypt_constant($type), $t, $l); | |
| } else { | |
| $subj = $self->_token_to_node($t); | |
| } | |
| @@ -166,7 +212,7 @@ | |
| #predicateObjectList | |
| $self->_predicateObjectList($l, $subj); | |
| - $t = $self->_get_token_type($l, 'DOT'); | |
| + $t = $self->_get_token_type($l, DOT); | |
| } | |
| } | |
| } | |
| @@ -191,8 +237,9 @@ | |
| my $subj = shift; | |
| my $t = $self->_next_nonws($l); | |
| while (1) { | |
| - unless ($t->type eq 'IRI' or $t->type eq 'PREFIXNAME' or $t->type eq 'A') { | |
| - $self->throw_error("Expecting verb but got " . $t->type, $t, $l); | |
| + my $type = $t->type; | |
| + unless ($type==IRI or $type==PREFIXNAME or $type==A) { | |
| + $self->throw_error("Expecting verb but got " . decrypt_constant($type), $t, $l); | |
| } | |
| my $pred = $self->_token_to_node($t); | |
| # warn "Predicate: $pred\n"; | |
| @@ -200,9 +247,11 @@ | |
| my $t = $self->_next_nonws($l); | |
| last unless ($t); | |
| - if ($t->type eq 'SEMICOLON') { | |
| - my $t = $self->_next_nonws($l); | |
| - if ($t->type eq 'IRI' or $t->type eq 'PREFIXNAME' or $t->type eq 'A') { | |
| + $type = $t->type; | |
| + if ($type==SEMICOLON) { | |
| + my $t = $self->_next_nonws($l); | |
| + my $type = $t->type; | |
| + if ($type==IRI or $type==PREFIXNAME or $type==A) { | |
| next; | |
| } else { | |
| $self->_unget_token($t); | |
| @@ -226,8 +275,8 @@ | |
| my $obj = $self->_object($l, $t); | |
| $self->_triple($subj, $pred, $obj); | |
| - my $t = $self->_next_nonws($l); | |
| - if ($t->type eq 'COMMA') { | |
| + my $t = $self->_next_nonws($l); | |
| + if ($t->type == COMMA) { | |
| next; | |
| } else { | |
| $self->_unget_token($t); | |
| @@ -259,17 +308,17 @@ | |
| my $t = shift; | |
| my $obj; | |
| my $type = $t->type; | |
| - if ($type eq 'LBRACKET') { | |
| + if ($type==LBRACKET) { | |
| $obj = RDF::Trine::Node::Blank->new(); | |
| my $t = $self->_next_nonws($l); | |
| - if ($t->type ne 'RBRACKET') { | |
| + if ($t->type == RBRACKET) { | |
| $self->_unget_token($t); | |
| $self->_predicateObjectList( $l, $obj ); | |
| - $t = $self->_get_token_type($l, 'RBRACKET'); | |
| + $t = $self->_get_token_type($l, RBRACKET); | |
| } | |
| - } elsif ($type eq 'LPAREN') { | |
| + } elsif ($type == LPAREN) { | |
| my $t = $self->_next_nonws($l); | |
| - if ($t->type eq 'RPAREN') { | |
| + if ($t->type == RPAREN) { | |
| $obj = RDF::Trine::Node::Resource->new('http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'); | |
| } else { | |
| $obj = RDF::Trine::Node::Blank->new(); | |
| @@ -277,7 +326,7 @@ | |
| while (1) { | |
| my $t = $self->_next_nonws($l); | |
| - if ($t->type eq 'RPAREN') { | |
| + if ($t->type == RPAREN) { | |
| last; | |
| } else { | |
| push(@objects, $self->_object($l, $t)); | |
| @@ -285,27 +334,27 @@ | |
| } | |
| $self->_assert_list($obj, @objects); | |
| } | |
| - } elsif (not($type eq 'IRI' or $type eq 'PREFIXNAME' or $type eq 'A' or $type eq '1DSTRING' or $type eq '3DSTRING' or $type eq 'BNODE' or $type eq 'INTEGER' or $type eq 'DECIMAL' or $type eq 'DOUBLE' or $type eq 'BOOLEAN')) { | |
| - $self->throw_error("Expecting object but got " . $type, $t, $l); | |
| + } elsif (not($type==IRI or $type==PREFIXNAME or $type==A or $type==STRING1D or $type==STRING3D or $type==BNODE or $type==INTEGER or $type==DECIMAL or $type==DOUBLE or $type==BOOLEAN)) { | |
| + $self->throw_error("Expecting object but got " . decrypt_constant($type), $t, $l); | |
| } else { | |
| - if ($type eq '1DSTRING' or $type eq '3DSTRING') { | |
| + if ($type==STRING1D or $type==STRING3D) { | |
| my $value = $t->value; | |
| my $t = $self->_next_nonws($l); | |
| my $dt; | |
| my $lang; | |
| - if ($t->type eq 'HATHAT') { | |
| + if ($t->type == HATHAT) { | |
| my $t = $self->_next_nonws($l); | |
| - if ($t->type eq 'IRI' or $t->type eq 'PREFIXNAME') { | |
| + if ($t->type == IRI or $t->type == PREFIXNAME) { | |
| $dt = $self->_token_to_node($t); | |
| } | |
| - } elsif ($t->type eq 'LANG') { | |
| + } elsif ($t->type == LANG) { | |
| $lang = $t->value; | |
| } else { | |
| $self->_unget_token($t); | |
| } | |
| $obj = RDF::Trine::Node::Literal->new($value, $lang, $dt); | |
| } else { | |
| - $obj = $self->_token_to_node($t); | |
| + $obj = $self->_token_to_node($t, $type); | |
| } | |
| } | |
| return $obj; | |
| @@ -314,37 +363,37 @@ | |
| sub _token_to_node { | |
| my $self = shift; | |
| my $t = shift; | |
| - my $type = $t->type; | |
| + my $type = shift || $t->type; | |
| given ($type) { | |
| - when ('A') { | |
| + when (A) { | |
| return $rdf->type; | |
| } | |
| - when ('IRI') { | |
| + when (IRI) { | |
| return RDF::Trine::Node::Resource->new($t->value); | |
| } | |
| - when ('INTEGER') { | |
| + when (INTEGER) { | |
| return RDF::Trine::Node::Literal->new($t->value, undef, $xsd->integer); | |
| } | |
| - when ('DECIMAL') { | |
| + when (DECIMAL) { | |
| return RDF::Trine::Node::Literal->new($t->value, undef, $xsd->decimal); | |
| } | |
| - when ('DOUBLE') { | |
| + when (DOUBLE) { | |
| return RDF::Trine::Node::Literal->new($t->value, undef, $xsd->double); | |
| } | |
| - when ('BOOLEAN') { | |
| + when (BOOLEAN) { | |
| return RDF::Trine::Node::Literal->new($t->value, undef, $xsd->boolean); | |
| } | |
| - when ('PREFIXNAME') { | |
| + when (PREFIXNAME) { | |
| my ($ns, $local) = @{ $t->args }; | |
| my $prefix = $self->{map}->namespace_uri($ns); | |
| my $iri = $prefix->uri($local); | |
| return $iri; | |
| } | |
| - when ('BNODE') { | |
| + when (BNODE) { | |
| return RDF::Trine::Node::Blank->new($t->value); | |
| } | |
| default { | |
| - $self->throw_error("Converting $type to node not implemented", $t); | |
| + $self->throw_error("Converting ".decrypt_constant($type)." to node not implemented", $t); | |
| } | |
| } | |
| } | |
| @@ -362,6 +411,7 @@ | |
| package RDF::Trine::Parser::TurtleLexer; | |
| +use RDF::Trine::Parser::TurtleConstants; | |
| use 5.014; | |
| use Moose; | |
| use Data::Dumper; | |
| @@ -420,8 +470,8 @@ | |
| my $self = shift; | |
| my $type = shift; | |
| my $line = $self->line; | |
| - my $col = $self->column; | |
| - return RDF::Trine::Parser::TurtleToken->new( type => $type, line => $line, column => $col, args => \@_ ); | |
| + my $col = $self->column; | |
| + return RDF::Trine::Parser::TurtleToken->fast_constructor($type, $line, $col, \@_); | |
| } | |
| sub lex_file { | |
| @@ -457,27 +507,27 @@ | |
| when('#') { return $self->get_comment } | |
| when('@') { return $self->get_keyword } | |
| when(/[ \r\n]/) { return $self->get_whitespace } | |
| - when('[') { $self->_get_char; return $self->new_token('LBRACKET'); } | |
| - when(']') { $self->_get_char; return $self->new_token('RBRACKET'); } | |
| - when('(') { $self->_get_char; return $self->new_token('LPAREN'); } | |
| - when(')') { $self->_get_char; return $self->new_token('RPAREN'); } | |
| + when('[') { $self->_get_char; return $self->new_token(LBRACKET); } | |
| + when(']') { $self->_get_char; return $self->new_token(RBRACKET); } | |
| + when('(') { $self->_get_char; return $self->new_token(LPAREN); } | |
| + when(')') { $self->_get_char; return $self->new_token(RPAREN); } | |
| when('<') { return $self->get_iriref } | |
| when('_') { return $self->get_bnode } | |
| when(/[-+0-9]/) { return $self->get_number } | |
| when(q[']) { return $self->get_literal } | |
| when(q["]) { return $self->get_literal } | |
| when(':') { return $self->get_pname } | |
| - when('.') { $self->_get_char; return $self->new_token('DOT'); } | |
| - when(';') { $self->_get_char; return $self->new_token('SEMICOLON'); } | |
| - when(',') { $self->_get_char; return $self->new_token('COMMA'); } | |
| - when('^') { $self->_read_word('^^'); return $self->new_token('HATHAT'); } | |
| + when('.') { $self->_get_char; return $self->new_token(DOT); } | |
| + when(';') { $self->_get_char; return $self->new_token(SEMICOLON); } | |
| + when(',') { $self->_get_char; return $self->new_token(COMMA); } | |
| + when('^') { $self->_read_word('^^'); return $self->new_token(HATHAT); } | |
| when(/[A-Za-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/) { | |
| if ($self->{buffer} =~ /^a(?!:)\b/) { | |
| $self->_get_char; | |
| - return $self->new_token('A'); | |
| + return $self->new_token(A); | |
| } elsif ($self->{buffer} =~ /^(?:true|false)\b/) { | |
| my $bool = $self->_read_length($+[0]); | |
| - return $self->new_token('BOOLEAN', $bool); | |
| + return $self->new_token(BOOLEAN, $bool); | |
| } else { | |
| return $self->get_pname; | |
| } | |
| @@ -616,14 +666,14 @@ | |
| } | |
| $self->_get_char_safe(':'); | |
| if ($self->{buffer} =~ /^$r_nameStartChar/) { | |
| - unless ($self->{buffer} =~ /^${r_nameStartChar}(${r_nameStartChar}|${r_nameChar_extra})*/o) { | |
| + unless ($self->{buffer} =~ /^${r_nameStartChar}(?:${r_nameStartChar}|${r_nameChar_extra})*/o) { | |
| $self->_error("Expected: name"); | |
| } | |
| my $name = substr($self->{buffer}, 0, $+[0]); | |
| $self->_read_word($name); | |
| - return $self->new_token('PREFIXNAME', $prefix, $name); | |
| + return $self->new_token(PREFIXNAME, $prefix, $name); | |
| } else { | |
| - return $self->new_token('PREFIXNAME', $prefix); | |
| + return $self->new_token(PREFIXNAME, $prefix); | |
| } | |
| } | |
| @@ -634,28 +684,28 @@ | |
| my $iri = substr($self->{buffer}, 0, $+[0]); | |
| $self->_read_word($iri); | |
| $self->_get_char_safe('>'); | |
| - return $self->new_token('IRI', $iri); | |
| + return $self->new_token(IRI, $iri); | |
| } | |
| sub get_bnode { | |
| my $self = shift; | |
| $self->_read_word('_:'); | |
| - unless ($self->{buffer} =~ /^${r_nameStartChar}(${r_nameStartChar}|${r_nameChar_extra})*/o) { | |
| + unless ($self->{buffer} =~ /^${r_nameStartChar}(?:${r_nameStartChar}|${r_nameChar_extra})*/o) { | |
| $self->_error("Expected: name"); | |
| } | |
| my $name = substr($self->{buffer}, 0, $+[0]); | |
| $self->_read_word($name); | |
| - return $self->new_token('BNODE', $name); | |
| + return $self->new_token(BNODE, $name); | |
| } | |
| sub get_number { | |
| my $self = shift; | |
| if ($self->{buffer} =~ /^${r_double}/) { | |
| - return $self->new_token('DOUBLE', $self->_read_length($+[0])); | |
| + return $self->new_token(DOUBLE, $self->_read_length($+[0])); | |
| } elsif ($self->{buffer} =~ /^${r_decimal}/) { | |
| - return $self->new_token('DECIMAL', $self->_read_length($+[0])); | |
| + return $self->new_token(DECIMAL, $self->_read_length($+[0])); | |
| } elsif ($self->{buffer} =~ /^${r_integer}/) { | |
| - return $self->new_token('INTEGER', $self->_read_length($+[0])); | |
| + return $self->new_token(INTEGER, $self->_read_length($+[0])); | |
| } else { | |
| $self->throw_error("Expected number"); | |
| } | |
| @@ -668,7 +718,7 @@ | |
| $self->_get_char; | |
| $c = $self->_peek_char; | |
| } | |
| - return $self->new_token('WS'); | |
| + return $self->new_token(WS); | |
| } | |
| sub get_comment { | |
| @@ -683,7 +733,7 @@ | |
| if (length($c) and $c =~ /[\r\n]/) { | |
| $self->_get_char; | |
| } | |
| - return $self->new_token('COMMENT', $comment); | |
| + return $self->new_token(COMMENT, $comment); | |
| } | |
| sub get_literal { | |
| @@ -748,7 +798,7 @@ | |
| } | |
| } | |
| } | |
| - return $self->new_token('3DSTRING', $string); | |
| + return $self->new_token(STRING3D, $string); | |
| } else { | |
| ### #x22 scharacter* #x22 | |
| my $string = ''; | |
| @@ -790,7 +840,7 @@ | |
| } | |
| } | |
| $self->_get_char_safe(q["]); | |
| - return $self->new_token('1DSTRING', $string); | |
| + return $self->new_token(STRING1D, $string); | |
| } | |
| } | |
| @@ -799,14 +849,14 @@ | |
| $self->_get_char_safe('@'); | |
| if ($self->{buffer} =~ /^base/) { | |
| $self->_read_word('base'); | |
| - return $self->new_token('BASE'); | |
| + return $self->new_token(BASE); | |
| } elsif ($self->{buffer} =~ /^prefix/) { | |
| $self->_read_word('prefix'); | |
| - return $self->new_token('PREFIX'); | |
| + return $self->new_token(PREFIX); | |
| } else { | |
| if ($self->{buffer} =~ /^[a-z]+(-[a-z0-9]+)*/) { | |
| my $lang = $self->_read_length($+[0]); | |
| - return $self->new_token('LANG', $lang); | |
| + return $self->new_token(LANG, $lang); | |
| } else { | |
| $self->throw_error("Expected keyword or language tag"); | |
| } | |
| @@ -827,12 +877,12 @@ | |
| package RDF::Trine::Parser::TurtleToken; | |
| use 5.014; | |
| -use Moose; | |
| +use MooseX::ArrayRef; | |
| -has type => ( is => 'ro', isa => 'Str', required => 1 ); | |
| -has line => ( is => 'ro', isa => 'Int', required => 1 ); | |
| -has column => ( is => 'ro', isa => 'Int', required => 1 ); | |
| -has args => ( is => 'ro', required => 1 ); | |
| +has type => ( is => 'ro', ); | |
| +has line => ( is => 'ro', ); | |
| +has column => ( is => 'ro', ); | |
| +has args => ( is => 'ro', ); | |
| sub value { | |
| my $self = shift; | |
| @@ -840,6 +890,12 @@ | |
| return $args->[0]; | |
| } | |
| +# This constructor relies on the list of attributes not changing order! | |
| +sub fast_constructor { | |
| + my $class = shift; | |
| + bless \@_, $class; | |
| +} | |
| + | |
| __PACKAGE__->meta->make_immutable; | |
| 1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment