Skip to content

Instantly share code, notes, and snippets.

@tobyink
Created September 10, 2012 20:43
Show Gist options
  • Select an option

  • Save tobyink/3693708 to your computer and use it in GitHub Desktop.

Select an option

Save tobyink/3693708 to your computer and use it in GitHub Desktop.
--- orig.pm 2012-09-10 20:46:41.516229122 +0100
+++ mine.pm 2012-09-10 21:38:36.168228288 +0100
@@ -1,3 +1,47 @@
+package RDF::Trine::Parser::TurtleConstants;
+
+no thanks;
+our @EXPORT;
+BEGIN {
+ @EXPORT = qw(
+ LBRACKET
+ RBRACKET
+ LPAREN
+ RPAREN
+ DOT
+ SEMICOLON
+ COMMA
+ HATHAT
+ A
+ BOOLEAN
+ PREFIXNAME
+ IRI
+ BNODE
+ DOUBLE
+ DECIMAL
+ INTEGER
+ WS
+ COMMENT
+ STRING3D
+ STRING1D
+ BASE
+ PREFIX
+ LANG
+ decrypt_constant
+ )
+};
+use base 'Exporter';
+{
+ my ($cx, %reverse) = 0;
+ use constant +{
+ map { my $value = ++$cx; $reverse{$value} = $_; $_ => $value }
+ grep { $_ ne 'decrypt_constant' }
+ @EXPORT
+ };
+ sub decrypt_constant { $reverse{+shift} }
+};
+
+
package RDF::Trine::Parser::StreamTurtle;
use utf8;
@@ -6,6 +50,7 @@
use base qw(RDF::Trine::Parser);
use RDF::Trine::Error qw(:try);
use Data::Dumper;
+use RDF::Trine::Parser::TurtleConstants;
my $rdf = RDF::Trine::Namespace->new('http://www.w3.org/1999/02/22-rdf-syntax-ns#');
my $xsd = RDF::Trine::Namespace->new('http://www.w3.org/2001/XMLSchema#');
@@ -89,7 +134,8 @@
while (1) {
my $t = $l->get_token;
return unless ($t);
- next if ($t->type eq 'WS' or $t->type eq 'COMMENT');
+ my $type = $t->type;
+ next if ($type == WS or $type == COMMENT);
return $t;
}
}
@@ -101,7 +147,7 @@
my $t = $self->_next_nonws($l);
return unless ($t);
unless ($t->type eq $type) {
- $self->throw_error("Expecting $type but got " . $t->type, $t, $l);
+ $self->throw_error(sprintf("Expecting %s but got %s", decrypt_constant($type), decrypt_constant($t->type)), $t, $l);
}
return $t;
}
@@ -112,35 +158,35 @@
my $t = shift;
my $type = $t->type;
given ($type) {
- when ('WS') {}
- when ('PREFIX') {
- $t = $self->_get_token_type($l, 'PREFIXNAME');
+ when (WS) {}
+ when (PREFIX) {
+ $t = $self->_get_token_type($l, PREFIXNAME);
my $name = $t->value;
- $t = $self->_get_token_type($l, 'IRI');
+ $t = $self->_get_token_type($l, IRI);
my $iri = $t->value;
- $t = $self->_get_token_type($l, 'DOT');
+ $t = $self->_get_token_type($l, DOT);
$self->{map}->add_mapping( $name => $iri );
}
- when ('BASE') {
- $t = $self->_get_token_type($l, 'IRI');
+ when (BASE) {
+ $t = $self->_get_token_type($l, IRI);
my $iri = $t->value;
- $t = $self->_get_token_type($l, 'DOT');
+ $t = $self->_get_token_type($l, DOT);
$self->{baseURI} = $iri;
}
default {
# subject
my $subj;
- if ($t->type eq 'LBRACKET') {
+ if ($type == LBRACKET) {
$subj = RDF::Trine::Node::Blank->new();
my $t = $self->_next_nonws($l);
- if ($t->type ne 'RBRACKET') {
+ if ($t->type != RBRACKET) {
$self->_unget_token($t);
$self->_predicateObjectList( $l, $subj );
- $t = $self->_get_token_type($l, 'RBRACKET');
+ $t = $self->_get_token_type($l, RBRACKET);
}
- } elsif ($t->type eq 'LPAREN') {
+ } elsif ($type == LPAREN) {
my $t = $self->_next_nonws($l);
- if ($t->type eq 'RPAREN') {
+ if ($t->type == RPAREN) {
$subj = RDF::Trine::Node::Resource->new('http://www.w3.org/1999/02/22-rdf-syntax-ns#nil');
} else {
$subj = RDF::Trine::Node::Blank->new();
@@ -148,7 +194,7 @@
while (1) {
my $t = $self->_next_nonws($l);
- if ($t->type eq 'RPAREN') {
+ if ($t->type == RPAREN) {
last;
} else {
push(@objects, $self->_object($l, $t));
@@ -156,8 +202,8 @@
}
$self->_assert_list($subj, @objects);
}
- } elsif (not($t->type eq 'IRI' or $t->type eq 'PREFIXNAME' or $t->type eq 'BNODE')) {
- $self->throw_error("Expecting resource or bnode but got " . $t->type, $t, $l);
+ } elsif (not($type==IRI or $type==PREFIXNAME or $type==BNODE)) {
+ $self->throw_error("Expecting resource or bnode but got " . decrypt_constant($type), $t, $l);
} else {
$subj = $self->_token_to_node($t);
}
@@ -166,7 +212,7 @@
#predicateObjectList
$self->_predicateObjectList($l, $subj);
- $t = $self->_get_token_type($l, 'DOT');
+ $t = $self->_get_token_type($l, DOT);
}
}
}
@@ -191,8 +237,9 @@
my $subj = shift;
my $t = $self->_next_nonws($l);
while (1) {
- unless ($t->type eq 'IRI' or $t->type eq 'PREFIXNAME' or $t->type eq 'A') {
- $self->throw_error("Expecting verb but got " . $t->type, $t, $l);
+ my $type = $t->type;
+ unless ($type==IRI or $type==PREFIXNAME or $type==A) {
+ $self->throw_error("Expecting verb but got " . decrypt_constant($type), $t, $l);
}
my $pred = $self->_token_to_node($t);
# warn "Predicate: $pred\n";
@@ -200,9 +247,11 @@
my $t = $self->_next_nonws($l);
last unless ($t);
- if ($t->type eq 'SEMICOLON') {
- my $t = $self->_next_nonws($l);
- if ($t->type eq 'IRI' or $t->type eq 'PREFIXNAME' or $t->type eq 'A') {
+ $type = $t->type;
+ if ($type==SEMICOLON) {
+ my $t = $self->_next_nonws($l);
+ my $type = $t->type;
+ if ($type==IRI or $type==PREFIXNAME or $type==A) {
next;
} else {
$self->_unget_token($t);
@@ -226,8 +275,8 @@
my $obj = $self->_object($l, $t);
$self->_triple($subj, $pred, $obj);
- my $t = $self->_next_nonws($l);
- if ($t->type eq 'COMMA') {
+ my $t = $self->_next_nonws($l);
+ if ($t->type == COMMA) {
next;
} else {
$self->_unget_token($t);
@@ -259,17 +308,17 @@
my $t = shift;
my $obj;
my $type = $t->type;
- if ($type eq 'LBRACKET') {
+ if ($type==LBRACKET) {
$obj = RDF::Trine::Node::Blank->new();
my $t = $self->_next_nonws($l);
- if ($t->type ne 'RBRACKET') {
+ if ($t->type == RBRACKET) {
$self->_unget_token($t);
$self->_predicateObjectList( $l, $obj );
- $t = $self->_get_token_type($l, 'RBRACKET');
+ $t = $self->_get_token_type($l, RBRACKET);
}
- } elsif ($type eq 'LPAREN') {
+ } elsif ($type == LPAREN) {
my $t = $self->_next_nonws($l);
- if ($t->type eq 'RPAREN') {
+ if ($t->type == RPAREN) {
$obj = RDF::Trine::Node::Resource->new('http://www.w3.org/1999/02/22-rdf-syntax-ns#nil');
} else {
$obj = RDF::Trine::Node::Blank->new();
@@ -277,7 +326,7 @@
while (1) {
my $t = $self->_next_nonws($l);
- if ($t->type eq 'RPAREN') {
+ if ($t->type == RPAREN) {
last;
} else {
push(@objects, $self->_object($l, $t));
@@ -285,27 +334,27 @@
}
$self->_assert_list($obj, @objects);
}
- } elsif (not($type eq 'IRI' or $type eq 'PREFIXNAME' or $type eq 'A' or $type eq '1DSTRING' or $type eq '3DSTRING' or $type eq 'BNODE' or $type eq 'INTEGER' or $type eq 'DECIMAL' or $type eq 'DOUBLE' or $type eq 'BOOLEAN')) {
- $self->throw_error("Expecting object but got " . $type, $t, $l);
+ } elsif (not($type==IRI or $type==PREFIXNAME or $type==A or $type==STRING1D or $type==STRING3D or $type==BNODE or $type==INTEGER or $type==DECIMAL or $type==DOUBLE or $type==BOOLEAN)) {
+ $self->throw_error("Expecting object but got " . decrypt_constant($type), $t, $l);
} else {
- if ($type eq '1DSTRING' or $type eq '3DSTRING') {
+ if ($type==STRING1D or $type==STRING3D) {
my $value = $t->value;
my $t = $self->_next_nonws($l);
my $dt;
my $lang;
- if ($t->type eq 'HATHAT') {
+ if ($t->type == HATHAT) {
my $t = $self->_next_nonws($l);
- if ($t->type eq 'IRI' or $t->type eq 'PREFIXNAME') {
+ if ($t->type == IRI or $t->type == PREFIXNAME) {
$dt = $self->_token_to_node($t);
}
- } elsif ($t->type eq 'LANG') {
+ } elsif ($t->type == LANG) {
$lang = $t->value;
} else {
$self->_unget_token($t);
}
$obj = RDF::Trine::Node::Literal->new($value, $lang, $dt);
} else {
- $obj = $self->_token_to_node($t);
+ $obj = $self->_token_to_node($t, $type);
}
}
return $obj;
@@ -314,37 +363,37 @@
sub _token_to_node {
my $self = shift;
my $t = shift;
- my $type = $t->type;
+ my $type = shift || $t->type;
given ($type) {
- when ('A') {
+ when (A) {
return $rdf->type;
}
- when ('IRI') {
+ when (IRI) {
return RDF::Trine::Node::Resource->new($t->value);
}
- when ('INTEGER') {
+ when (INTEGER) {
return RDF::Trine::Node::Literal->new($t->value, undef, $xsd->integer);
}
- when ('DECIMAL') {
+ when (DECIMAL) {
return RDF::Trine::Node::Literal->new($t->value, undef, $xsd->decimal);
}
- when ('DOUBLE') {
+ when (DOUBLE) {
return RDF::Trine::Node::Literal->new($t->value, undef, $xsd->double);
}
- when ('BOOLEAN') {
+ when (BOOLEAN) {
return RDF::Trine::Node::Literal->new($t->value, undef, $xsd->boolean);
}
- when ('PREFIXNAME') {
+ when (PREFIXNAME) {
my ($ns, $local) = @{ $t->args };
my $prefix = $self->{map}->namespace_uri($ns);
my $iri = $prefix->uri($local);
return $iri;
}
- when ('BNODE') {
+ when (BNODE) {
return RDF::Trine::Node::Blank->new($t->value);
}
default {
- $self->throw_error("Converting $type to node not implemented", $t);
+ $self->throw_error("Converting ".decrypt_constant($type)." to node not implemented", $t);
}
}
}
@@ -362,6 +411,7 @@
package RDF::Trine::Parser::TurtleLexer;
+use RDF::Trine::Parser::TurtleConstants;
use 5.014;
use Moose;
use Data::Dumper;
@@ -420,8 +470,8 @@
my $self = shift;
my $type = shift;
my $line = $self->line;
- my $col = $self->column;
- return RDF::Trine::Parser::TurtleToken->new( type => $type, line => $line, column => $col, args => \@_ );
+ my $col = $self->column;
+ return RDF::Trine::Parser::TurtleToken->fast_constructor($type, $line, $col, \@_);
}
sub lex_file {
@@ -457,27 +507,27 @@
when('#') { return $self->get_comment }
when('@') { return $self->get_keyword }
when(/[ \r\n]/) { return $self->get_whitespace }
- when('[') { $self->_get_char; return $self->new_token('LBRACKET'); }
- when(']') { $self->_get_char; return $self->new_token('RBRACKET'); }
- when('(') { $self->_get_char; return $self->new_token('LPAREN'); }
- when(')') { $self->_get_char; return $self->new_token('RPAREN'); }
+ when('[') { $self->_get_char; return $self->new_token(LBRACKET); }
+ when(']') { $self->_get_char; return $self->new_token(RBRACKET); }
+ when('(') { $self->_get_char; return $self->new_token(LPAREN); }
+ when(')') { $self->_get_char; return $self->new_token(RPAREN); }
when('<') { return $self->get_iriref }
when('_') { return $self->get_bnode }
when(/[-+0-9]/) { return $self->get_number }
when(q[']) { return $self->get_literal }
when(q["]) { return $self->get_literal }
when(':') { return $self->get_pname }
- when('.') { $self->_get_char; return $self->new_token('DOT'); }
- when(';') { $self->_get_char; return $self->new_token('SEMICOLON'); }
- when(',') { $self->_get_char; return $self->new_token('COMMA'); }
- when('^') { $self->_read_word('^^'); return $self->new_token('HATHAT'); }
+ when('.') { $self->_get_char; return $self->new_token(DOT); }
+ when(';') { $self->_get_char; return $self->new_token(SEMICOLON); }
+ when(',') { $self->_get_char; return $self->new_token(COMMA); }
+ when('^') { $self->_read_word('^^'); return $self->new_token(HATHAT); }
when(/[A-Za-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/) {
if ($self->{buffer} =~ /^a(?!:)\b/) {
$self->_get_char;
- return $self->new_token('A');
+ return $self->new_token(A);
} elsif ($self->{buffer} =~ /^(?:true|false)\b/) {
my $bool = $self->_read_length($+[0]);
- return $self->new_token('BOOLEAN', $bool);
+ return $self->new_token(BOOLEAN, $bool);
} else {
return $self->get_pname;
}
@@ -616,14 +666,14 @@
}
$self->_get_char_safe(':');
if ($self->{buffer} =~ /^$r_nameStartChar/) {
- unless ($self->{buffer} =~ /^${r_nameStartChar}(${r_nameStartChar}|${r_nameChar_extra})*/o) {
+ unless ($self->{buffer} =~ /^${r_nameStartChar}(?:${r_nameStartChar}|${r_nameChar_extra})*/o) {
$self->_error("Expected: name");
}
my $name = substr($self->{buffer}, 0, $+[0]);
$self->_read_word($name);
- return $self->new_token('PREFIXNAME', $prefix, $name);
+ return $self->new_token(PREFIXNAME, $prefix, $name);
} else {
- return $self->new_token('PREFIXNAME', $prefix);
+ return $self->new_token(PREFIXNAME, $prefix);
}
}
@@ -634,28 +684,28 @@
my $iri = substr($self->{buffer}, 0, $+[0]);
$self->_read_word($iri);
$self->_get_char_safe('>');
- return $self->new_token('IRI', $iri);
+ return $self->new_token(IRI, $iri);
}
sub get_bnode {
my $self = shift;
$self->_read_word('_:');
- unless ($self->{buffer} =~ /^${r_nameStartChar}(${r_nameStartChar}|${r_nameChar_extra})*/o) {
+ unless ($self->{buffer} =~ /^${r_nameStartChar}(?:${r_nameStartChar}|${r_nameChar_extra})*/o) {
$self->_error("Expected: name");
}
my $name = substr($self->{buffer}, 0, $+[0]);
$self->_read_word($name);
- return $self->new_token('BNODE', $name);
+ return $self->new_token(BNODE, $name);
}
sub get_number {
my $self = shift;
if ($self->{buffer} =~ /^${r_double}/) {
- return $self->new_token('DOUBLE', $self->_read_length($+[0]));
+ return $self->new_token(DOUBLE, $self->_read_length($+[0]));
} elsif ($self->{buffer} =~ /^${r_decimal}/) {
- return $self->new_token('DECIMAL', $self->_read_length($+[0]));
+ return $self->new_token(DECIMAL, $self->_read_length($+[0]));
} elsif ($self->{buffer} =~ /^${r_integer}/) {
- return $self->new_token('INTEGER', $self->_read_length($+[0]));
+ return $self->new_token(INTEGER, $self->_read_length($+[0]));
} else {
$self->throw_error("Expected number");
}
@@ -668,7 +718,7 @@
$self->_get_char;
$c = $self->_peek_char;
}
- return $self->new_token('WS');
+ return $self->new_token(WS);
}
sub get_comment {
@@ -683,7 +733,7 @@
if (length($c) and $c =~ /[\r\n]/) {
$self->_get_char;
}
- return $self->new_token('COMMENT', $comment);
+ return $self->new_token(COMMENT, $comment);
}
sub get_literal {
@@ -748,7 +798,7 @@
}
}
}
- return $self->new_token('3DSTRING', $string);
+ return $self->new_token(STRING3D, $string);
} else {
### #x22 scharacter* #x22
my $string = '';
@@ -790,7 +840,7 @@
}
}
$self->_get_char_safe(q["]);
- return $self->new_token('1DSTRING', $string);
+ return $self->new_token(STRING1D, $string);
}
}
@@ -799,14 +849,14 @@
$self->_get_char_safe('@');
if ($self->{buffer} =~ /^base/) {
$self->_read_word('base');
- return $self->new_token('BASE');
+ return $self->new_token(BASE);
} elsif ($self->{buffer} =~ /^prefix/) {
$self->_read_word('prefix');
- return $self->new_token('PREFIX');
+ return $self->new_token(PREFIX);
} else {
if ($self->{buffer} =~ /^[a-z]+(-[a-z0-9]+)*/) {
my $lang = $self->_read_length($+[0]);
- return $self->new_token('LANG', $lang);
+ return $self->new_token(LANG, $lang);
} else {
$self->throw_error("Expected keyword or language tag");
}
@@ -827,12 +877,12 @@
package RDF::Trine::Parser::TurtleToken;
use 5.014;
-use Moose;
+use MooseX::ArrayRef;
-has type => ( is => 'ro', isa => 'Str', required => 1 );
-has line => ( is => 'ro', isa => 'Int', required => 1 );
-has column => ( is => 'ro', isa => 'Int', required => 1 );
-has args => ( is => 'ro', required => 1 );
+has type => ( is => 'ro', );
+has line => ( is => 'ro', );
+has column => ( is => 'ro', );
+has args => ( is => 'ro', );
sub value {
my $self = shift;
@@ -840,6 +890,12 @@
return $args->[0];
}
+# This constructor relies on the list of attributes not changing order!
+sub fast_constructor {
+ my $class = shift;
+ bless \@_, $class;
+}
+
__PACKAGE__->meta->make_immutable;
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment