Created
January 4, 2012 04:10
-
-
Save jeffreykegler/1558440 to your computer and use it in GitHub Desktop.
Example: Subset of SQL-2003 SQL INSERT statement
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use strict; | |
use warnings; | |
use 5.010; | |
use Marpa::XS; | |
use Data::Dumper; | |
# An SQL INSERT statement for testing | |
my $text = q{ | |
INSERT INTO `tehtable` ( `field00`, `field01` ) VALUES ( 123, `abcd\'e | |
+fgh` ); | |
}; | |
# PART 1: LEXICAL ANALYSIS | |
my %symbol_for_char = ( | |
'(' => 'OPEN_PAREN', | |
')' => 'CLOSE_PAREN', | |
';' => 'SEMICOLON', | |
',' => 'COMMA', | |
); | |
sub read_terminals { | |
my ( $recce, $text ) = @_; | |
DO_TOKEN: while (1) { | |
my $token_type = undef; | |
my $token_value = undef; | |
FIND_TYPE_AND_VALUE: { | |
next DO_TOKEN if $text =~ m{\G \s+ }gcxms; | |
if ($text =~ m{\G \s* ([`'"]) ((?:[^\\\1] | \\.)*?) \1 \s*}gcxms ) | |
{ | |
$token_type = 'STRING'; | |
$token_value = $2; | |
last FIND_TYPE_AND_VALUE; | |
} ## end if ( $text =~ ...) | |
if ( $text =~ m{\G \s* INSERT \s*}gcxmsi ) { | |
$token_type = 'INSERT'; | |
last FIND_TYPE_AND_VALUE; | |
} | |
if ( $text =~ m{\G \s* INTO \s*}gcxmsi ) { | |
$token_type = 'INTO'; | |
last FIND_TYPE_AND_VALUE; | |
} | |
if ( $text =~ m{\G \s* VALUES \s*}gcxmsi ) { | |
$token_type = 'VALUES'; | |
last FIND_TYPE_AND_VALUE; | |
} | |
if ( $text =~ m{\G \s* (\.\d+ | \d+ (?:\.\d*)?) \s*}gcxms ) { | |
$token_type = 'NUMBER'; | |
$token_value = $1; | |
last FIND_TYPE_AND_VALUE; | |
} | |
if ( $text =~ m{\G (.) }gcxms ) { | |
my $char = $1; | |
if ( exists( $symbol_for_char{$char} ) ) { | |
$token_type = $symbol_for_char{$char}; | |
$token_value = $1; | |
} | |
else { | |
$token_type = 'CHAR'; | |
} | |
} ## end if ( $text =~ m{\G (.) }gcxms ) | |
} ## end FIND_TYPE_AND_VALUE: | |
return if not defined $token_type; | |
if ( defined $recce->read( $token_type, $token_value ) ) { | |
say "Reading Token: $token_type"; | |
} | |
else { | |
die "Error reading Token: $token_type"; | |
} | |
} ## end while (1) | |
} ## end sub read_terminals | |
# Part 2: THE GRAMMAR | |
my $grammar = Marpa::XS::Grammar->new( | |
{ start => 'sql', | |
actions => 'My_Action', | |
default_action => 'do_what_I_mean', | |
inaccessible_ok => [qw(SPACE)], | |
terminals => [ | |
qw( | |
OPEN_PAREN CLOSE_PAREN | |
SPACE COMMA | |
INSERT INTO VALUES | |
STRING NUMBER | |
SEMICOLON | |
) | |
], | |
rules => [ | |
{ lhs => 'sql', | |
rhs => [qw(insert_statement SEMICOLON)], | |
action => 'child1' | |
}, | |
{ lhs => 'insert_statement', | |
rhs => [ | |
qw(INSERT INTO insertion_target insert_columns_and_source) | |
], | |
action => 'last_arg' | |
}, | |
{ lhs => 'insertion_target', | |
rhs => [qw(table_name)], | |
}, | |
{ lhs => 'table_name', | |
rhs => [qw(STRING)], | |
}, | |
{ lhs => 'insert_columns_and_source', | |
rhs => [qw(from_constructor)], | |
}, | |
{ lhs => 'from_constructor', | |
rhs => [ | |
qw(from_constructor_column_list contextually_typed_value_constructor) | |
] | |
}, | |
{ lhs => 'from_constructor_column_list' }, | |
{ lhs => 'from_constructor_column_list', | |
rhs => [qw(OPEN_PAREN insert_column_list CLOSE_PAREN)], | |
action => 'child2' | |
}, | |
{ lhs => 'insert_column_list', | |
rhs => ['column_name_list'], | |
}, | |
{ lhs => 'column_name_list', | |
rhs => ['column_name'], | |
separator => 'COMMA', | |
min => 1 | |
}, | |
{ lhs => 'column_name', rhs => ['STRING'] }, | |
{ lhs => 'contextually_typed_value_constructor', | |
rhs => | |
[qw(VALUES contextually_typed_row_value_expression_list)] | |
}, | |
{ lhs => 'contextually_typed_row_value_expression_list', | |
rhs => ['contextually_typed_row_value_expression'], | |
separator => 'COMMA', | |
min => 1 | |
}, | |
{ lhs => 'contextually_typed_row_value_expression', | |
rhs => ['contextually_typed_row_value_constructor'] | |
}, | |
{ lhs => 'contextually_typed_row_value_constructor', | |
rhs => ['common_value_expression'] | |
}, | |
{ lhs => 'contextually_typed_row_value_constructor', | |
rhs => [ | |
qw(OPEN_PAREN | |
contextually_typed_row_value_constructor_expression_list CLOSE_PAREN) | |
], | |
action => 'child2' | |
}, | |
{ lhs => | |
'contextually_typed_row_value_constructor_expression_list', | |
rhs => ['contextually_typed_row_value_constructor_element'], | |
separator => 'COMMA', | |
min => 1 | |
}, | |
{ lhs => 'contextually_typed_row_value_constructor_element', | |
rhs => ['value_expression'] | |
}, | |
{ lhs => 'value_expression', | |
rhs => ['common_value_expression'] | |
}, | |
{ lhs => 'common_value_expression', | |
rhs => ['STRING'] | |
}, | |
{ lhs => 'common_value_expression', | |
rhs => ['NUMBER'] | |
} | |
], | |
} | |
); | |
# Part 3: THE SEMANTICS | |
# Do What I Mean: That is, return a value | |
# that is what I want, most of the time. | |
# | |
# Specificially: | |
# Always throw away the per-parse variable, | |
# which is ignored in this application. | |
# Throw away any undefined child values as | |
# well. If only one child value is left, it is | |
# returned. If muliple child values are left | |
# a reference to an array of them is returned. | |
# | |
sub My_Action::do_what_I_mean { | |
my @args = grep {defined} @_[ 1 .. $#_ ]; | |
return undef if scalar @args <= 0; | |
return $args[0] if scalar @args == 1; | |
return \@args; | |
} ## end sub My_Action::do_what_I_mean | |
# Value is the value of the first child | |
sub My_Action::child1 { | |
return $_[1]; | |
} | |
# Value is the value of the second child | |
sub My_Action::child2 { | |
return $_[2]; | |
} | |
# Value is the value of the last child | |
sub My_Action::last_arg { | |
return $_[-1]; | |
} | |
# Part 4: DO IT | |
$grammar->precompute; | |
my $rec = Marpa::XS::Recognizer->new( { grammar => $grammar } ); | |
read_terminals( $rec, $text ); | |
my $value_ref = $rec->value(); | |
die "No parse" if not defined $value_ref; | |
say Data::Dumper->Dump( [ ${$value_ref} ], ['value'] ); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment