Created
May 20, 2011 16:24
-
-
Save knowtheory/983264 to your computer and use it in GitHub Desktop.
A Parsing Expression Grammar for parsing an SQL subset. Written as a start to parsing SQL and generating Veritas relations (see: https://github.com/dkubb/veritas )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
grammar SQL | |
rule statement | |
select_statement <Veritas::SQL::Select> | |
| set_operation_statement <Veritas::SQL::SetOperation> | |
| "(" [\s]* statement [\s]* ")" | |
end | |
rule set_operation_statement | |
"(" [\s]* left:select_statement <Veritas::SQL::Select> [\s]* ")" | |
[\s]* set_operator ([\s]+ distinct:`DISTINCT`)? [\s]* | |
"(" [\s]* right:select_statement <Veritas::SQL::Select> [\s]* ")" | |
end | |
rule select_statement | |
`SELECT` ([\s]+ distinct:`DISTINCT`)? [\s]+ target_list <Veritas::SQL::Header> | |
[\s]+ `FROM` [\s]+ relation <Veritas::SQL::Relation> | |
( [\s]+ `WHERE` [\s]+ predicate <Veritas::SQL::Predicate>)? | |
( [\s]+ order_statement <Veritas::SQL::Order> )? | |
( [\s]+ limit_statement <Veritas::SQL::Limit> ([\s]+ offset_statement <Veritas::SQL::Offset>)?)? | |
end | |
rule target_list | |
"*" | (target <Veritas::SQL::HeaderTarget> ([\s]* "," [\s]* target <Veritas::SQL::HeaderTarget>)*) | |
end | |
rule target | |
value ([\s]+ `AS` [\s]+ identifier)? | |
end | |
rule relation | |
( "(" [\s]* (select_statement | relation) [\s]* ")" ([\s]* `AS` [\s]+ identifier)? | identifier ) | |
( [\s]+ `INNER` [\s]+ `JOIN` [\s]+ relation ([\s]+ `ON` [\s]+ predicate)? )* | |
end | |
rule order_statement | |
`ORDER BY` [\s]+ order_target ([\s]* "," [\s]* order_target)* | |
end | |
rule order_target | |
qualified_identifier ([\s]+ (`DESC` | `ASC`))? | |
end | |
rule predicate | |
simple_predicate [\s]+ boolean_infix [\s]+ predicate | |
| negation [\s]+ predicate | |
| simple_predicate | |
| "("[\s]* predicate [\s]*")" ([\s]+ boolean_infix [\s]+ predicate)* | |
end | |
rule simple_predicate | |
value [\s]* comparator [\s]* value | |
| value [\s]+ (negation [\s]+)? (between | in_statement) | |
| value | |
end | |
rule between | |
`BETWEEN` [\s]+ value [\s]+ `AND` [\s]+ value | |
end | |
rule in_statement | |
`IN` [\s]* value_list | |
end | |
rule value_list | |
"(" value ([\s]*","[\s]* value)* ")" | |
end | |
rule value | |
"(" [\s]* value [\s]* ")" ([\s]* operator [\s]* value)? | |
| scalar_function | |
| simple_value [\s]* operator [\s]* value | |
| simple_value | |
| qualified_identifier | |
| identifier | |
end | |
rule scalar_function | |
function_name "(" value ([\s]* "," [\s]* value)* ")" | |
end | |
rule simple_value | |
identifier | literal_value | |
end | |
rule literal_value | |
numeric | string | |
end | |
rule qualified_identifier | |
( identifier | ("\"" identifier "\"") )("." (identifier | "\"" identifier "\""))? | |
end | |
###################################### | |
# Lexical rules | |
###################################### | |
rule limit_statement | |
`LIMIT` [\s]+ [0-9]+ | |
end | |
rule offset_statement | |
`OFFSET` [\s]+ [0-9]+ | |
end | |
rule function_name | |
[A-Z]+ | |
end | |
rule boolean_infix | |
`AND` | `OR` | `XOR` | |
end | |
rule negation | |
`NOT` | |
end | |
rule comparator | |
( "=" | "<>" | ">=" | ">" | "<=" | "<" ) | |
end | |
rule operator | |
( "+" | "-" | "*" | "/" | "%" | "sqrt" | "**" ) | |
end | |
rule set_operator | |
`UNION` | `INTERSECT` | `EXCEPT` | |
end | |
rule identifier | |
[A-Za-z_][A-Za-z0-9_]* | |
end | |
rule numeric | |
[-+]?[0-9]+ ("." [0-9]+)? (`E` [-+]? [0-9]+)? | |
end | |
rule string | |
"'" ('\\\'' | /[^']+/)+ "'" | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Yeah, I'd love to see that. I've verified all the generated SQL against PostgreSQL, so I'm sure it's valid. The only things I need to do is some cleanup on my summarization branch (it's functional).