Last active
December 6, 2020 19:41
-
-
Save rgchris/f9f070c15afb64022d1361f191baa16c to your computer and use it in GitHub Desktop.
SQL Lexer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* SQL lexer grammar (W3C EBNF notation). */

/* A statement is any sequence of line breaks, blank runs, structural
   punctuation and values. */
Statement     ::= ( Newline | Whitespace | '(' | ')' | ',' | '.' | Value | ';' )*

/* Every token that is not bare punctuation or spacing. */
Value         ::= Comment-Line
                | Comment
                | String-Single
                | String-Double
                | Literal
                | Variable
                | Word
                | Number
                | Misc

/* '--' or '#' up to, but not including, the end of the line. */
Comment-Line  ::= ( '--' | '#' ) [^#xA#xD]*

/* Block comment; a '*' inside the body must not be directly followed by '/'. */
Comment       ::= '/*' ( [^*] | '*'+ [^*/] )* '*'* '*/'

/* Quoted strings: the quote may be escaped with a backslash or by doubling,
   and a backslash may itself be escaped. */
String-Single ::= "'" ( [^'\]+ | '\\' | "\'" | "''" )* "'"
String-Double ::= '"' ( [^"\]+ | '\\' | '\"' | '""' )* '"'

/* Back-tick quoted name. */
Literal       ::= '`' [^`]+ '`'

/* '@name' or '@@name'; lower-case letters, dots and underscores only. */
Variable      ::= '@' '@'? [a-z._]+          /* include '$' here? */

Word          ::= [A-Za-z] [0-9_A-Za-z]*     /* include '$' here? */

Number        ::= '-'? [0-9]+ ( '.' [0-9]+ )? ( [Ee] '-'? [0-9]+ )?

/* Catch-all: a run of anything that is not spacing, a line break or one of
   the delimiter characters. */
Misc          ::= [^#x9#xA#xD "'(),.;`]+

Newline       ::= #xD #xA? | #xA
Whitespace    ::= [#x9#x20]+
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Rebol [ | |
Title: "SQL Lexer" | |
Date: 24-Jul-2020 | |
Author: "Christopher Ross-Gill" | |
] | |
; LEXER bundles the character classes and PARSE rules used to tokenize SQL.
; Running STATEMENT over a string rewrites it in place: every value token is
; replaced by «type»«text», while punctuation and spacing are left alone.
lexer: make object! [
    ; ---- character classes ----

    ; Space and tab.
    whitespace: charset " ^-"

    ; Characters that are matched explicitly by STATEMENT or that open a
    ; quoted VALUE; they never take part in a "misc" run.
    delimiters: charset {"'(),.;`}

    ; Everything that is not whitespace, a delimiter, LF or CR.
    chars: complement union charset "^/^M" union delimiters whitespace

    digit: charset [#"0" - #"9"]
    lower-alpha: charset [#"a" - #"z"]
    upper-alpha: charset [#"A" - #"Z"]
    alpha: union upper-alpha lower-alpha

    ; Body characters of the various delimited tokens.
    word-literal: complement charset "`"
    comment-line: complement charset "^/^M"
    comment: complement charset "*"
    quoted-single: complement charset {'\}
    quoted-double: complement charset {"\}

    ; ---- state written while parsing ----

    type: _  ; word naming the kind of the VALUE matched last
    part: _  ; text captured by the most recent COPY

    delimiter: ";"

    ; ---- rules ----

    ; Matches exactly one token and records its kind in TYPE.
    value: [
        "--" any comment-line
        (type: 'comment-line)
        |
        #"#" any comment-line  ; hash comments are used by MySQL
        (type: 'comment-hash)
        |
        ; a "*" inside the comment body may not be followed by "/"
        "/*" any [comment | #"*" not #"/"] "*/"
        (type: 'comment)
        |
        #"'" any [quoted-single | "\\" | "\'" | "''"] #"'"
        (type: 'string-single)
        |
        #"^"" any [quoted-double | "\\" | {\"} | {""}] #"^""
        (type: 'string-double)
        |
        #"`" some word-literal #"`"
        (type: 'literal)
        |
        #"@" opt #"@" some [lower-alpha | #"." | #"_"]  ; include "$" here?
        (type: 'variable)
        |
        alpha any [alpha | digit | #"_"]  ; include #"$" here?
        (type: 'word)
        |
        opt #"-" some digit
        opt [#"." some digit]
        opt [[#"E" | #"e"] opt #"-" some digit]
        (type: 'number)
        |
        ; misc: any other run of non-delimiter characters, classified
        ; by its exact text
        copy part some chars
        (
            type: switch part [
                "*" ['star]
                "+" "-" "/" ['math]
                "<" "<=" "<>" ">" ">=" "=" "!=" ['operator]
                default ['other]
            ]
        )
    ]

    ; Walks the whole input, normalizing and annotating as it goes.
    statement: [
        any [
            #"^/"
            |
            ; CR and CR LF are both rewritten to a single LF
            change [#"^M" opt #"^/"] #"^/"
            |
            ; a whitespace run is replaced by DETAB of itself
            change copy part some whitespace (detab part)
            |
            delimiter
            |
            #"(" | #")" | #"," | #"."
            |
            ; the token text is swapped for its annotated form
            change copy part value (rejoin ["«" form type "»«" part "»"])
            |
            end
            |
            ; last resort when nothing above matched at this position
            ; NOTE(review): `??` is presumably PARSE's debug keyword -- confirm
            (print "SKIP: Should Not Happen") ?? skip
        ]
    ]
]
parse-sql: func [
    {Run LEXER/STATEMENT over a copy of STATEMENT and return the rewritten copy.}
    statement [text!]
    <local> position
][
    ; PARSE is given a copy, so the caller's text is not modified. Its result
    ; is kept in POSITION and must be a position at the tail of that copy;
    ; anything else trips the ASSERT.
    assert [
        all [
            position: parse/case copy statement lexer/statement
            tail? position
        ]
    ]

    ; Rewind so the caller receives the rewritten text from its start.
    head position
]

; Demonstration run.
probe parse-sql {Select * From `something` Where x < 10; @Foo @bar -- Test}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Red [ | |
Title: "SQL Lexer" | |
Date: 24-Jul-2020 | |
Author: "Christopher Ross-Gill" | |
] | |
; LEXER bundles the character classes and PARSE rules used to tokenize SQL.
; Running STATEMENT over a string rewrites it in place: every value token is
; replaced by «type»«text», while punctuation and spacing are left alone.
lexer: make object! [
    ; ---- character classes ----

    ; Space and tab.
    whitespace: charset " ^-"

    ; Characters that are matched explicitly by STATEMENT or that open a
    ; quoted VALUE; they never take part in a "misc" run.
    delimiters: charset {"'(),.;`}

    ; Everything that is not whitespace, a delimiter, LF or CR.
    chars: complement union union whitespace delimiters charset "^/^M"

    digit: charset "0123456789"
    lower-alpha: charset [#"a" - #"z"]
    upper-alpha: charset [#"A" - #"Z"]
    alpha: union lower-alpha upper-alpha

    ; Body characters of the various delimited tokens.
    word-literal: complement charset "`"
    comment-line: complement charset "^/^M"
    comment: complement charset "*"
    quoted-single: complement charset {'\}
    quoted-double: complement charset {"\}

    ; ---- state written while parsing ----

    type: none  ; word naming the kind of the VALUE matched last
    part: none  ; text captured by the most recent COPY

    delimiter: ";"

    ; ---- rules ----

    ; Matches exactly one token and records its kind in TYPE.
    value: [
        "--" any comment-line
        (type: 'comment-line)
        |
        ; a "*" inside the comment body may not be followed by "/"
        "/*" any [some comment | #"*" not #"/"] "*/"
        (type: 'comment)
        |
        #"#" any comment-line  ; Used by MySQL
        (type: 'comment-hash)
        |
        #"'" any [some quoted-single | "\\" | "\'" | "''"] #"'"
        (type: 'string-single)
        |
        #"^"" any [some quoted-double | "\\" | {\"} | {""}] #"^""
        (type: 'string-double)
        |
        #"`" some word-literal #"`"
        (type: 'literal)
        |
        #"@" opt #"@" some [some lower-alpha | #"." | #"_"]  ; include "$" here?
        (type: 'variable)
        |
        alpha any [some alpha | digit | #"_"]  ; include #"$" here?
        (type: 'word)
        |
        opt #"-" some digit
        opt ["." some digit]
        opt [[#"e" | #"E"] opt #"-" some digit]
        (type: 'number)
        |
        ; misc: any other run of non-delimiter characters, classified
        ; by its exact text
        copy part some chars
        (
            ; Red's SWITCH has no in-block `default` keyword: a bare
            ; `default [...]` case only matches the word DEFAULT itself.
            ; The fallback therefore goes through the /default refinement
            ; so unclassified tokens are typed OTHER instead of NONE.
            type: switch/default part [
                "*" ['star]
                "+" "-" "/" ['math]
                "=" "!=" ">=" ">" "<=" "<>" "<" ['operator]
            ][
                'other
            ]
        )
    ]

    ; Walks the whole input, normalizing and annotating as it goes.
    statement: [
        any [
            ; CR and CR LF are both rewritten to a single LF
            #"^/" | change [#"^M" opt #"^/"] #"^/"
            |
            ; a whitespace run is replaced by DETAB of itself
            change copy part some whitespace (detab part)
            |
            delimiter
            |
            #"(" | #")"
            |
            #"," | #"."
            |
            ; The token text is swapped for its annotated form. VALUE sets
            ; TYPE on every successful match; the `any [type '_]` guard is
            ; kept only as a defensive fallback.
            change copy part value (rejoin ["«" form any [type '_] "»«" part "»"])
            |
            end
            |
            ; last resort when nothing above matched at this position
            ; NOTE(review): confirm `??` is accepted by Red's PARSE dialect
            (print "SKIP: Should Not Happen") ?? skip
        ]
    ]
]
parse-sql: func [
    "Run LEXER/STATEMENT over a copy of STATEMENT; return the rewritten copy, or NONE if PARSE fails."
    statement [string!]
    /local working
][
    ; Parse a copy so the caller's string is not modified.
    working: copy statement

    ; Only hand the rewritten text back when PARSE reports success.
    if parse/case working lexer/statement [working]
]

; Demonstration run.
probe parse-sql {Select * From `something` Where x < 10; @Foo @bar -- Test}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment