Created
March 1, 2020 16:24
-
-
Save cloutiy/d8687ae029f452f32f08a53c1cd64ee1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Red [ author: [email protected] ] | |
comment { | |
An experiment in scanning and parsing a programming language. | |
When an unknown token is encountered, scanner: | |
1. Gives an error message | |
2. Gives a partial dump of the last few things it was able to recognize.
3. Points out the exact place in the input where it stopped. | |
Why? | |
1. Parse dialect is really useful, but when it fails it does so silently, which is not so useful. | |
2. This program uses parse to tokenize input according to rules, but lets you know when it can't. | |
3. This approach could also be used in the next stage - semantic analysis, where it tries to match | |
rules, but when not able to match any of them, would stop and give line:col of where the problem is. | |
} | |
; Returns the 1-based line number of the last character of `input`:
; one plus the number of newline characters it contains.
get-line-number: function [ input ] [
    line: 1
    foreach ch input [
        if ch = newline [line: line + 1]
    ]
    line
]
; Returns the number of characters that follow the last newline in `input`,
; i.e. the 1-based column of the final character of `input`.
; When `input` contains no newline (everything is on the first line),
; `find/last` yields none, so the column is simply the length of `input`.
get-col-number: function [ input ] [
    last-line: find/last input newline
    either last-line [
        ; `last-line` starts AT the newline, so drop that one character.
        (length? last-line) - 1
    ][
        length? input
    ]
]
; Character classes used by the token rules below.
; Range specs replace the hand-typed alphabets (the uppercase literal
; contained a stray duplicate "I").
numbers:           charset [#"0" - #"9"]
lowercase-letters: charset [#"a" - #"z"]
uppercase-letters: charset [#"A" - #"Z"]
; Integer literal: one or more digits.
; Emits the pair  number <integer>.
number: [
    copy n some numbers
    keep ('number)
    keep (to integer! n)
]

; Double-quoted string literal with no escape handling.
; Emits the pair  string <text between the quotes>.
string: [
    #"^"" copy s to #"^"" skip      ; `skip` consumes the closing quote
    keep ('string)
    keep (s)
]
; Keyword rules: match the literal text, then emit a (type, lexeme) pair.
; `end*` carries a star because `end` is a reserved word of the parse dialect.
end*: ["end" keep ('end) keep ("end")]
def:  ["def" keep ('def) keep ("def")]
; Identifier: a run of lowercase letters. Emits  id <text>.
identifier: [
    copy id [some lowercase-letters]
    keep ('id)
    keep (id)
]

; Constant: a run of uppercase letters. Emits  constant <text>.
constant: [
    copy const [some uppercase-letters]
    keep ('constant)
    keep (const)
]
; Keyword rules: match the literal text, then emit a (type, lexeme) pair.
; Starred names avoid clashing with words Red already defines.
class: ["class" keep ('class) keep ("class")]
if*:   ["if"    keep ('if)    keep ("if")]
else*: ["else"  keep ('else)  keep ("else")]
true*: ["true"  keep ('true)  keep ("true")]
; Keyword rule for the literal `false`. Emits  false "false".
; (Previously matched the text "true", so it could never recognise `false`.)
false*: [
    "false"
    keep ('false) keep ("false") ]
; `nil` keyword: emits  nil "nil".
nil: ["nil" keep ('nil) keep ("nil")]

; Operator rules. Each matches its literal text and emits that text twice:
; once as the token type and once as the lexeme.
and*:        ["&&" keep ("&&") keep ("&&")]
or*:         ["||" keep ("||") keep ("||")]
equal*:      ["==" keep ("==") keep ("==")]
notequal*:   ["!=" keep ("!=") keep ("!=")]
lessequal*:  ["<=" keep ("<=") keep ("<=")]
greatequal*: [">=" keep (">=") keep (">=")]
assign:      ["="  keep ("=")  keep ("=")]
; Line comment: `#` up to and including the end of the line. Emits nothing.
; The `to end` alternative accepts a comment on the final line of the input
; when that line has no trailing newline.
comments: [
    "#" [thru newline | to end] ]
; Reserved words and operators, tried in this order.
; Uses the `end*` rule: a bare `end` here would be parse's end-of-input
; test, so the text "end" would never be recognised as a keyword.
; Two-character operators are listed before `assign` so "==" is not read
; as two "=" tokens.
; NOTE: word keywords match as prefixes (no word-boundary check), so e.g.
; "define" scans as `def` followed by identifier "ine".
keyword: [
      class | def | end*
    | and* | or* | equal* | notequal* | lessequal* | greatequal*
    | if* | else* | true* | false* | nil | assign
]

; Sample program to tokenize.
input: {def myfunc
CONST 123
mystring = "mystring"
end}

; Stays false on success; otherwise holds the input position where
; scanning stopped.
failure: false

; With a top-level `collect`, parse returns the block of kept values.
tokens: parse/case input [
    collect [
        any [
              number
            | keyword
            | string
            | identifier
            | constant
            | comments
            | newline
            | space
            ; Nothing matched: record the position and stop. `not end`
            ; keeps a normal end of input from being reported as an error.
            | not end failure: break
        ]
    ]
]

if failure [
    ; Everything up to and including the offending character.
    dump: copy/part input index? failure
    line: get-line-number dump
    col: get-col-number dump
    ; Show at most the last 20 characters before the stopping point.
    snippet: skip dump (length? dump) - 20
    print "**Scanning Error**"
    print [
        snippet "<-- Encountered something unexpected here on line:" line "column:" col ]
]

write %tokens.txt tokens
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment