Created May 2, 2021 23:35
with comments :)
package; Lex

// Input is expected to be unsigned 8-bit ASCII values (0..255), so we alias
// uint8_t as our character type (char_t).
alias; char_t = uint8_t

// "enum" does not allow values to be specified inside the compound, although we
// could do this in the 'enum' directive (i.e. before the ';'), e.g. enum from=-1; Name { ... }
// "enum" also only allows its values either all-on-one-line or one-per-line.
enum:
    ParserState {
        Uninitialized
        Parsing
        EOF
    }
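For comparison with a real language: a rough Go analogue of this enum (my own rendering, not part of the gist) uses iota-based constants. The Go sketches further down assume they all share this one illustrative file.

    // All of the Go sketches on this page are assumed to live in one
    // illustrative file; the later snippets reuse this header.
    package lex

    import "fmt" // used by the later sketches

    // ParserState mirrors the sketch's enum; iota assigns 0, 1, 2 implicitly.
    type ParserState int

    const (
        Uninitialized ParserState = iota
        Parsing
        EOF
    )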
// Identifiers for each type of token, with an explicit string representation
// for each value.
enum; TokenType {
    Invalid = "Invalid/Unrecognized token"
    Space   = "Whitespace (space/tab)"
    EOL     = "End-of-line"
    Word    = "Alphabetic sequence"
    Number  = "Numeric sequence"
}

proc; show_token_type(t val TokenType) {
    print("token {0:d} is {0:s}\n", t)
}
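Continuing the illustrative Go file: the explicit string representations map naturally onto Go's Stringer convention, and showTokenType is a guess at what show_token_type's {0:d}/{0:s} formatting is doing.

    type TokenType int

    const (
        Invalid TokenType = iota
        Space
        EOL
        Word
        Number
    )

    // tokenTypeNames carries the explicit per-value text from the sketch.
    var tokenTypeNames = map[TokenType]string{
        Invalid: "Invalid/Unrecognized token",
        Space:   "Whitespace (space/tab)",
        EOL:     "End-of-line",
        Word:    "Alphabetic sequence",
        Number:  "Numeric sequence",
    }

    // String lets %s print the descriptive text for a TokenType.
    func (t TokenType) String() string { return tokenTypeNames[t] }

    // showTokenType prints both the numeric and the string form, like show_token_type.
    func showTokenType(t TokenType) {
        fmt.Printf("token %d is %s\n", int(t), t)
    }

For example, showTokenType(Word) would print "token 3 is Alphabetic sequence".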
interface:
    Lengthed {
        fn; .Len(); size_t
    }

interface; Stringable {
    fn; .ToString(); string
}

interface; Reader {
    proc; .Read(bytes size_t, into []byte); size_t
}

interface; ReaderCloser {
    (Reader)
    fn; .Close()
}

interface; ReaderTryCloser {
    (Reader)
    proc; .TryClose()
}
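The bare (Reader) lines read like interface embedding. Assuming that is the intent, a Go rendering of a few of these interfaces looks like the following, with the fallible Read carrying an explicit error:

    type Lengthed interface {
        Len() int
    }

    type Stringable interface {
        ToString() string
    }

    // Reader's Read can fail, so the Go analogue of the sketch's proc returns an error.
    type Reader interface {
        Read(into []byte) (int, error)
    }

    // ReaderCloser embeds Reader, mirroring the "(Reader)" line in the sketch.
    type ReaderCloser interface {
        Reader
        Close()
    }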
struct; Token {
    // Shortcut for a member whose name is also its type.
    TokenType

    start uint32; .Start()  // => uint32 Start() const noexcept { return start; }
    end uint32
    .End(); .end            // uint32 End() const noexcept { return end; }

    // Return type inferred.
    .Len() { .End() - .Start() }

    // Declaring interface conformance is optional, but allows for additional
    // diagnostics if a type drifts away from an interface it is expected to meet.
    .(Lengthed)  // redundant since we defined .Len, but it would be an error if we had not.

    // If you want to be explicit about which members exist to satisfy an
    // interface, you can implement them inside the interface agreement:
    .(Stringable) {
        .ToString(); string {
            format("Token(.TokenType:{:s},.start:{},.end:{})\n", .TokenType.String(), .start, .end)
        }
    }
}
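A possible Go rendering of Token, treating .(Lengthed) and .(Stringable) as conformance assertions; Go's nearest equivalent is a blank-identifier assignment that fails to compile if the type stops satisfying the interface:

    type Token struct {
        TokenType         // embedded field: its name is its type, as in the sketch
        start, end uint32
    }

    func (t Token) Start() uint32 { return t.start }
    func (t Token) End() uint32   { return t.end }
    func (t Token) Len() int      { return int(t.end - t.start) }

    func (t Token) ToString() string {
        return fmt.Sprintf("Token(%s,%d,%d)", t.TokenType, t.start, t.end)
    }

    // Compile-time checks that Token satisfies the interfaces it claims to.
    var (
        _ Lengthed   = Token{}
        _ Stringable = Token{}
    )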
// Token constructor, returns a new token or an error. Being a 'proc'
// instead of a 'fn', the caller must error-check the result.
proc:
    NewToken(type TokenType; start, end uint32); Token {
        error if; end < start {
            InvalidParamError("token end must be >= start; got start={}, end={}", start, end)
        }
        return; Token{ TokenType: type, start: start, end: end }
    }

proc:
    create_new_token(type TokenType; start, end uint32); Token {
        // No compound after the expression means we simply forward the error.
        error unless let; token = NewToken(type, start, end)
        print("created new token\n")
        return; token
    }
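Reading 'proc' as 'may fail', the Go equivalent returns (Token, error), and the wrapper has to spell out the forwarding that 'error unless let' does implicitly:

    func NewToken(tt TokenType, start, end uint32) (Token, error) {
        if end < start {
            return Token{}, fmt.Errorf("token end must be >= start; got start=%d, end=%d", start, end)
        }
        return Token{TokenType: tt, start: start, end: end}, nil
    }

    func createNewToken(tt TokenType, start, end uint32) (Token, error) {
        token, err := NewToken(tt, start, end)
        if err != nil {
            return Token{}, err // forward the error, as 'error unless let' does
        }
        fmt.Print("created new token\n")
        return token, nil
    }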
// Return values can be named, and if you only want to return the named
// return values, you can use a naked 'return'.
proc:
    create_new_token(type TokenType; start, end uint32); token Token {
        error unless let; token = NewToken(type, start, end)
        print("created new token\n")
        return
    }
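Named results and the naked return exist verbatim in Go, so this variant translates almost line for line (createNewTokenNamed is my name for it, to avoid colliding with the earlier sketch):

    func createNewTokenNamed(tt TokenType, start, end uint32) (token Token, err error) {
        if token, err = NewToken(tt, start, end); err != nil {
            return // naked return: yields the named results token and err
        }
        fmt.Print("created new token\n")
        return
    }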
proc:
    create_new_token(type TokenType; start, end uint32); Token {
        // Exception to the rule: because we are ourselves a proc, we can
        // return a proc call directly and let the error propagate.
        return; NewToken(type, start, end)
    }

fn:
    create_new_token(type TokenType; start, end uint32); Token {
        // Illegal: a 'fn' cannot leave NewToken's error case unhandled.
        return; NewToken(type, start, end)
    }
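Go has no fn/proc split, but the same constraint shows up: a function whose signature carries no error cannot forward one, so it must handle the failure itself. A rough parallel:

    // mustNewToken is the closest Go parallel to a 'fn' wrapper: it cannot
    // forward the error, so it has to deal with it (here by panicking).
    func mustNewToken(tt TokenType, start, end uint32) Token {
        token, err := NewToken(tt, start, end)
        if err != nil {
            panic(err)
        }
        return token
    }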
struct; Parser {
    Filename string
    Code []char_t
    offset size_t
}

// Locate translates a byte offset into a line and column number for the
// parser's code. This saves us from having to track line and column numbers
// while lexing; we only compute them when we encounter an error.
proc of Parser:
    Locate(offset size_t); line, column size_t {
        // Check that the offset is within our code slice.
        error if; offset >= .Code.Len() {
            InvalidParamError("offset is beyond end of code")
        }
        // 'let' allows either memberwise assignment: x, y, z = Vx, Vy, Vz,
        //   where every rhs is evaluated before the first assignment,
        // or piece-wise assignment: x = Vx; y = Vy; z = Vz,
        //   where each rhs is evaluated per assignment.
        // Assignments are executed right-to-left in both cases.
        let; line, column = 1, 1
        for range i; /*from=0,*/ until=offset /*,step=1*/ {  // until= stops before, thru= stops after.
            // ".Code" is a member reference.
            if .Code[i] == ('\r', '\n') {  // if Code[i] is either of these
                let; line += 1; column = 0
            }
            let; column += 1
        }
        return  // returns the named return values
    }
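A Go sketch of Locate under the same reading, counting '\r' and '\n' each as a line break exactly as the loop above does (the names and error text mirror the sketch, not any real API):

    type Parser struct {
        Filename string
        Code     []byte // byte is Go's uint8, matching []char_t
        offset   int
    }

    // Locate translates a byte offset into 1-based line and column numbers.
    func (p *Parser) Locate(offset int) (line, column int, err error) {
        if offset >= len(p.Code) {
            return 0, 0, fmt.Errorf("offset %d is beyond end of code", offset)
        }
        line, column = 1, 1
        for i := 0; i < offset; i++ {
            if p.Code[i] == '\r' || p.Code[i] == '\n' {
                line, column = line+1, 0
            }
            column++
        }
        return line, column, nil
    }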
// Shortcut for declaring a translation function that would otherwise just be
// a switch/case/return expression.
switch:
    classify(c char_t); TokenType {
        case ' ', '\t':
            TokenType.Space
        // Cases do not fall through unless you use the fallthrough keyword.
        case '\r', '\n':
            TokenType.EOL
        case 'a'..'z', 'A'..'Z':
            TokenType.Word
        case '0'..'9':
            TokenType.Number
        // The default case is the empty case.
        case:
            TokenType.Invalid
    }
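Go's switch also does not fall through by default, so classify maps over cleanly once the character ranges become comparisons:

    func classify(c byte) TokenType {
        switch {
        case c == ' ' || c == '\t':
            return Space
        case c == '\r' || c == '\n':
            return EOL
        case (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'):
            return Word
        case c >= '0' && c <= '9':
            return Number
        default:
            return Invalid
        }
    }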
fn of Parser:
    Next(); token Token, success bool {
        let; start = .offset, .offset += 1
        // success is default-initialized to false.
        return if; start >= .Code.Len()
        if type TokenType = classify(.Code[start]); type == TokenType.Invalid {
            return
        }
        while; .offset < .Code.Len() && classify(.Code[.offset]) == type {
            let; .offset += 1
        }
        error unless let; token = NewToken(type, start, .offset)
        return; token, true
    }
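A Go sketch of Next under one plausible reading: consume a run of same-class bytes starting at the current offset, with end-of-input, an invalid byte, and a constructor failure all collapsed into ok == false purely for illustration:

    func (p *Parser) Next() (Token, bool) {
        start := p.offset
        p.offset++
        if start >= len(p.Code) {
            return Token{}, false // end of input
        }
        tt := classify(p.Code[start])
        if tt == Invalid {
            return Token{}, false
        }
        // Extend the token while the following bytes classify the same way.
        for p.offset < len(p.Code) && classify(p.Code[p.offset]) == tt {
            p.offset++
        }
        token, err := NewToken(tt, uint32(start), uint32(p.offset))
        if err != nil {
            return Token{}, false
        }
        return token, true
    }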
fn; swap(x mref <Integral>, y mref <Integral>) {
    let; x, y = y, x
}

fn; not_swap(x mref <Integral>, y mref <Integral>) {
    let; x = y; y = x
}
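The same contrast exists in Go: a tuple assignment evaluates both right-hand sides before assigning, so it swaps, while two sequential assignments clobber the first value (pointers stand in for the sketch's mref):

    // swap works: both right-hand sides are evaluated before either assignment.
    func swap(x, y *int) {
        *x, *y = *y, *x
    }

    // notSwap loses the original *x: by the time *y is assigned, *x already holds *y.
    func notSwap(x, y *int) {
        *x = *y
        *y = *x
    }

With a = 1 and b = 2, swap(&a, &b) leaves a = 2, b = 1, while notSwap(&a, &b) leaves both holding 2.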