// with comments :)
package; Lex

// Input is expected to be unsigned 8-bit character values (0..255), so we'll
// alias uint8_t as our character type.
alias; char_t = uint8_t

// "enum" does not allow values to be specified inside the compound, although we
// could do this in the 'enum' directive (i.e. before the ';'), e.g. enum from=-1; Name { ... }
// enum also only allows either all-on-one-line or one-per-line entries.
enum:
ParserState {
    Uninitialized
    Parsing
    EOF
}
// Identifiers for each type of token with explicit string representations of
// each value.
enum; TokenType {
    Invalid = "Invalid/Unrecognized token"
    Space = "Whitespace (space/tab)"
    EOL = "End-of-line"
    Word = "Alphabetic sequence"
    Number = "Numeric sequence"
}

proc; show_token_type(t val TokenType) {
    print("token {0:d} is {0:s}\n", t)
}
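
// For comparison, a rough Go sketch of an enum with explicit string forms and a
// printable representation. Go is used here purely as a familiar point of
// comparison; the names simply mirror the enum above (illustrative only):
//
//     type TokenType int
//
//     const (
//         Invalid TokenType = iota
//         Space
//         EOL
//         Word
//         Number
//     )
//
//     func (t TokenType) String() string {
//         return [...]string{
//             "Invalid/Unrecognized token",
//             "Whitespace (space/tab)",
//             "End-of-line",
//             "Alphabetic sequence",
//             "Numeric sequence",
//         }[t]
//     }
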
interface:
Lengthed {
    fn; .Len(); size_t
}

interface; Stringable {
    fn; .ToString(); string
}

interface; Reader {
    proc; .Read(bytes size_t, into []byte); size_t
}

interface; ReaderCloser {
    (Reader)
    fn; .Close()
}

interface; ReaderTryCloser {
    (Reader)
    proc; .TryClose()
}
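
// The interface declarations above appear to follow Go-style method sets and
// embedding; a rough Go equivalent of Reader/ReaderCloser might be (a sketch
// only, with the error-returning proc mapped onto Go's (value, error) style):
//
//     type Reader interface {
//         Read(bytes int, into []byte) (int, error)
//     }
//
//     type ReaderCloser interface {
//         Reader // embedded: ReaderCloser requires everything Reader does
//         Close()
//     }
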
struct; Token {
    // Shortcut for a member whose name is also its type.
    TokenType

    start uint32; .Start() // => uint32 Start() const noexcept { return start; }
    end uint32
    .End(); .end // uint32 End() const noexcept { return end; }

    // return type inferred
    .Len() { .End() - .Start() }

    // Specifying interface matching is optional, but allows for additional diagnostics
    // if a class becomes separated from an interface it is expected to meet.
    .(Lengthed) // irrelevant since we defined .Len, but would make it an error if we did not.

    // If you want to be super explicit about what you mean when you say you are complying
    // with an interface, you can implement the functions in the interface agreement:
    .(Stringable) {
        .ToString(); string {
            format("Token(.TokenType:{:s},.start:{},.end:{})\n", .TokenType.String(), .start, .end)
        }
    }
}
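
// Go has no explicit ".(Interface)" compliance declaration, but the closest
// idiom is a compile-time assertion (illustrative, assuming a Go Token type
// that implements both interfaces):
//
//     var _ Lengthed = Token{}
//     var _ Stringable = Token{}
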
// Token constructor, returns a new token or an error. Being a 'proc'
// instead of a 'fn', the caller must error-check the result.
proc:
NewToken(type TokenType; start, end uint32); Token {
    error if; end < start {
        InvalidParamError("token end must be >= start; got {}, {}", start, end)
    }
    return; Token{ TokenType: type, start: start, end: end }
}
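
// A proc's "value or error" result maps naturally onto Go's multiple return
// values; a rough sketch of the same constructor (names and the error type are
// illustrative, and assume "fmt" is imported):
//
//     func NewToken(typ TokenType, start, end uint32) (Token, error) {
//         if end < start {
//             return Token{}, fmt.Errorf("token end must be >= start; got %d, %d", start, end)
//         }
//         return Token{TokenType: typ, start: start, end: end}, nil
//     }
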
proc:
create_new_token(type TokenType; start, end uint32); Token {
    // no compound after the expression means we just forward the error
    error unless let; token = NewToken(type, start, end)
    print("created new token\n")
    return; token
}

// Return values can be named, and if you are only interested in returning
// the named return values, you can just use a naked 'return'
proc:
create_new_token(type TokenType; start, end uint32); token Token {
    error unless let; token = NewToken(type, start, end)
    print("created new token\n")
    return
}
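
// Named return values and the naked 'return' work the same way in Go, except
// that error forwarding must be spelled out explicitly (sketch only):
//
//     func createNewToken(typ TokenType, start, end uint32) (token Token, err error) {
//         if token, err = NewToken(typ, start, end); err != nil {
//             return // forwards the error to the caller
//         }
//         fmt.Print("created new token\n")
//         return // returns the named token and a nil error
//     }
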
proc:
create_new_token(type TokenType; start, end uint32); Token {
    // Exception to the rule: we're a proc, so our proc can return a proc call.
    return; NewToken(type, start, end)
}

fn:
create_new_token(type TokenType; start, end uint32); Token {
    // illegal
    return; NewToken(type, start, end) // unhandled error case
}
struct; Parser {
    Filename string
    Code []char_t
    offset size_t
}

// Locate will translate a byte-offset to a line and column number for
// the parser's code. This saves us from having to track line and column
// numbers until we encounter an error.
proc of Parser:
Locate(offset size_t); line, column size_t {
    // Check that the offset is within our code slice.
    error if; offset >= .Code.Len() {
        InvalidParamError("offset is beyond end of code")
    }
    // let allows either memberwise assignment: x, y, z = Vx, Vy, Vz,
    // where each rhs is evaluated before the first assignment,
    // or piece-wise assignment: x = Vx; y = Vy; z = Vz,
    // where each rhs is evaluated per assignment.
    // Assignments are executed rtol in both cases.
    let; line, column = 1, 1
    for range i; /*from=0,*/ until=offset /*,step=1*/ { // until= stop before, thru= stop after.
        // ".Code" is a member reference.
        if .Code[i] == ('\r', '\n') { // if Code[i] is either of these
            let; line += 1; column = 0
        }
        let; column += 1
    }
    return // returns the named parameters
}
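
// The same offset-to-line/column walk, sketched in Go for comparison
// (illustrative; it counts both '\r' and '\n' as line breaks, as above, and
// assumes "errors" is imported and Code is a byte slice on the parser):
//
//     func (p *Parser) Locate(offset int) (line, column int, err error) {
//         if offset >= len(p.Code) {
//             return 0, 0, errors.New("offset is beyond end of code")
//         }
//         line, column = 1, 1
//         for i := 0; i < offset; i++ {
//             if p.Code[i] == '\r' || p.Code[i] == '\n' {
//                 line, column = line+1, 0
//             }
//             column++
//         }
//         return line, column, nil
//     }
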
// Short-cut for declaring a translation function that would otherwise just be a
// switch/case/return expression.
switch:
classify(c char_t); TokenType {
    case ' ', '\t':
        TokenType.Space
    // cases do not fall through unless you use the fallthrough keyword.
    case '\r', '\n':
        TokenType.EOL
    case 'a'..'z', 'A'..'Z':
        TokenType.Word
    case '0'..'9':
        TokenType.Number
    // the default case is the empty case.
    case:
        TokenType.Invalid
}
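
// Written as an ordinary function in Go, the switch-expression above is
// roughly (illustrative):
//
//     func classify(c byte) TokenType {
//         switch {
//         case c == ' ' || c == '\t':
//             return Space
//         case c == '\r' || c == '\n':
//             return EOL
//         case (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'):
//             return Word
//         case c >= '0' && c <= '9':
//             return Number
//         default:
//             return Invalid
//         }
//     }
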
fn of Parser:
Next(); token Token, success bool {
    let; start = .offset, .offset += 1
    // success is default initialized false.
    return if; start >= .Code.Len()
    if type TokenType = classify(.Code[start]); type == TokenType.Invalid {
        return
    }
    while; .offset < .Code.Len() && classify(.Code[.offset]) == type {
        let; .offset += 1
    }
    error unless let; token = NewToken(type, start, .offset)
    return; token, true
}
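
// A rough Go rendering of Next, for comparison (illustrative only; the
// forwarded error is collapsed into the bool result here, and the parser's
// offset is assumed to be an int):
//
//     func (p *Parser) Next() (Token, bool) {
//         start := p.offset
//         p.offset++
//         if start >= len(p.Code) {
//             return Token{}, false
//         }
//         typ := classify(p.Code[start])
//         if typ == Invalid {
//             return Token{}, false
//         }
//         for p.offset < len(p.Code) && classify(p.Code[p.offset]) == typ {
//             p.offset++
//         }
//         token, err := NewToken(typ, uint32(start), uint32(p.offset))
//         return token, err == nil
//     }
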
fn; swap(x mref <Integral>, y mref <Integral>) {
    let; x, y = y, x
}

fn; not_swap(x mref <Integral>, y mref <Integral>) {
    let; x = y; y = x
}
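
// The same distinction holds in Go: tuple assignment evaluates the right-hand
// side before assigning, so it swaps, while sequential assignment does not
// (illustrative):
//
//     func swap(x, y *int) { *x, *y = *y, *x } // swaps
//     func notSwap(x, y *int) { *x = *y; *y = *x } // both end up with y's original value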