// with comments :)
package; Lex

// Input is expected to be unsigned 8-bit character values (0..255), so we'll
// alias uint8_t as our character type.
alias; char_t = uint8_t

// "enum" does not allow values to be specified inside the compound, although we
// could do this in the 'enum' directive (i.e. before the ';'), e.g. enum from=-1; Name { ... }
// enum also only allows either all-on-one-line or one-per-line entries.
enum:
ParserState {
    Uninitialized
    Parsing
    EOF
}
// Identifiers for each type of token with explicit string representations of
// each value.
enum; TokenType {
    Invalid = "Invalid/Unrecognized token"
    Space = "Whitespace (space/tab)"
    EOL = "End-of-line"
    Word = "Alphabetic sequence"
    Number = "Numeric sequence"
}

proc; show_token_type(t val TokenType) {
    print("token {0:d} is {0:s}\n", t)
}
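
// For comparison, a rough Go sketch of an enum with explicit string forms and a
// printable representation. Go is used here purely as a familiar point of
// comparison; the names simply mirror the enum above (illustrative only):
//
//     type TokenType int
//
//     const (
//         Invalid TokenType = iota
//         Space
//         EOL
//         Word
//         Number
//     )
//
//     func (t TokenType) String() string {
//         return [...]string{
//             "Invalid/Unrecognized token",
//             "Whitespace (space/tab)",
//             "End-of-line",
//             "Alphabetic sequence",
//             "Numeric sequence",
//         }[t]
//     }
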
interface:
Lengthed {
    fn; .Len(); size_t
}

interface; Stringable {
    fn; .ToString(); string
}

interface; Reader {
    proc; .Read(bytes size_t, into []byte); size_t
}

interface; ReaderCloser {
    (Reader)
    fn; .Close()
}

interface; ReaderTryCloser {
    (Reader)
    proc; .TryClose()
}
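
// The interface declarations above appear to follow Go-style method sets and
// embedding; a rough Go equivalent of Reader/ReaderCloser might be (a sketch
// only, with the error-returning proc mapped onto Go's (value, error) style):
//
//     type Reader interface {
//         Read(bytes int, into []byte) (int, error)
//     }
//
//     type ReaderCloser interface {
//         Reader // embedded: ReaderCloser requires everything Reader does
//         Close()
//     }
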
struct; Token {
    // Shortcut for a member whose name is also its type.
    TokenType

    start uint32; .Start() // => uint32 Start() const noexcept { return start; }
    end uint32
    .End(); .end // uint32 End() const noexcept { return end; }

    // return type inferred
    .Len() { .End() - .Start() }

    // Specifying interface matching is optional, but allows for additional diagnostics
    // if a class becomes separated from an interface it is expected to meet.
    .(Lengthed) // irrelevant since we defined .Len, but would make it an error if we did not.

    // If you want to be super explicit about what you mean when you say you are complying
    // with an interface, you can implement the functions in the interface agreement:
    .(Stringable) {
        .ToString(); string {
            format("Token(.TokenType:{:s},.start:{},.end:{})\n", .TokenType.String(), .start, .end)
        }
    }
}
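
// Go has no explicit ".(Interface)" compliance declaration, but the closest
// idiom is a compile-time assertion (illustrative, assuming a Go Token type
// that implements both interfaces):
//
//     var _ Lengthed = Token{}
//     var _ Stringable = Token{}
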
// Token constructor, returns a new token or an error. Being a 'proc'
// instead of a 'fn', the caller must error-check the result.
proc:
NewToken(type TokenType; start, end uint32); Token {
    error if; end < start {
        InvalidParamError("token end must be >= start; got {}, {}", start, end)
    }
    return; Token{ TokenType: type, start: start, end: end }
}
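
// A proc's "value or error" result maps naturally onto Go's multiple return
// values; a rough sketch of the same constructor (names and the error type are
// illustrative, and assume "fmt" is imported):
//
//     func NewToken(typ TokenType, start, end uint32) (Token, error) {
//         if end < start {
//             return Token{}, fmt.Errorf("token end must be >= start; got %d, %d", start, end)
//         }
//         return Token{TokenType: typ, start: start, end: end}, nil
//     }
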
proc:
create_new_token(type TokenType; start, end uint32); Token {
    // no compound after the expression means we just forward the error
    error unless let; token = NewToken(type, start, end)
    print("created new token\n")
    return; token
}

// Return values can be named, and if you are only interested in returning
// the named return values, you can just use a naked 'return'
proc:
create_new_token(type TokenType; start, end uint32); token Token {
    error unless let; token = NewToken(type, start, end)
    print("created new token\n")
    return
}
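
// Named return values and the naked 'return' work the same way in Go, except
// that error forwarding must be spelled out explicitly (sketch only):
//
//     func createNewToken(typ TokenType, start, end uint32) (token Token, err error) {
//         if token, err = NewToken(typ, start, end); err != nil {
//             return // forwards the error to the caller
//         }
//         fmt.Print("created new token\n")
//         return // returns the named token and a nil error
//     }
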
proc:
create_new_token(type TokenType; start, end uint32); Token {
    // Exception to the rule: we're a proc, so our proc can return a proc call.
    return; NewToken(type, start, end)
}

fn:
create_new_token(type TokenType; start, end uint32); Token {
    // illegal
    return; NewToken(type, start, end) // unhandled error case
}
struct; Parser {
    Filename string
    Code []char_t
    offset size_t
}

// Locate will translate a byte-offset to a line and column number for
// the parser's code. This saves us from having to track line and column
// numbers until we encounter an error.
proc of Parser:
Locate(offset size_t); line, column size_t {
    // Check that the offset is within our code slice.
    error if; offset >= .Code.Len() {
        InvalidParamError("offset is beyond end of code")
    }
    // let allows either memberwise assignment: x, y, z = Vx, Vy, Vz,
    // where each rhs is evaluated before the first assignment,
    // or piece-wise assignment: x = Vx; y = Vy; z = Vz,
    // where each rhs is evaluated per assignment.
    // Assignments are executed rtol in both cases.
    let; line, column = 1, 1
    for range i; /*from=0,*/ until=offset /*,step=1*/ { // until= stop before, thru= stop after.
        // ".Code" is a member reference.
        if .Code[i] == ('\r', '\n') { // if Code[i] is either of these
            let; line += 1; column = 0
        }
        let; column += 1
    }
    return // returns the named parameters
}
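
// The same offset-to-line/column walk, sketched in Go for comparison
// (illustrative; it counts both '\r' and '\n' as line breaks, as above, and
// assumes "errors" is imported and Code is a byte slice on the parser):
//
//     func (p *Parser) Locate(offset int) (line, column int, err error) {
//         if offset >= len(p.Code) {
//             return 0, 0, errors.New("offset is beyond end of code")
//         }
//         line, column = 1, 1
//         for i := 0; i < offset; i++ {
//             if p.Code[i] == '\r' || p.Code[i] == '\n' {
//                 line, column = line+1, 0
//             }
//             column++
//         }
//         return line, column, nil
//     }
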
// Short-cut for declaring a translation function that would otherwise just be a
// switch/case/return expression.
switch:
classify(c char_t); TokenType {
    case ' ', '\t':
        TokenType.Space
    // cases do not fall through unless you use the fallthrough keyword.
    case '\r', '\n':
        TokenType.EOL
    case 'a'..'z', 'A'..'Z':
        TokenType.Word
    case '0'..'9':
        TokenType.Number
    // the default case is the empty case.
    case:
        TokenType.Invalid
}
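
// Written as an ordinary function in Go, the switch-expression above is
// roughly (illustrative):
//
//     func classify(c byte) TokenType {
//         switch {
//         case c == ' ' || c == '\t':
//             return Space
//         case c == '\r' || c == '\n':
//             return EOL
//         case (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'):
//             return Word
//         case c >= '0' && c <= '9':
//             return Number
//         default:
//             return Invalid
//         }
//     }
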
fn of Parser:
Next(); token Token, success bool {
    let; start = .offset, .offset += 1
    // success is default initialized false.
    return if; start >= .Code.Len()
    if type TokenType = classify(.Code[start]); type == TokenType.Invalid {
        return
    }
    while; .offset < .Code.Len() && classify(.Code[.offset]) == type {
        let; .offset += 1
    }
    error unless let; token = NewToken(type, start, .offset)
    return; token, true
}
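
// A rough Go rendering of Next, for comparison (illustrative only; the
// forwarded error is collapsed into the bool result here, and the parser's
// offset is assumed to be an int):
//
//     func (p *Parser) Next() (Token, bool) {
//         start := p.offset
//         p.offset++
//         if start >= len(p.Code) {
//             return Token{}, false
//         }
//         typ := classify(p.Code[start])
//         if typ == Invalid {
//             return Token{}, false
//         }
//         for p.offset < len(p.Code) && classify(p.Code[p.offset]) == typ {
//             p.offset++
//         }
//         token, err := NewToken(typ, uint32(start), uint32(p.offset))
//         return token, err == nil
//     }
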
fn; swap(x mref <Integral>, y mref <Integral>) {
    let; x, y = y, x
}

fn; not_swap(x mref <Integral>, y mref <Integral>) {
    let; x = y; y = x
}
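
// The same distinction holds in Go: tuple assignment evaluates the right-hand
// side before assigning, so it swaps, while sequential assignment does not
// (illustrative):
//
//     func swap(x, y *int) { *x, *y = *y, *x } // swaps
//     func notSwap(x, y *int) { *x = *y; *y = *x } // both end up with y's original value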