JarrettBillingsley · September 29, 2012 03:31
diff --git a/gistfile1.d b/gistfile1.d
 // =====================================================================================================================
 // MemPool interface (???)
 // "Interfaces" could be fat pointers.. pointer to a "vtbl" and the "this" object.
 // Of course if the type is statically known, you don't have to pass the vtbl around.

 struct MemPoolMark { ... }

 interface MemPool
 {
    function mark(): MemPoolMark
    function freeToMark(m: &MemPoolMark)
 }

 // =====================================================================================================================
 // Error Type

 // Generics indicated with brackets
 enum Error[T]
 {
    Error(string) // string is a built-in type, probably UTF-8
    OK(T)
 }

 // Macros? Need something to replace the preprocessor. Also everything is an expression, so we can use the "match"
 // (switch replacement) in here as an expression.
 macro checkError[T](val: Error[T]): T =
    match(val)
    {
        case Error(m): return Error(m) // "return" is of polymorphic type so it unifies with anything else.
        case OK(v): v
    }

 // =====================================================================================================================
 // JSON Parser

 // public is visible outside this module, private is module-local.. I suppose some kind of package protection would
 // be useful too (though more sanely-defined than in D)
 public:

 // enum is how you define sum types.
 enum JsonValue
 {
    Null // Commas optional
    Bool(bool)
    Number(double)
    String(string),
    Object(Hash[string, JsonValue]) // again, square brackets for generics
    Array(Array[JsonValue])
 }

 function parseJson(s: string, pool: MemPool): Error[JsonValue]
 {
    let l: Lexer // variables declared with 'let'; default-inited unless otherwise specified (like D)
    #checkError(l.begin(s, pool))

    let ret =
        if(l.tok.type == Tok.LBrace)
            #checkError(l.parseObject()) // macros are instantiated with #name.. maybe. conflicts with length operator
        else if(l.type == Token.LBracket)
            #checkError(l.parseArray())
        else
            l.error("JSON must have an object or an array as its top-level value")

    // tag() would be a builtin, kinda like sizeof() in C, that gets you the tag of an enum type (for when a match
    // would be too cumbersome).
    if(tag(ret) == Error.Error)
        return ret

    #checkError(l.expect(Tok.EOF))
    return Error.OK(ret)
 }

 private:

 // This enum "derives" from an integer and therefore can only have nullary constructors. Works much like an enum
 // in D, though more complete (Tok is an actual type, can't implicitly convert between Tok and uint, must explicitly
 // cast from Tok to uint, auto-generated functions for it -- Tok_toString, Tok_parse, Tok_fromInteger)
 enum Tok : uint
 {
    String
    Number
    True
    False
    Null

    Comma
    Colon
    LBrace
    RBrace
    LBracket
    RBracket

    EOF
 }

 // But here we make our OWN damn strings :P
 // Designated initializer syntax borrowed from D
 // (Both Tok and TokStrings could be generated with a macro I suppose)
 const TokStrings =
 [
    Tok.String: "string"
    Tok.Number: "number"
    Tok.True: "true"
    Tok.False: "false"
    Tok.Null: "null"

    Tok.Comma: ","
    Tok.Colon: ":"
    Tok.LBrace: "{"
    Tok.RBrace: "}"
    Tok.LBracket: "["
    Tok.RBracket: "]"

    Tok.EOF: "<EOF>"
 ]

 // Plain old struct!
 // You can also have structs in enums but I didn't use them here
 struct Token
 {
    type: Tok
    line: uint
    col: uint
    strval: string
    numval: float
 }

 struct Lexer
 {
    line: uint
    col: uint
    source: string
    position: uint
    character: char
    tok: Token
    pool: MemPool
    poolMark: MemPoolMark
 }

 // Okay, syntactic sugar time.
 // When you declare "function Type.name(params..)", it's sugar for "function Type_name(this: &Type, params..)".
 // Thus this function's name is actually Lexer_error, and its first parameter is a Lexer reference (whose name inside
 // the function is 'this').
 function Lexer.error[T](msg: string, vararg): Error[T]
 {
    .pool.freeToMark(.poolMark) // Furthermore, since accessing members of 'this' is very common, .memb is a shortcut
    return Error.Error(msg.vformat(.pool, msg, vararg)) // Fully-qualified enum tags.. hmm

    // Also note that this function has a polymorphic return type, meaning it can be used in any context where any
    // Error[T] is expected and the proper type will be instantiated.
 }

 // Now lemme just talk a little about the error handling. There are no exceptions (probably?), but ADTs let us signal
 // errors in-band while still allowing for the full range of possible return values AND forcing code to check for
 // errors. Sweet. Unfortunately this makes stuff a little awkward, which is where that checkError macro from above
 // comes in. #checkError(exp) will evaluate exp, and if it evaluates to an error value, returns the error value;
 // otherwise it evaluates to the OK value of exp. Simple, though doesn't leave much room for cleaning up. That'd be
 // the domain of a more complex error-handling mechanism..

 function Lexer.jsonError(msg: string, vararg): Error[JsonValue] =
    .error(msg, vararg)

 function Lexer.expected(msg: string): Error[Token] =
    .error("({}:{}): '{}' expected; found '{}' instead", .tok.line, .tok.col, msg, TokStrings[.tok.type])

 function Lexer.begin(source: string, pool: MemPool) : Error[Token]
 {
    .line = 1
    .col = 0
    .source = source
    .position = 0
    .pool = pool
    // all allocations after this point will be kept track of so that we can free them in a single chunk. That's what
    // the ".pool.freeToMark" call is for up in Lexer.error.
    .poolMark = pool.mark()

    .nextChar()
    return #checkError(.nextToken())
 }

 function Lexer.expect(Tok type): Error[Token]
 {
    if(.tok.type != type)
        return Error.Error(.expected(TokStrings[type]))

    let ret = .tok

    if(type != Tok.EOF)
        #checkError(.nextToken())

    return Error.OK(ret)
 }

 function Lexer.isEOF() =          .character == meta(char, max)
 function Lexer.isWhitespace() =   .character in " \t\r\n"
 function Lexer.isNewline() =      .character in "\r\n"
 function Lexer.isHexDigit() =     (.character in '0' ... '9') || (.character in 'a' ... 'f') || (.character in 'A' ... 'F')
 function Lexer.isDecimalDigit() = .character in '0' ... '9'

 // char is a Unicode character.
 function Lexer.hexDigitToInt(c: char): uint
 {
    // Three things: ranges (X .. Y is inclusive X, exclusive Y; X ... Y is inclusive X and Y, just like Ruby). Ranges
    // are not a type, just a mechanism. Second, you can put them on the RHS of 'in' and it acts as shortcut for a
    // double-comparison. So this is the same as "c >= '0' && c <= '9'". Third, cast[Type]exp is the cast syntax, square
    // brackets for consistency with generics.
    if(c in '0' ... '9')
        return cast[uint](c - '0') // result of char - char is int, not char!
    else if(c in 'A' .. 'F')
        return cast[uint](c - 'A') + 10
    else
        return cast[uint](c - 'a') + 10
 }

 function Lexer.readChar(): char
 {
    if(.position >= #.source) // # is the size-of operator, not in the C sense, more like D's .length
        return meta(char, max) // meta() is another built-in construct, used for querying info about types.
    else
        return .source.decode(.position) // decode would be of type function(idx: &uint):char and would decode one code
        // unit from the UTF-8 string and advance idx.
 }

 function Lexer.nextChar()
 {
    .col++
    .character = .readChar()
 }

 function Lexer.nextLine()
 {
    while(.isNewline() && !.isEOF())
    {
        let old = .character

        .nextChar()

        if(.isNewline() && .character != old)
            .nextChar()

        .line++
        .col = 1
    }
 }

 function Lexer.readNumLiteral(): Error[double]
 {
    let neg = false
    let ret: double = 0

    // sign
    if(.character == '-')
    {
        neg = true
        .nextChar()

        if(!.isDecimalDigit())
            return .error("({}:{}): incomplete number token", .line, .col)
    }

    // integral part
    if(.character == '0')
        .nextChar()
    else
    {
        while(.isDecimalDigit())
        {
            ret = (ret * 10) + (.character - '0')
            .nextChar()
        }
    }

    if(.isEOF() || .character !in ".eE") // in/!in can also be used on constant arrays/strings. can be optimized.
        return Error.OK(neg ? -ret : ret) // ternary operator.. do we need it since if() is an expression?

    // fraction
    if(.character == '.')
    {
        .nextChar()

        if(!.isDecimalDigit())
            return .error("({}:{}): incomplete number token", .line, .col)

        let frac = 0.0
        let mag = 10.0

        while(.isDecimalDigit())
        {
            frac += (.character - '0') / mag
            mag *= 10
            .nextChar()
        }

        ret += frac
    }

    // exponent
    if(.character in "eE")
    {
        .nextChar()

        if(!.isDecimalDigit() && .character !in "+-")
            return .error("({}:{}): incomplete number token", .line, .col)

        let negExp = false

        if(.character == '+')
            .nextChar()
        else if(.character == '-')
        {
            negExp = true
            .nextChar()
        }

        if(!.isDecimalDigit())
            return .error("({}:{}): incomplete number token", .line, .col)

        let exp = 0.0

        while(.isDecimalDigit())
        {
            exp = (exp * 10) + (.character - '0')
            .nextChar()
        }

        ret *= 10.pow(negExp? -exp : exp)
    }

    return Error.OK(neg ? -ret : ret)
 }

 function Lexer.readHexDigits(num: uint): Error[uint]
 {
    let ret = 0

    for(i: 0 .. num)
    {
        if(!.isHexDigit())
            return .error("({}:{}): Hexadecimal escape digits expected", .line, .col)

        ret <<= 4
        ret |= .hexDigitToInt(.character)
        .nextChar()
    }

    return Error.OK(ret)
 }

 function Lexer.readEscapeSequence(beginningLine: uint, beginningCol: uint): Error[char]
 {
    .nextChar()

    if(.isEOF())
        return .error("({}:{}): Unterminated string literal", beginningLine, beginningCol)

    match(.character)
    {
        case 'b':  .nextChar(); return Error.OK('\b') // semicolons exist, they're useful for putting multiple statements
        case 'f':  .nextChar(); return Error.OK('\f') // on one line.
        case 'n':  .nextChar(); return Error.OK('\n')
        case 'r':  .nextChar(); return Error.OK('\r')
        case 't':  .nextChar(); return Error.OK('\t')
        case '\\': .nextChar(); return Error.OK('\\')
        case '\"': .nextChar(); return Error.OK('\"')
        case '/':  .nextChar(); return Error.OK('/')

        case 'u':
            .nextChar()

            let x = #checkError(.readHexDigits(4))

            if(x in 0xD800 .. 0xDC00)
            {
                if(.character != '\\')
                    return .error("({}:{}): second surrogate pair character expected", .line, .col)

                .nextChar()

                if(.character != 'u')
                    return .error("({}:{}): second surrogate pair character expected", .line, .col)

                .nextChar()

                let x2 = #checkError(.readHexDigits(4))

                if(x2 !in 0xDC00 .. 0xE000)
                    return .error("({}:{}): invalid surrogate pair sequence", .line, .col)

                x = (x & ~0xD800) << 10
                x2 &= ~0xDC00
                return Error.OK(cast[char](0x10000 + (x << 10 | x2)))
            }
            else if(x in 0xDC00 .. 0xE000)
                return .error("({}:{}): invalid surrogate pair sequence", .line, .col)
            else
                return Error.OK(cast[char]x)

        case _: // this replaces "default:"
            return .error("({}:{}): Invalid string escape sequence '\\{}'", .line, .col, .character)
    }
 }

 function Lexer.readStringLiteral(): Error[string]
 {
    let beginningLine = .line
    let beginningCol = .col
    let buf = StringBuilder(.pool) // using the memory pool to allocate the string as we build it

    // Skip opening quote
    .nextChar()

    while(true)
    {
        if(.isEOF())
            return .error("({}:{}): Unterminated string literal", beginningLine, beginningCol)

        if(.character == '\\')
            buf.addChar(#checkError(.readEscapeSequence(beginningLine, beginningCol)))
        else if(.character == '"')
        {
            .nextChar()
            break
        }
        else if(.character < '\x20')
            return .error("({}:{}): Invalid character in string token", .line, .col)
        else
        {
            buf.addChar(.character)
            .nextChar()
        }
    }

    return buf.finish()
 }

 void Lexer.nextToken(): Error[Token]
 {
    while(true)
    {
        .tok.line = .line
        .tok.col = .col

        match(.character)
        {
            case '\r', '\n':
                .nextLine()
                continue

            case '\"':
                .tok.strval = #checkError(.readStringLiteral())
                .tok.type = Tok.String
                return Error.OK(.tok)

            case '{':
                .nextChar()
                .tok.type = Tok.LBrace
                return

            case '}':
                .nextChar()
                .tok.type = Tok.RBrace
                return

            case '[':
                .nextChar()
                .tok.type = Tok.LBracket
                return

            case ']':
                .nextChar()
                .tok.type = Tok.RBracket
                return

            case ',':
                .nextChar()
                .tok.type = Tok.Comma
                return

            case ':':
                .nextChar()
                .tok.type = Tok.Colon
                return

            case meta(char, max):
                .tok.type = Tok.EOF
                return

            case 't':
                if(!.source[mPosition .. $].startsWith("rue")) // slicing syntax?? why not?
                    return .error("({}:{}): true expected", :line, :col)

                .nextChar()
                .nextChar()
                .nextChar()
                .nextChar()
                .tok.type = Tok.True
                return

            case 'f':
                if(!.source[.position .. $].startsWith("alse"))
                    return .error("({}:{}): false expected", :line, :col)

                .nextChar()
                .nextChar()
                .nextChar()
                .nextChar()
                .nextChar()
                .tok.type = Tok.False
                return

            case 'n':
                if(!.source[.position .. $].startsWith("ull"))
                    return .error("({}:{}): null expected", :line, :col)

                .nextChar()
                .nextChar()
                .nextChar()
                .nextChar()
                .tok.type = Tok.Null
                return

            case '-', '1' ... '9': // ranges work in cases too
                .tok.numval = #checkError(.readNumLiteral())
                .tok.type = Tok.Number
                return

            case _:
                if(.isWhitespace())
                {
                    .nextChar()
                    continue
                }

                return .error("({}:{}): Invalid character '{}'", :line, :col, .character)
        }
    }
 }

 function Lexer.parseValue(): Error[JsonValue]
 {
    match(.tok.type)
    {
        case String:   #checkError(.nextToken()); return Error.OK(JsonValue.String(.tok.strval))
        case Number:   #checkError(.nextToken()); return Error.OK(JsonValue.Number(.tok.numval))
        case True:     #checkError(.nextToken()); return Error.OK(JsonValue.Bool(true))
        case False:    #checkError(.nextToken()); return Error.OK(JsonValue.Bool(false))
        case Null:     #checkError(.nextToken()); return Error.OK(JsonValue.Null)
        case LBrace:                              return .parseObject()
        case LBracket:                            return .parseArray()
        case _:                                   return .error("({}:{}): value expected", .line, .col)
    }
 }

 function Lexer.parseArray(): Error[JsonValue]
 {
    let ret = ArrayBuilder[JsonValue](.pool) // Again, like the StringBuilder

    #checkError(.expect(Tok.LBracket))

    if(.tok.type != Token.RBracket)
    {
        ret.append(#checkError(.parseValue()))

        while(.tok.type == Tok.Comma)
        {
            #checkError(.nextToken())
            ret.append(#checkError(.parseValue()))
        }
    }

    #checkError(.expect(Tok.RBracket))
    return Error.OK(JsonValue.Array(ret.finish()))
 }

 function Lexer.parsePair(): Error[(string, JsonValue)] // Tuple types?? maybe
 {
    let key = #checkError(.expect(Tok.String)).strval
    #checkError(.expect(Tok.Colon))
    let val = #checkError(.parseValue())
    return (key, val)
 }

 function Lexer.parseObject(): Error[JsonValue]
 {
    let h = Hash[string, JsonValue](.pool)

    #checkError(.expect(Tok.LBrace))

    if(.tok.type != Tok.RBrace)
    {
        let (key, val) = #checkError(.parsePair()) // deconstructing assignment is always fun
        h.insert(key, val)

        while(.tok.type == Tok.Comma)
        {
            #checkError(.nextToken())
            (key, val) = #checkError(.parsePair())
            h.insert(key, val)
        }
    }

    #checkError(.expect(Tok.RBrace))
    return Error.OK(JsonValue.Object(h))
 }
	// =====================================================================================================================
	// MemPool interface (???)
	// "Interfaces" could be fat pointers.. pointer to a "vtbl" and the "this" object.
	// Of course if the type is statically known, you don't have to pass the vtbl around.

	struct MemPoolMark { ... }

	interface MemPool
	{
	function mark(): MemPoolMark
	function freeToMark(m: &MemPoolMark)
	}

	// =====================================================================================================================
	// Error Type

	// Generics indicated with brackets
	enum Error[T]
	{
	Error(string) // string is a built-in type, probably UTF-8
	OK(T)
	}

	// Macros? Need something to replace the preprocessor. Also everything is an expression, so we can use the "match"
	// (switch replacement) in here as an expression.
	macro checkError[T](val: Error[T]): T =
	match(val)
	{
	case Error(m): return Error(m) // "return" is of polymorphic type so it unifies with anything else.
	case OK(v): v
	}

	// =====================================================================================================================
	// JSON Parser

	// public is visible outside this module, private is module-local.. I suppose some kind of package protection would
	// be useful too (though more sanely-defined than in D)
	public:

	// enum is how you define sum types.
	enum JsonValue
	{
	Null // Commas optional
	Bool(bool)
	Number(double)
	String(string),
	Object(Hash[string, JsonValue]) // again, square brackets for generics
	Array(Array[JsonValue])
	}

	function parseJson(s: string, pool: MemPool): Error[JsonValue]
	{
	let l: Lexer // variables declared with 'let'; default-inited unless otherwise specified (like D)
	#checkError(l.begin(s, pool))

	let ret =
	if(l.tok.type == Tok.LBrace)
	#checkError(l.parseObject()) // macros are instantiated with #name.. maybe. conflicts with length operator
	else if(l.type == Token.LBracket)
	#checkError(l.parseArray())
	else
	l.error("JSON must have an object or an array as its top-level value")

	// tag() would be a builtin, kinda like sizeof() in C, that gets you the tag of an enum type (for when a match
	// would be too cumbersome).
	if(tag(ret) == Error.Error)
	return ret

	#checkError(l.expect(Tok.EOF))
	return Error.OK(ret)
	}

	private:

	// This enum "derives" from an integer and therefore can only have nullary constructors. Works much like an enum
	// in D, though more complete (Tok is an actual type, can't implicitly convert between Tok and uint, must explicitly
	// cast from Tok to uint, auto-generated functions for it -- Tok_toString, Tok_parse, Tok_fromInteger)
	enum Tok : uint
	{
	String
	Number
	True
	False
	Null

	Comma
	Colon
	LBrace
	RBrace
	LBracket
	RBracket

	EOF
	}

	// But here we make our OWN damn strings :P
	// Designated initializer syntax borrowed from D
	// (Both Tok and TokStrings could be generated with a macro I suppose)
	const TokStrings =
	[
	Tok.String: "string"
	Tok.Number: "number"
	Tok.True: "true"
	Tok.False: "false"
	Tok.Null: "null"

	Tok.Comma: ","
	Tok.Colon: ":"
	Tok.LBrace: "{"
	Tok.RBrace: "}"
	Tok.LBracket: "["
	Tok.RBracket: "]"

	Tok.EOF: "<EOF>"
	]

	// Plain old struct!
	// You can also have structs in enums but I didn't use them here
	struct Token
	{
	type: Tok
	line: uint
	col: uint
	strval: string
	numval: float
	}

	struct Lexer
	{
	line: uint
	col: uint
	source: string
	position: uint
	character: char
	tok: Token
	pool: MemPool
	poolMark: MemPoolMark
	}

	// Okay, syntactic sugar time.
	// When you declare "function Type.name(params..)", it's sugar for "function Type_name(this: &Type, params..)".
	// Thus this function's name is actually Lexer_error, and its first parameter is a Lexer reference (whose name inside
	// the function is 'this').
	function Lexer.error[T](msg: string, vararg): Error[T]
	{
	.pool.freeToMark(.poolMark) // Furthermore, since accessing members of 'this' is very common, .memb is a shortcut
	return Error.Error(msg.vformat(.pool, msg, vararg)) // Fully-qualified enum tags.. hmm

	// Also note that this function has a polymorphic return type, meaning it can be used in any context where any
	// Error[T] is expected and the proper type will be instantiated.
	}

	// Now lemme just talk a little about the error handling. There are no exceptions (probably?), but ADTs let us signal
	// errors in-band while still allowing for the full range of possible return values AND forcing code to check for
	// errors. Sweet. Unfortunately this makes stuff a little awkward, which is where that checkError macro from above
	// comes in. #checkError(exp) will evaluate exp, and if it evaluates to an error value, returns the error value;
	// otherwise it evaluates to the OK value of exp. Simple, though doesn't leave much room for cleaning up. That'd be
	// the domain of a more complex error-handling mechanism..

	function Lexer.jsonError(msg: string, vararg): Error[JsonValue] =
	.error(msg, vararg)

	function Lexer.expected(msg: string): Error[Token] =
	.error("({}:{}): '{}' expected; found '{}' instead", .tok.line, .tok.col, msg, TokStrings[.tok.type])

	function Lexer.begin(source: string, pool: MemPool) : Error[Token]
	{
	.line = 1
	.col = 0
	.source = source
	.position = 0
	.pool = pool
	// all allocations after this point will be kept track of so that we can free them in a single chunk. That's what
	// the ".pool.freeToMark" call is for up in Lexer.error.
	.poolMark = pool.mark()

	.nextChar()
	return #checkError(.nextToken())
	}

	function Lexer.expect(Tok type): Error[Token]
	{
	if(.tok.type != type)
	return Error.Error(.expected(TokStrings[type]))

	let ret = .tok

	if(type != Tok.EOF)
	#checkError(.nextToken())

	return Error.OK(ret)
	}

	function Lexer.isEOF() = .character == meta(char, max)
	function Lexer.isWhitespace() = .character in " \t\r\n"
	function Lexer.isNewline() = .character in "\r\n"
	function Lexer.isHexDigit() = (.character in '0' ... '9') \|\| (.character in 'a' ... 'f') \|\| (.character in 'A' ... 'F')
	function Lexer.isDecimalDigit() = .character in '0' ... '9'

	// char is a Unicode character.
	function Lexer.hexDigitToInt(c: char): uint
	{
	// Three things: ranges (X .. Y is inclusive X, exclusive Y; X ... Y is inclusive X and Y, just like Ruby). Ranges
	// are not a type, just a mechanism. Second, you can put them on the RHS of 'in' and it acts as shortcut for a
	// double-comparison. So this is the same as "c >= '0' && c <= '9'". Third, cast[Type]exp is the cast syntax, square
	// brackets for consistency with generics.
	if(c in '0' ... '9')
	return cast[uint](c - '0') // result of char - char is int, not char!
	else if(c in 'A' .. 'F')
	return cast[uint](c - 'A') + 10
	else
	return cast[uint](c - 'a') + 10
	}

	function Lexer.readChar(): char
	{
	if(.position >= #.source) // # is the size-of operator, not in the C sense, more like D's .length
	return meta(char, max) // meta() is another built-in construct, used for querying info about types.
	else
	return .source.decode(.position) // decode would be of type function(idx: &uint):char and would decode one code
	// unit from the UTF-8 string and advance idx.
	}

	function Lexer.nextChar()
	{
	.col++
	.character = .readChar()
	}

	function Lexer.nextLine()
	{
	while(.isNewline() && !.isEOF())
	{
	let old = .character

	.nextChar()

	if(.isNewline() && .character != old)
	.nextChar()

	.line++
	.col = 1
	}
	}

	function Lexer.readNumLiteral(): Error[double]
	{
	let neg = false
	let ret: double = 0

	// sign
	if(.character == '-')
	{
	neg = true
	.nextChar()

	if(!.isDecimalDigit())
	return .error("({}:{}): incomplete number token", .line, .col)
	}

	// integral part
	if(.character == '0')
	.nextChar()
	else
	{
	while(.isDecimalDigit())
	{
	ret = (ret * 10) + (.character - '0')
	.nextChar()
	}
	}

	if(.isEOF() \|\| .character !in ".eE") // in/!in can also be used on constant arrays/strings. can be optimized.
	return Error.OK(neg ? -ret : ret) // ternary operator.. do we need it since if() is an expression?

	// fraction
	if(.character == '.')
	{
	.nextChar()

	if(!.isDecimalDigit())
	return .error("({}:{}): incomplete number token", .line, .col)

	let frac = 0.0
	let mag = 10.0

	while(.isDecimalDigit())
	{
	frac += (.character - '0') / mag
	mag *= 10
	.nextChar()
	}

	ret += frac
	}

	// exponent
	if(.character in "eE")
	{
	.nextChar()

	if(!.isDecimalDigit() && .character !in "+-")
	return .error("({}:{}): incomplete number token", .line, .col)

	let negExp = false

	if(.character == '+')
	.nextChar()
	else if(.character == '-')
	{
	negExp = true
	.nextChar()
	}

	if(!.isDecimalDigit())
	return .error("({}:{}): incomplete number token", .line, .col)

	let exp = 0.0

	while(.isDecimalDigit())
	{
	exp = (exp * 10) + (.character - '0')
	.nextChar()
	}

	ret *= 10.pow(negExp? -exp : exp)
	}

	return Error.OK(neg ? -ret : ret)
	}

	function Lexer.readHexDigits(num: uint): Error[uint]
	{
	let ret = 0

	for(i: 0 .. num)
	{
	if(!.isHexDigit())
	return .error("({}:{}): Hexadecimal escape digits expected", .line, .col)

	ret <<= 4
	ret \|= .hexDigitToInt(.character)
	.nextChar()
	}

	return Error.OK(ret)
	}

	function Lexer.readEscapeSequence(beginningLine: uint, beginningCol: uint): Error[char]
	{
	.nextChar()

	if(.isEOF())
	return .error("({}:{}): Unterminated string literal", beginningLine, beginningCol)

	match(.character)
	{
	case 'b': .nextChar(); return Error.OK('\b') // semicolons exist, they're useful for putting multiple statements
	case 'f': .nextChar(); return Error.OK('\f') // on one line.
	case 'n': .nextChar(); return Error.OK('\n')
	case 'r': .nextChar(); return Error.OK('\r')
	case 't': .nextChar(); return Error.OK('\t')
	case '\\': .nextChar(); return Error.OK('\\')
	case '\"': .nextChar(); return Error.OK('\"')
	case '/': .nextChar(); return Error.OK('/')

	case 'u':
	.nextChar()

	let x = #checkError(.readHexDigits(4))

	if(x in 0xD800 .. 0xDC00)
	{
	if(.character != '\\')
	return .error("({}:{}): second surrogate pair character expected", .line, .col)

	.nextChar()

	if(.character != 'u')
	return .error("({}:{}): second surrogate pair character expected", .line, .col)

	.nextChar()

	let x2 = #checkError(.readHexDigits(4))

	if(x2 !in 0xDC00 .. 0xE000)
	return .error("({}:{}): invalid surrogate pair sequence", .line, .col)

	x = (x & ~0xD800) << 10
	x2 &= ~0xDC00
	return Error.OK(cast[char](0x10000 + (x << 10 \| x2)))
	}
	else if(x in 0xDC00 .. 0xE000)
	return .error("({}:{}): invalid surrogate pair sequence", .line, .col)
	else
	return Error.OK(cast[char]x)

	case _: // this replaces "default:"
	return .error("({}:{}): Invalid string escape sequence '\\{}'", .line, .col, .character)
	}
	}

	function Lexer.readStringLiteral(): Error[string]
	{
	let beginningLine = .line
	let beginningCol = .col
	let buf = StringBuilder(.pool) // using the memory pool to allocate the string as we build it

	// Skip opening quote
	.nextChar()

	while(true)
	{
	if(.isEOF())
	return .error("({}:{}): Unterminated string literal", beginningLine, beginningCol)

	if(.character == '\\')
	buf.addChar(#checkError(.readEscapeSequence(beginningLine, beginningCol)))
	else if(.character == '"')
	{
	.nextChar()
	break
	}
	else if(.character < '\x20')
	return .error("({}:{}): Invalid character in string token", .line, .col)
	else
	{
	buf.addChar(.character)
	.nextChar()
	}
	}

	return buf.finish()
	}

	void Lexer.nextToken(): Error[Token]
	{
	while(true)
	{
	.tok.line = .line
	.tok.col = .col

	match(.character)
	{
	case '\r', '\n':
	.nextLine()
	continue

	case '\"':
	.tok.strval = #checkError(.readStringLiteral())
	.tok.type = Tok.String
	return Error.OK(.tok)

	case '{':
	.nextChar()
	.tok.type = Tok.LBrace
	return

	case '}':
	.nextChar()
	.tok.type = Tok.RBrace
	return

	case '[':
	.nextChar()
	.tok.type = Tok.LBracket
	return

	case ']':
	.nextChar()
	.tok.type = Tok.RBracket
	return

	case ',':
	.nextChar()
	.tok.type = Tok.Comma
	return

	case ':':
	.nextChar()
	.tok.type = Tok.Colon
	return

	case meta(char, max):
	.tok.type = Tok.EOF
	return

	case 't':
	if(!.source[mPosition .. $].startsWith("rue")) // slicing syntax?? why not?
	return .error("({}:{}): true expected", :line, :col)

	.nextChar()
	.nextChar()
	.nextChar()
	.nextChar()
	.tok.type = Tok.True
	return

	case 'f':
	if(!.source[.position .. $].startsWith("alse"))
	return .error("({}:{}): false expected", :line, :col)

	.nextChar()
	.nextChar()
	.nextChar()
	.nextChar()
	.nextChar()
	.tok.type = Tok.False
	return

	case 'n':
	if(!.source[.position .. $].startsWith("ull"))
	return .error("({}:{}): null expected", :line, :col)

	.nextChar()
	.nextChar()
	.nextChar()
	.nextChar()
	.tok.type = Tok.Null
	return

	case '-', '1' ... '9': // ranges work in cases too
	.tok.numval = #checkError(.readNumLiteral())
	.tok.type = Tok.Number
	return

	case _:
	if(.isWhitespace())
	{
	.nextChar()
	continue
	}

	return .error("({}:{}): Invalid character '{}'", :line, :col, .character)
	}
	}
	}

	function Lexer.parseValue(): Error[JsonValue]
	{
	match(.tok.type)
	{
	case String: #checkError(.nextToken()); return Error.OK(JsonValue.String(.tok.strval))
	case Number: #checkError(.nextToken()); return Error.OK(JsonValue.Number(.tok.numval))
	case True: #checkError(.nextToken()); return Error.OK(JsonValue.Bool(true))
	case False: #checkError(.nextToken()); return Error.OK(JsonValue.Bool(false))
	case Null: #checkError(.nextToken()); return Error.OK(JsonValue.Null)
	case LBrace: return .parseObject()
	case LBracket: return .parseArray()
	case _: return .error("({}:{}): value expected", .line, .col)
	}
	}

	function Lexer.parseArray(): Error[JsonValue]
	{
	let ret = ArrayBuilder[JsonValue](.pool) // Again, like the StringBuilder

	#checkError(.expect(Tok.LBracket))

	if(.tok.type != Token.RBracket)
	{
	ret.append(#checkError(.parseValue()))

	while(.tok.type == Tok.Comma)
	{
	#checkError(.nextToken())
	ret.append(#checkError(.parseValue()))
	}
	}

	#checkError(.expect(Tok.RBracket))
	return Error.OK(JsonValue.Array(ret.finish()))
	}

	function Lexer.parsePair(): Error[(string, JsonValue)] // Tuple types?? maybe
	{
	let key = #checkError(.expect(Tok.String)).strval
	#checkError(.expect(Tok.Colon))
	let val = #checkError(.parseValue())
	return (key, val)
	}

	function Lexer.parseObject(): Error[JsonValue]
	{
	let h = Hash[string, JsonValue](.pool)

	#checkError(.expect(Tok.LBrace))

	if(.tok.type != Tok.RBrace)
	{
	let (key, val) = #checkError(.parsePair()) // deconstructing assignment is always fun
	h.insert(key, val)

	while(.tok.type == Tok.Comma)
	{
	#checkError(.nextToken())
	(key, val) = #checkError(.parsePair())
	h.insert(key, val)
	}
	}

	#checkError(.expect(Tok.RBrace))
	return Error.OK(JsonValue.Object(h))
	}