Created
December 19, 2022 03:57
-
-
Save brecert/768bcac33f9bb1ba68533d4d6a2377f5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// deno-lint-ignore-file no-unused-vars prefer-const no-cond-assign no-empty | |
import { | |
assert, | |
assertEquals, | |
assertExists, | |
assertInstanceOf, | |
equal, | |
} from "https://deno.land/[email protected]/testing/asserts.ts"; | |
type LiteralT = | |
| ["Integer", bigint] | |
| ["String", string]; | |
type TokenT = | |
| { type: "Name" | "Symbol" | "Open" | "Close"; value: string } | |
| { type: "Separator"; value: undefined } | |
| { type: "Literal"; value: Literal }; | |
const isSeparator = (char: string) => /[\s;]/.test(char); | |
const isSymbol = (char: string) => /[~!@#$%^&*+=.\/`:<>?'-]/.test(char); | |
const isDigit = (char: string) => /[1-9]/.test(char); | |
const isAlpha = (char: string) => /[a-z]/.test(char); | |
// oop time!!!! | |
class Token { | |
constructor( | |
public readonly type: TokenT["type"], | |
public readonly value: TokenT["value"], | |
) {} | |
static Separator = () => new Token("Separator", undefined); | |
static Name = (name: string) => new Token("Name", name); | |
static Symbol = (symbol: string) => new Token("Symbol", symbol); | |
static Literal = (literal: Literal) => new Token("Literal", literal); | |
static Open = (delimiter: string) => new Token("Open", delimiter); | |
static Close = (delimiter: string) => new Token("Close", delimiter); | |
// Utility Literals | |
static Integer = (int: bigint) => Token.Literal(Literal.Integer(int)); | |
static String = (string: string) => Token.Literal(Literal.String(string)); | |
matches(other: Token) { | |
return other.type === this.type && | |
(this.type === "Literal" || this.value === other.value); | |
} | |
toString() { | |
return `Token.${this.type}(${this.value})`; | |
} | |
get [Symbol.toStringTag]() { | |
return `Token.${this.type}`; | |
} | |
} | |
class Literal { | |
constructor(public type: LiteralT["0"], public value: LiteralT["1"]) {} | |
static Integer = (int: bigint) => new Literal("Integer", int); | |
static String = (string: string) => new Literal("String", string); | |
matches(other: Literal) { | |
return other.type === this.type && other.value === this.value; | |
} | |
toString() { | |
return `Literal.${this.type}(${this.value})`; | |
} | |
get [Symbol.toStringTag]() { | |
return `Literal.${this.type}`; | |
} | |
} | |
// class Separator extends Token { | |
// constructor() { | |
// super('Separator') | |
// } | |
// } | |
// class Name extends Token { | |
// constructor(public name: string) { | |
// super('Name') | |
// } | |
// } | |
// class Symbol extends Token { | |
// constructor(public symbol: string) { | |
// super('Symbol') | |
// } | |
// } | |
// class Open extends Token { | |
// constructor(delimiter: string) { | |
// super('Open') | |
// } | |
// } | |
// class Close extends Token { | |
// constructor(delimiter: string) { | |
// super('Close') | |
// } | |
// } | |
// class Literal extends Token { | |
// constructor(public literal: Literal) { | |
// super('Literal') | |
// } | |
// } | |
class SeekingReader<T> { | |
#pos = 0; | |
seekingPos = 0; | |
get pos() { | |
return this.#pos; | |
} | |
set pos(pos: number) { | |
this.#pos = pos; | |
this.seekingPos = pos; | |
} | |
constructor(public list: T[]) {} | |
next() { | |
return this.list[this.pos++]; | |
} | |
*reading() { | |
let val: T; | |
while (val = this.next()) { | |
yield val; | |
} | |
} | |
seek(n = 0) { | |
let val = this.list[this.seekingPos]; | |
this.seekingPos += n; | |
return val; | |
} | |
*seeking(offset = 0, by = 1) { | |
let val: T; | |
this.seekingPos += offset; | |
while (val = this.seek(by)) { | |
yield val; | |
} | |
} | |
peek(n = 1) { | |
return this.list[this.pos + n - 1]; | |
} | |
} | |
class Lexer { | |
#pos = 0; | |
peekingPos = 0; | |
chars: string[]; | |
get pos() { | |
return this.#pos; | |
} | |
set pos(pos: number) { | |
this.#pos = pos; | |
this.peekingPos = pos; | |
} | |
constructor(source: string) { | |
this.chars = [...source]; | |
} | |
nextChar() { | |
return this.chars[this.pos++]; | |
} | |
*nextChars() { | |
let char: string; | |
while (char = this.nextChar()) { | |
yield char; | |
} | |
} | |
seekChar(n = 0) { | |
let char = this.chars[this.peekingPos]; | |
this.peekingPos += n; | |
return char; | |
} | |
*seekChars(offset = 0, by = 1) { | |
let char: string; | |
this.peekingPos += offset; | |
while (char = this.seekChar(by)) { | |
yield char; | |
} | |
} | |
skipSeparators() { | |
let len = 0; | |
for (let char of this.seekChars()) { | |
if (!isSeparator(char)) break; | |
len += 1; | |
} | |
this.pos += len; | |
} | |
*tokens(): Generator<Token> { | |
loop: | |
for (let char of this.seekChars(0, 0)) { | |
let len: number, token: Token; | |
switch (char) { | |
case " ": { | |
this.pos += 1; | |
continue; | |
} | |
case ";": | |
case "\n": { | |
this.skipSeparators(); | |
yield Token.Separator(); | |
continue loop; | |
} | |
case '"': { | |
let [len, token] = this.parseString(); | |
yield token; | |
this.pos += len; | |
break; | |
} | |
default: { | |
if (isDigit(char)) { | |
let [len, token] = this.parseInteger(); | |
yield token; | |
this.pos += len; | |
} else if (isAlpha(char)) { | |
let [len, token] = this.parseName(); | |
yield token; | |
this.pos += len; | |
} else if (isSymbol(char)) { | |
let [len, token] = this.parseSymbol(); | |
yield token; | |
this.pos += len; | |
} else { | |
throw new Error(`Invalid character '${char}' at ${this.pos}`); | |
} | |
} | |
} | |
} | |
} | |
parseSymbol(): [size: number, token: Token] { | |
let len = 0; | |
let symbol = ""; | |
for (let char of this.seekChars()) { | |
if (!isSymbol(char)) break; | |
len += 1; | |
symbol += char; | |
} | |
return [len, Token.Symbol(symbol)]; | |
} | |
parseName(): [size: number, token: Token] { | |
let len = 0; | |
let name = ""; | |
for (let char of this.seekChars()) { | |
if (!isAlpha(char)) break; | |
len += 1; | |
name += char; | |
} | |
return [len, Token.Name(name)]; | |
} | |
// todo: add radix | |
parseInteger(): [size: number, token: Token] { | |
let int = BigInt(0); | |
let len = 0; | |
for (let char of this.seekChars()) { | |
if (!isDigit(char)) break; | |
len += 1; | |
int = int * 10n + BigInt(char); | |
} | |
return [len, Token.Integer(int)]; | |
} | |
parseString(): [size: number, token: Token] { | |
let start = this.peekingPos; | |
let char: string; | |
let result = ""; | |
let len = 1; | |
for (let char of this.seekChars(1)) { | |
len += 1; | |
switch (char) { | |
case '"': { | |
return [len, Token.String(result)]; | |
} | |
default: { | |
result += char; | |
} | |
} | |
} | |
// todo: real syntax error + span not js lol | |
throw new SyntaxError(`string was not closed at ${start}`); | |
} | |
} | |
const Precedence = [ | |
"None", // | |
"AddSub", // +, - | |
"MulDiv", // *, / | |
]; | |
class BindingPower { | |
constructor(public lbp?: number | null, public rbp?: number | null) {} | |
static r(rbp: number) { | |
return new this(undefined, rbp); | |
} | |
static l(lbp: number) { | |
return new this(lbp, undefined); | |
} | |
static i(lbp: number, rbp: number) { | |
return new this(lbp, rbp); | |
} | |
} | |
const BP = (lbp?: number | null, rbp?: number | null) => | |
new BindingPower(lbp, rbp); | |
type BP = BindingPower; | |
// Prefix Pat <Expr> | |
// Infix: <Expr> Pat.. <Expr> | |
// Postfix <Expr> Pat | |
// ? | |
// these would be useful but I'll just pratt them | |
type TokenTrees = TokenTree[]; | |
type TokenTree = | |
| ["Block", TokenTrees] // { ... } | |
| ["List", TokenTrees] // [ ... ] | |
| ["Name", string] // name Name | |
| ["Symbol", string] // < + - | |
| ["Integer", number] // 123 0 | |
| ["String", string]; // "string with spaces" | |
type PatternElement = | |
| BindingPower | |
| Token; | |
type Pattern = PatternElement[]; | |
type PrefixFn = (token: Token) => Output; | |
type InfixFn = (token: Token, lhs: Output) => Output; | |
type Output = unknown; | |
function* filterFrom<T>( | |
iter: IterableIterator<T>, | |
predicate: (value: T) => unknown, | |
) { | |
for (let value of iter) { | |
if (predicate(value)) { | |
yield value; | |
} | |
} | |
} | |
function findIn<T>( | |
iter: IterableIterator<T>, | |
predicate: (value: T) => boolean, | |
) { | |
return filterFrom(iter, predicate).next().value; | |
} | |
type TokenPredicate = (tok: Token) => boolean; | |
class Parser { | |
tokens: SeekingReader<Token>; | |
patterns = { | |
prefix: new Map<Token | TokenPredicate, PrefixFn>(), | |
infix: new Map<Token, InfixFn>(), | |
}; | |
bindingPower = { | |
prefix: new Map<Token, BP>(), | |
infix: new Map<Token | TokenPredicate, BP>(), | |
}; | |
// todo: Generator<Token> | |
constructor(tokens: Token[]) { | |
this.tokens = new SeekingReader(tokens); | |
} | |
getPrefix(token: Token) { | |
let value = findIn( | |
this.patterns.prefix.entries(), | |
([tok]) => typeof tok === "function" ? tok(token) : token.matches(tok), | |
)?.[1](token); | |
assertExists(value, `Invalid prefix: ${token}`); | |
return value; | |
} | |
getInfix(token: Token, lhs: Output) { | |
let value = findIn( | |
this.patterns.infix.entries(), | |
([tok]) => token.matches(tok), | |
)?.[1](token, lhs); | |
assertExists(value, `Invalid infix: ${token}`); | |
return value; | |
} | |
infixBindingPower(token: Token | undefined) { | |
return findIn( | |
this.bindingPower.infix.entries(), | |
([tok]) => | |
typeof tok === "function" | |
? token && tok(token) || false | |
: token?.matches(tok) ?? false, | |
)?.[1]; | |
} | |
prefixBindingPower(token: Token | undefined) { | |
return findIn( | |
this.bindingPower.prefix.entries(), | |
([tok]) => token?.matches(tok) ?? false, | |
)?.[1]; | |
} | |
parse(mbp = 0) { | |
let token = this.tokens.next(); | |
if (token == null) { | |
// end | |
return; | |
} | |
let lhs = this.getPrefix(token); | |
while (true) { | |
let token = this.tokens.peek(); | |
// todo: check if valid infix/postfix start | |
// todo: error? | |
let ibp = this.infixBindingPower(token); | |
console.log("ibp", [this.tokens.pos], ibp, token); | |
if (ibp !== undefined) { | |
// infix requires lbp | |
if (ibp.lbp! < mbp) break; | |
this.tokens.next(); | |
lhs = this.getInfix(token, lhs); | |
// console.log(this.tokens.pos, token, lhs); | |
continue; | |
} else { | |
break; | |
} | |
} | |
return lhs; | |
} | |
// maybe should be not be part of the class | |
parsePattern(pattern: Pattern) { | |
return pattern.map((pat) => { | |
if (pat instanceof BindingPower) { | |
return this.parse(pat.rbp ?? 0); | |
} else { | |
let token = this.tokens.next(); | |
assertEquals( | |
token, | |
pat, | |
`unexpected ${token} at ${this.tokens.pos}. expected ${pat}`, | |
); | |
return token; | |
} | |
}); | |
} | |
} | |
// syn '+ b | |
// syn '- b | |
// syn 'is 'not b | |
// syn 'is b | |
// syn 'not b | |
// syn a '= b | |
// syn a '? b ': c | |
// syn a '> b | |
// syn a '< b | |
// syn a '+ b | |
// syn a '- b | |
// syn a '* b | |
// syn a '/ b | |
let text = ` | |
syn '+ a => "+{a}" | |
syn '- a => "-{a}" | |
syn a '+ b => "{a}+{b}" | |
syn a '- b => "{a}-{b}" | |
syn a '/ b => "{a}-{b}" | |
syn a '* b => "{a}-{b}" | |
syn a '** b => "{a}-{b}" | |
syn a '> b => "{a}>{b}" | |
syn a '< b => "{a}<{b}" | |
syn a '>= b => "{a}>={b}" | |
syn a '<= b => "{a}<={b}" | |
syn a '> b '> c => "{a} > {b} && {b} > {c}" | |
syn a '= b => "{a} = {b}" | |
syn 'if a 'then b 'else c => "" | |
syn a '? b ': c => if a then b else c | |
syn a 'is 'not b => "{a} = {b}" | |
1 > 2 | |
`; | |
let lexer = new Lexer(text); | |
let parser = new Parser([...lexer.tokens()]); | |
let bp = 0; | |
parser.bindingPower.prefix.set(Token.Separator(), BP(null, 0)); | |
parser.bindingPower.infix.set(Token.Separator(), BP(0, 0)); | |
// parser.bindingPower.prefix.set(name => ) | |
// parser.bindingPower.prefix.set(Token.Integer(0n), BP(null, 0)); | |
parser.patterns.prefix.set( | |
Token.Integer(0n), | |
(tok) => (tok.value as Literal).value!, | |
); | |
parser.bindingPower.prefix.set( | |
Token.Name("syn"), | |
BP(null, bp++), | |
); | |
parser.patterns.prefix.set( | |
Token.Separator(), | |
(tok) => { | |
return parser.parse(0)!; | |
}, | |
); | |
parser.patterns.infix.set( | |
Token.Separator(), | |
(tok, lhs) => { | |
return [lhs, parser.parse(1)]; | |
}, | |
); | |
parser.patterns.prefix.set( | |
Token.Name("syn"), | |
(tok) => { | |
let parts: Pattern = []; | |
let output: Output; | |
let substitutions: { at: number; symbol: string }[] = []; | |
let i = 0; | |
for (const tok of parser.tokens.reading()) { | |
if ((tok.matches(Token.Symbol("=>")))) { | |
// parser.tokens.pos -= 1; | |
// console.log("p", parser.parse(1)); | |
output = parser.parse(1); | |
break; | |
} | |
if (tok.matches(Token.Symbol("'"))) { | |
const next = parser.tokens.next(); | |
assert( | |
next.type === "Name" || next.type === "Symbol", | |
`Invalid token in pattern part at ${parser.tokens.pos}`, | |
); | |
parts.push(next); | |
} else if ( | |
tok.type === "Symbol" && (tok.value as string).startsWith("'") | |
) { | |
parts.push(Token.Symbol((tok.value as string).slice(1))); | |
} else if (tok.type === "Name") { | |
parts.push(BP(++bp, ++bp)); | |
substitutions.push({ at: i, symbol: tok.value as string }); | |
} else { | |
throw new Error(`Invalid pattern part ${tok} at ${parser.tokens.pos}`); | |
} | |
i += 1; | |
} | |
const [start, ...pattern] = parts; | |
if (start instanceof Token) { | |
parser.bindingPower.prefix.set( | |
start, | |
BP(null, ++bp), | |
); | |
parser.patterns.prefix.set(start, (_tok) => { | |
return [start.value, ...parser.parsePattern(pattern)]; | |
}); | |
} else if (start instanceof BindingPower) { | |
console.log({ start, pattern }); | |
assertInstanceOf(pattern[0], Token); | |
parser.bindingPower.infix.set( | |
pattern[0], | |
start, | |
); | |
parser.patterns.infix.set(pattern[0], (_tok, lhs) => { | |
let values = [0, lhs, ...parser.parsePattern(pattern.slice(1))]; | |
if (typeof output === "string") { | |
console.log(values) | |
return substitutions.reduce( | |
(prev, { at, symbol }) => | |
prev.replaceAll(`{${symbol}}`, values[at]!.toString()), | |
output, | |
); | |
} else { | |
return output; | |
} | |
for (let i = 0; i < parts.length; i++) { | |
// let part = parts[i] | |
// let value = values[i] | |
} | |
return [ | |
(pattern[0] as Token).value, | |
lhs, | |
parser.parsePattern(pattern.slice(1)), | |
]; | |
const parsed = parser.parsePattern(pattern.slice(1)); | |
// return (output as string).replaceAll("{a}", `${lhs}`) | |
}); | |
} | |
return ["syn", parts, output]; | |
}, | |
); | |
// after syn | |
parser.patterns.prefix.set( | |
(tok) => tok.type === "Name" && parser.prefixBindingPower(tok) == null, | |
(tok) => { | |
return tok.value; | |
}, | |
); | |
// parser.patterns.prefix.set( | |
// Token.Integer(0n), | |
// (tok) => ["int", tok.value], | |
// ); | |
// parser.bindingPower.infix.set( | |
// Token.Symbol("+"), | |
// new BindingPower(3, 4), | |
// ); | |
// parser.bindingPower.infix.set( | |
// Token.Symbol("?"), | |
// new BindingPower(0, 1) | |
// ) | |
// parser.patterns.infix.set( | |
// Token.Symbol("+"), | |
// (tok, lhs) => { | |
// const [rhs] = parser.parsePattern([BP(0, 1)]); | |
// return ["+", lhs, rhs]; | |
// }, | |
// ); | |
// parser.patterns.infix.set( | |
// Token.Symbol("?"), | |
// (tok, lhs) => { | |
// const [then_, _, else_] = parser.parsePattern([ | |
// BP(0, 1), | |
// Token.Symbol(":"), | |
// BP(2, 3), | |
// ]); | |
// return ["if", lhs, "then", then_, "else", else_]; | |
// }, | |
// ); | |
console.log(Deno.inspect(parser.parse(), { colors: true, depth: Infinity })); | |
// console.log(parser.parse()); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment