Created
July 12, 2023 04:15
-
-
Save niw/8ad9cadf7e3b6bb338e1edcf38f5664b to your computer and use it in GitHub Desktop.
Tiny SExpr Interpretor
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
extension String: Error {} | |
// MARK: - Evaluation | |
enum Atom { | |
case symbol(String) | |
case number(Double) | |
} | |
enum SExpr { | |
case atom(Atom) | |
case list(AnyCollection<SExpr>) | |
} | |
extension SExpr: ExpressibleByStringLiteral { | |
typealias StringLiteralType = String | |
init(stringLiteral value: String) { | |
self = .atom(.symbol(value)) | |
} | |
} | |
extension SExpr: ExpressibleByFloatLiteral { | |
typealias FloatLiteralType = Double | |
init(floatLiteral value: Double) { | |
self = .atom(.number(value)) | |
} | |
} | |
extension SExpr: ExpressibleByIntegerLiteral { | |
typealias IntegerLiteralType = Int | |
init(integerLiteral value: Int) { | |
self = .atom(.number(Double(value))) | |
} | |
} | |
extension SExpr: ExpressibleByArrayLiteral { | |
typealias ArrayLiteralElement = SExpr | |
init(arrayLiteral elements: SExpr...) { | |
self = .list(AnyCollection(elements)) | |
} | |
} | |
extension SExpr: CustomStringConvertible { | |
var description: String { | |
switch self { | |
case .atom(let atom): | |
"\(atom)" | |
case .list(let list): | |
"[\(list.map{ $0.description }.joined(separator: " "))]" | |
} | |
} | |
} | |
func eval(_ expr: SExpr) throws -> SExpr { | |
switch expr { | |
case .atom: | |
return expr | |
case .list(let list): | |
if case .atom(.symbol(let symbol)) = list.first { | |
return try apply(symbol, arguments: list.dropFirst()) | |
} else { | |
throw "Unexpected list \(expr)" | |
} | |
} | |
} | |
func apply(_ symbol: String, arguments: AnyCollection<SExpr>) throws -> SExpr { | |
let doubleArguments = try arguments.map { argument in | |
switch try eval(argument) { | |
case .atom(.number(let double)): | |
double | |
default: | |
throw "Unexpected type" | |
} | |
} | |
let op: (Double, Double) -> Double = switch symbol { | |
case "+": (+) | |
case "-": (-) | |
case "*": (*) | |
case "/": (/) | |
default: | |
throw "Unknown operator \(symbol)" | |
} | |
if let firstArgument = doubleArguments.first { | |
let restOfArguments = doubleArguments.dropFirst() | |
return .atom(.number(restOfArguments.reduce(firstArgument) { result, arg in | |
op(result, arg) | |
})) | |
} else { | |
throw "Unexpected arguments \(arguments)" | |
} | |
} | |
#if false | |
do { | |
print(try eval(["+", 1, ["*", 2.4, 3]])) | |
} catch { | |
print(error) | |
} | |
#endif | |
// MARK: - Parser Combinator | |
typealias Parser<Element, Output> = | |
(AnyCollection<Element>) throws -> (Output, AnyCollection<Element>) | |
func result<Element, Output>(_ output: Output) -> Parser<Element, Output> { | |
{ input in | |
(output, input) | |
} | |
} | |
func consume<Element>() -> Parser<Element, Element> { | |
{ input in | |
if let first = input.first { | |
return (first, AnyCollection(input.dropFirst())) | |
} else { | |
throw "Error" | |
} | |
} | |
} | |
func bind<Element, T, U>( | |
_ parser: @escaping Parser<Element, T>, | |
_ factory: @escaping (T) throws -> Parser<Element, U> | |
) -> Parser<Element, U> { | |
{ input in | |
let (output, remainder) = try parser(input) | |
let parser = try factory(output) | |
return try parser(remainder) | |
} | |
} | |
#if false | |
func parserA() -> Parser<Unicode.Scalar, Unicode.Scalar> { | |
bind(consume()) { output in | |
switch output { | |
case "a": | |
return result("a") | |
default: | |
throw "error" | |
} | |
} | |
} | |
#endif | |
func satisfy<Element>( | |
_ condition: @escaping (Element) -> Bool | |
) -> Parser<Element, Element> { | |
bind(consume()) { output in | |
if condition(output) { | |
return result(output) | |
} else { | |
throw "Not satisfied" | |
} | |
} | |
} | |
// Works only for a single character that can be expressed in a single Unicode scalar. | |
extension String { | |
var unicodeScalarParser: Parser<Unicode.Scalar, Unicode.Scalar> { | |
satisfy { $0 == self.unicodeScalars.first } | |
} | |
} | |
extension CharacterSet { | |
var parser: Parser<Unicode.Scalar, Unicode.Scalar> { | |
satisfy { character in | |
self.contains(character) | |
} | |
} | |
} | |
func or<Element, Output>( | |
_ lhs: @escaping Parser<Element, Output>, | |
_ rhs: @escaping Parser<Element, Output> | |
) -> Parser<Element, Output> { | |
{ input in | |
do { | |
return try lhs(input) | |
} catch { | |
return try rhs(input) | |
} | |
} | |
} | |
enum Either<T, U> { | |
case left(T) | |
case right(U) | |
} | |
func either<Element, T, U>( | |
_ lhs: @escaping Parser<Element, T>, | |
_ rhs: @escaping Parser<Element, U> | |
) -> Parser<Element, Either<T, U>> { | |
{ input in | |
do { | |
let (output, remaining) = try lhs(input) | |
return (.left(output), remaining) | |
} catch { | |
let (output, remaining) = try rhs(input) | |
return (.right(output), remaining) | |
} | |
} | |
} | |
func oneOrMore<Element, Output>( | |
_ parser: @escaping Parser<Element, Output> | |
) -> Parser<Element, AnyCollection<Output>> { | |
// this should be loop instead. | |
func recursive(_ buffer: [Output]) -> Parser<Element, AnyCollection<Output>> { | |
bind(parser) { output in | |
var buf = buffer | |
buf.append(output) | |
return or(recursive(buf), result(AnyCollection(buf))) | |
} | |
} | |
return recursive([]) | |
} | |
func zeroOrMore<Element, Output>( | |
_ parser: @escaping Parser<Element, Output> | |
) -> Parser<Element, AnyCollection<Output>> { | |
or(oneOrMore(parser), result(AnyCollection([]))) | |
} | |
#if false | |
do { | |
let input = AnyCollection("!123$".unicodeScalars) | |
let parser = zeroOrMore(or(CharacterSet.alphanumerics.parser, CharacterSet.symbols.parser)) | |
let (output, remaining) = try parser(input) | |
print(Array(output)) | |
print(Array(remaining)) | |
} catch { | |
print(error) | |
} | |
#endif | |
// MARK: - Tokenizer | |
enum SExprToken: Equatable { | |
case leftParentheses | |
case rightParentheses | |
case symbol(String) | |
case number(Double) | |
} | |
extension SExprToken: CustomStringConvertible { | |
var description: String { | |
switch self { | |
case .leftParentheses: | |
"(" | |
case .rightParentheses: | |
")" | |
case .symbol(let string): | |
string | |
case .number(let number): | |
"\(number)" | |
} | |
} | |
} | |
typealias SExprTokenParser = Parser<Unicode.Scalar, SExprToken> | |
let leftParenthesesParser: SExprTokenParser = bind("(".unicodeScalarParser) { _ in | |
result(.leftParentheses) | |
} | |
let rightParenthesesParser: SExprTokenParser = bind(")".unicodeScalarParser) { _ in | |
result(.rightParentheses) | |
} | |
let symbolParser: SExprTokenParser = bind(CharacterSet(charactersIn: "+-*/").parser) { character in | |
result(.symbol(String(character))) | |
} | |
let numberParser: SExprTokenParser = bind(oneOrMore(CharacterSet.decimalDigits.parser)) { digits in | |
let digitsString = String(String.UnicodeScalarView(digits)) | |
guard let double = Double(digitsString) else { | |
throw "\(digitsString) is not a number" | |
} | |
return result(.number(double)) | |
} | |
typealias SExprTokenizer = Parser<Unicode.Scalar, AnyCollection<SExprToken>> | |
func ignoreWhiteSpaces<Output>( | |
then parser: @escaping Parser<Unicode.Scalar, Output> | |
) -> Parser<Unicode.Scalar, Output> { | |
bind(zeroOrMore(CharacterSet.whitespacesAndNewlines.parser)) { _ in | |
parser | |
} | |
} | |
let sexprTokenizer: SExprTokenizer = oneOrMore( | |
ignoreWhiteSpaces(then: | |
or(leftParenthesesParser, or(rightParenthesesParser, or(symbolParser, numberParser))))) | |
#if false | |
do { | |
let source = "(+ 1 (* 2 3))" | |
let (tokens, remaining) = try sexprTokenizer(AnyCollection(source.unicodeScalars)) | |
print(Array(tokens)) | |
print(Array(remaining)) | |
} catch { | |
print(error) | |
} | |
#endif | |
// MARK: - Parser | |
typealias SExprParser = Parser<SExprToken, SExpr> | |
extension SExprToken { | |
var sexpr: SExpr? { | |
switch self { | |
case .symbol(let symbol): | |
.atom(.symbol(symbol)) | |
case .number(let number): | |
.atom(.number(number)) | |
default: | |
nil | |
} | |
} | |
} | |
func parentheses<Output>( | |
_ factory: @escaping () -> Parser<SExprToken, Output> | |
) -> Parser<SExprToken, Output> { | |
bind(satisfy { $0 == .leftParentheses }) { _ in | |
bind(factory()) { output in | |
bind(satisfy { $0 == .rightParentheses }) { _ in | |
result(output) | |
} | |
} | |
} | |
} | |
let atomParser: SExprParser = bind(consume()) { token in | |
guard let sexpr = token.sexpr else { | |
throw "Not atom" | |
} | |
return result(sexpr) | |
} | |
let sexprParser = bind(parentheses { zeroOrMore(or(sexprParser, atomParser)) }) { exprs in | |
result(.list(exprs)) | |
} | |
do { | |
let source = "(+ 1 (* 2 3) (* 4 5))" | |
let (tokens, _) = try sexprTokenizer(AnyCollection(source.unicodeScalars)) | |
print(Array(tokens)) | |
let (sexpr, _) = try sexprParser(tokens) | |
print(sexpr) | |
print(try eval(sexpr)) | |
} catch { | |
print(error) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment