Created
July 13, 2023 06:29
-
-
Save niw/9a6c29b1b42e7931a97f6506dcd118a5 to your computer and use it in GitHub Desktop.
Simple parser combinator, lexer, paresr and evaluator for double value math for simple logic configuration
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
extension String: Error {} | |
// MARK: - Parser Combinator | |
typealias ParserFunction<Element, Output> = | |
(any Collection<Element>) throws -> (Output, any Collection<Element>) | |
protocol Parser<Element, Output> { | |
associatedtype Element | |
associatedtype Output | |
func apply(_ input: some Collection<Element>) throws -> (Output, any Collection<Element>) | |
} | |
struct FunctionParser<Element, Output>: Parser { | |
var function: ParserFunction<Element, Output> | |
func apply(_ input: some Collection<Element>) throws -> (Output, any Collection<Element>) { | |
try function(input) | |
} | |
} | |
func parser<Element, Output>( | |
_ function: @escaping ParserFunction<Element, Output> | |
) -> some Parser<Element, Output> { | |
FunctionParser(function: function) | |
} | |
struct ResultParser<Element, Output>: Parser { | |
var output: Output | |
func apply(_ input: some Collection<Element>) throws -> (Output, any Collection<Element>) { | |
(output, input) | |
} | |
} | |
func result<Element, Output>(_ output: Output) -> some Parser<Element, Output> { | |
ResultParser(output: output) | |
} | |
extension Collection { | |
var tail: any Collection<Element> { | |
dropFirst() | |
} | |
} | |
struct ConsumeParser<Element>: Parser { | |
func apply(_ input: some Collection<Element>) throws -> (Element, any Collection<Element>) { | |
guard let element = input.first else { | |
throw "No element." | |
} | |
return (element, input.tail) | |
} | |
} | |
func consume<Element>() -> some Parser<Element, Element> { | |
ConsumeParser() | |
} | |
extension Parser { | |
func bind<T>(_ factory: @escaping (Output) throws -> some Parser<Element, T>) -> some Parser<Element, T> { | |
parser { input in | |
let (output, remaining) = try apply(input) | |
let parser = try factory(output) | |
return try parser.apply(remaining) | |
} | |
} | |
func satisfy(_ condition: @escaping (Output) throws -> Bool) -> some Parser<Element, Output> { | |
bind { output in | |
guard try condition(output) else { | |
throw "Not satisfied." | |
} | |
// Type can't be inferred here if it is `result(output)` somehow. | |
return ResultParser<Element, Output>(output: output) | |
} | |
} | |
func map<T>(_ transform: @escaping (Output) throws -> T) -> some Parser<Element, T> { | |
bind { output in | |
result(try transform(output)) | |
} | |
} | |
func or(_ other: @escaping () -> some Parser<Element, Output>) -> some Parser<Element, Output> { | |
parser { input in | |
do { | |
return try apply(input) | |
} catch { | |
return try other().apply(input) | |
} | |
} | |
} | |
} | |
extension Parser { | |
typealias RepeatParser = Parser<Element, any Collection<Output>> | |
var one: some RepeatParser { | |
map { output in | |
[output] | |
} | |
} | |
var zeroOrOne: some RepeatParser { | |
one.or { result([]) } | |
} | |
var zeroOrMore: some RepeatParser { | |
parser { input in | |
var outputs = [Output]() | |
var nextInput = input | |
do { | |
while true { | |
let (output, remaining) = try apply(nextInput) | |
outputs.append(output) | |
nextInput = remaining | |
} | |
} catch { | |
} | |
return (outputs, nextInput) | |
} | |
} | |
var oneOrMore: some RepeatParser { | |
one.bind { first in | |
zeroOrMore.bind { others in | |
result(Array(first) + Array(others)) | |
} | |
} | |
} | |
} | |
extension CharacterSet { | |
var parser: some Parser<Unicode.Scalar, Unicode.Scalar> { | |
consume().satisfy { unicodeScalar in | |
contains(unicodeScalar) | |
} | |
} | |
} | |
extension String { | |
var characterSet: CharacterSet { | |
CharacterSet(charactersIn: self) | |
} | |
} | |
extension Unicode.Scalar { | |
var parser: some Parser<Unicode.Scalar, Unicode.Scalar> { | |
consume().satisfy { unicodeScalar in | |
unicodeScalar == self | |
} | |
} | |
} | |
func or<Element, Output>(_ parsers: any Parser<Element, Output>...) -> some Parser<Element, Output> { | |
parser { input in | |
for parser in parsers { | |
do { | |
return try parser.apply(input) | |
} catch { | |
} | |
} | |
throw "Nothing matched" | |
} | |
} | |
func sequence<Element, Output>(_ parsers: any Parser<Element, any Collection<Output>>...) -> some Parser<Element, any Collection<Output>> { | |
parser { input in | |
var outputs = [Output]() | |
var nextInput = input | |
for parser in parsers { | |
let (output, remaining) = try parser.apply(nextInput) | |
outputs.append(contentsOf: output) | |
nextInput = remaining | |
} | |
return (outputs, nextInput) | |
} | |
} | |
// MARK: - Tokenizer | |
enum Token: Equatable { | |
case name(String) | |
case number(Double) | |
case plusOperator | |
case minusOperator | |
case multiplyOperator | |
case divideOperator | |
case leftParentheses | |
case rightParentheses | |
} | |
typealias TokenParser = Parser<Unicode.Scalar, Token> | |
let sign = "+-".characterSet.parser | |
let exponentialIndicator = "eE".characterSet.parser | |
let digit = CharacterSet.decimalDigits.parser | |
let zeroDigit = "0".unicodeScalars.first!.parser | |
let nonZeroDigit = "123456789".characterSet.parser | |
let negativeSign = "-".unicodeScalars.first!.parser | |
let period = ".".unicodeScalars.first!.parser | |
let exponentialPart = sequence( | |
exponentialIndicator.one, | |
sign.zeroOrOne, | |
digit.oneOrMore | |
) | |
let fractionPart = sequence(period.one, digit.oneOrMore) | |
let integerPart = or( | |
sequence(negativeSign.zeroOrOne, nonZeroDigit.one, digit.zeroOrMore), | |
sequence(negativeSign.zeroOrOne, zeroDigit.one) | |
) | |
let floatPart = or( | |
sequence(integerPart, fractionPart, exponentialPart), | |
sequence(integerPart, fractionPart), | |
sequence(integerPart, exponentialPart) | |
) | |
let number = or ( | |
floatPart, | |
integerPart | |
) | |
let numberToken: some TokenParser = number.bind { output in | |
guard let double = Double(String(String.UnicodeScalarView(output))) else { | |
throw "Not number expression" | |
} | |
return ResultParser<Unicode.Scalar, Token>(output: .number(double)) | |
} | |
let namePartPrefixCharacter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".characterSet.parser | |
let namePartCharacter = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".characterSet.parser | |
let namePart = sequence(namePartPrefixCharacter.one, namePartCharacter.zeroOrMore) | |
let name = or( | |
// Not using `sequence` due to recursive `name` call. | |
namePart.bind { a in | |
period.one.bind { b in | |
name.bind { c in | |
result(Array(a) + Array(b) + Array(c)) | |
} | |
} | |
}, | |
namePart | |
) | |
let nameToken: some TokenParser = name.bind { output in | |
ResultParser<Unicode.Scalar, Token>(output: .name(String(String.UnicodeScalarView(output)))) | |
} | |
let leftParenthesesToken: some TokenParser = "(".unicodeScalars.first!.parser.bind { _ in | |
result(.leftParentheses) | |
} | |
let rightParenthesesToken: some TokenParser = ")".unicodeScalars.first!.parser.bind { _ in | |
result(.rightParentheses) | |
} | |
let plusOperatorToken: some TokenParser = "+".unicodeScalars.first!.parser.bind { _ in | |
result(.plusOperator) | |
} | |
let minusOperatorToken: some TokenParser = "-".unicodeScalars.first!.parser.bind { _ in | |
result(.minusOperator) | |
} | |
let multiplyOperatorToken: some TokenParser = "*".unicodeScalars.first!.parser.bind { _ in | |
result(.multiplyOperator) | |
} | |
let divideOperatorToken: some TokenParser = "/".unicodeScalars.first!.parser.bind { _ in | |
result(.divideOperator) | |
} | |
let whitespace = CharacterSet.whitespacesAndNewlines.parser | |
func ignoreWhiteSpaces<Output>(then parser: @autoclosure @escaping () -> some Parser<Unicode.Scalar, Output>) -> some Parser<Unicode.Scalar, Output> { | |
whitespace.zeroOrMore.bind { _ in | |
parser() | |
} | |
} | |
let tokenizer: some Parser<Unicode.Scalar, any Collection<Token>> = or( | |
ignoreWhiteSpaces(then: leftParenthesesToken), | |
ignoreWhiteSpaces(then: rightParenthesesToken), | |
ignoreWhiteSpaces(then: plusOperatorToken), | |
ignoreWhiteSpaces(then: minusOperatorToken), | |
ignoreWhiteSpaces(then: multiplyOperatorToken), | |
ignoreWhiteSpaces(then: divideOperatorToken), | |
ignoreWhiteSpaces(then: nameToken), | |
ignoreWhiteSpaces(then: numberToken) | |
).oneOrMore | |
// MARK: - Parser | |
enum Expression { | |
case symbol(String) | |
case number(Double) | |
indirect case add(Expression, Expression) | |
indirect case subtract(Expression, Expression) | |
indirect case multiply(Expression, Expression) | |
indirect case divide(Expression, Expression) | |
} | |
typealias ExpressionParser = Parser<Token, Expression> | |
extension Token { | |
var parser: some Parser<Token, Token> { | |
consume().satisfy { $0 == self } | |
} | |
} | |
let _factor1: some ExpressionParser = Token.leftParentheses.parser.bind { _ in | |
expression.bind { output in | |
Token.rightParentheses.parser.bind { _ in | |
result(output) | |
} | |
} | |
} | |
let _factor2: some ExpressionParser = consume().bind { token in | |
switch token { | |
case .name(let name): | |
ResultParser<Token, Expression>(output: .symbol(name)) | |
case .number(let number): | |
ResultParser<Token, Expression>(output: .number(number)) | |
default: | |
throw "Not a name or a number" | |
} | |
} | |
let factor: some ExpressionParser = or(_factor1, _factor2) | |
let term: some ExpressionParser = or( | |
factor.bind { left in | |
or( | |
Token.multiplyOperator.parser.bind { _ in | |
term.bind { right in | |
ResultParser<Token, Expression>(output: .multiply(left, right)) | |
} | |
}, | |
Token.divideOperator.parser.bind { _ in | |
term.bind { right in | |
ResultParser<Token, Expression>(output: .divide(left, right)) | |
} | |
} | |
) | |
}, | |
factor | |
) | |
let expression: some ExpressionParser = or( | |
term.bind { left in | |
or( | |
Token.plusOperator.parser.bind { _ in | |
expression.bind { right in | |
ResultParser<Token, Expression>(output: .add(left, right)) | |
} | |
}, | |
Token.minusOperator.parser.bind { _ in | |
expression.bind { right in | |
ResultParser<Token, Expression>(output: .subtract(left, right)) | |
} | |
} | |
) | |
}, | |
term | |
) | |
// MARK: - Evaluator | |
let variables: [String: Double] = [ | |
"cat.a": 3, | |
"cat.b": 40, | |
"dog.a": 10, | |
"dog.b": 20 | |
] | |
func eval(_ expression: Expression) throws -> Double { | |
switch expression { | |
case .symbol(let symbol): | |
guard let value = variables[symbol] else { | |
throw "Unknown variable \(symbol)" | |
} | |
return value | |
case .number(let number): | |
return number | |
case .add(let left, let right): | |
return try eval(left) + eval(right) | |
case .subtract(let left, let right): | |
return try eval(left) - eval(right) | |
case .multiply(let left, let right): | |
return try eval(left) * eval(right) | |
case .divide(let left, let right): | |
return try eval(left) / eval(right) | |
} | |
} | |
do { | |
let (tokens, remaining_unicode_scalars) = try tokenizer.apply("cat.a + cat.b * 6 / 8)".unicodeScalars) | |
print(tokens) | |
print(Array(remaining_unicode_scalars)) | |
let (expression, remaining_tokens) = try expression.apply(tokens) | |
print(expression) | |
print(Array(remaining_tokens)) | |
print(try eval(expression)) | |
} catch { | |
print(error) | |
} | |
// MARK: - Grammer | |
/* | |
// Parser | |
EXPRESSION: | |
term PLUS_OPERATOR expression | |
| term MINUS_OPERATOR expression | |
| term | |
TERM: | |
FACTOR MULTIPLY_OPERATOR TERM | |
| FACTOR DIVIDE_OPERATOR TERM | |
| FACTOR | |
FACTOR: | |
'(' EXPRESSION ')' | |
| NUMBER | |
| NAME | |
; | |
// Tokenizer | |
NAME: | |
NAME_PART | |
| NAME_PART '.' NAME | |
fragment NAME_PART: | |
[_A-Za-z] [_0-9A-Za-z]; | |
PLUS_OPERATOR: | |
'+'; | |
MINUS_OPERATOR: | |
'-'; | |
MULTIPLY_OPERATOR: | |
'*'; | |
DIVIDE_OPERATOR: | |
'/'; | |
NUMBER: | |
INTEGER_PART | |
| FLOAT_PART | |
; | |
fragment INTEGER_PART: | |
NEGATIVE_SIGN? '0' | |
| NEGATIVE_SIGN? NON_ZERO_DIGIT DIGIT* | |
; | |
fragment FLOAT_PART: | |
INTEGER_PART FRACTIONAL_PART | |
| INTEGER_PART EXPONENTIAL_PART | |
| INTEGER_PART FRACTIONAL_PART EXPONENTIAL_PART | |
; | |
fragment FRACTIONAL_PART: | |
'.' DIGIT+; | |
fragment EXPONENTIAL_PART: | |
EXPONENT_INDICATOR SIGN? DIGIT+; | |
fragment NEGATIVE_SIGN: | |
'-'; | |
fragment DIGIT: | |
[0-9]; | |
fragment NON_ZERO_DIGIT: | |
[1-9]; | |
fragment EXPONENT_INDICATOR: | |
[eE]; | |
fragment SIGN: | |
[+-]; | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment