hectr · November 28, 2024 00:27
diff --git a/PrologParser.swift b/PrologParser.swift
 // Derived from https://github.com/photonlines/Python-Prolog-Interpreter

 import Foundation

 // MARK: - Primitives

 /// Rules are used to define relationships between facts and other rules.They
 /// allow us to make conditional statements about our world. Let's say we want to say
 /// that all humans are mortal. We can do so using the rule below: mortal(X) :-
 /// human(X).
 public struct RuleDescription: Hashable, Sendable {
    public let head: Structure
    public let tail: TermDescription
 }

 /// Prolog has only one data type — the term.
 ///
 /// The simplest term is an atom. Atoms can be combined to form compound terms.
 /// Example: are_friends(mark, michael) is a compound term where are_friends is
 /// called a functor and mark and michael are arguments.
 public enum TermDescription: Hashable, Sendable {
    case atom(AtomDescription)
    case conjunction(Conjunction)
    case structure(Structure)
    case variable(name: String)
 }

 public struct Structure: Hashable, Sendable {
    public let arguments: [TermDescription]
    public let functor: String
 }

 public enum AtomDescription: Hashable, Sendable {
    case `true`
    case functor(String)
 }

 public struct Conjunction: Hashable, Sendable {
    public let arguments: [TermDescription]
 }

 // MARK: - Parser

 let TOKEN_REGEX = "[A-Za-z0-9_]+|:\\-|[()\\.,]"
 let ATOM_NAME_REGEX = "^[A-Za-z0-9_]+$"
 let VARIABLE_REGEX = "^[A-Z_][A-Za-z0-9_]*$"

 /// Regex to parse comment strings. The first group captures quoted strings (
 /// double and single). The second group captures regular comments ('%' for
 /// single-line or '/* */' for multi-line).
 let COMMENT_REGEX = "(\\\".*?\\\"|\\'.*?\\')|(/\\*.*?\\*/|%[^\\r\\n]*$)"

 /// Return the input text string with all of the comments removed from it.
 func remove_comments(input_text: String) throws -> String {
    // Create a regular expression Pattern object we can use to find and strip out
    // comments. The MULTILINE flag tells Python to treat each line in the string
    // separately, while the DOTALL flag indicates that we can match patterns
    // which span multiple lines (so our multi-line comments '/* */' can  be
    // processed)

    let regex = try NSRegularExpression(pattern: COMMENT_REGEX, options: [.anchorsMatchLines, .dotMatchesLineSeparators])

    var offset = 0
    let output_text = NSMutableString(string: input_text)
    regex.enumerateMatches(
        in: input_text,
        options: [],
        range: NSRange(location: 0, length: input_text.count),
        using: { result, _, _ in
            guard let result = result else {
                return
            }
            // If we found a match for our 2nd group, it is a comment, so we remove
            if result.numberOfRanges >= 3 {
                let range = result.range(at: 2)
                if range.location != NSNotFound {
                    output_text.deleteCharacters(
                        in: NSRange(
                            location: range.location - offset,
                            length: range.length
                        )
                    )
                    offset += range.length
                }
            }
            // Otherwise, we found a quoted string containing a comment, so we leave
            // it in
        })
    return output_text as String
 }

 /// Convert the input text into a list of tokens we can iterate over / process
 func parse_tokens_from_string(input_text: String) throws -> [String] {
    let regex = try NSRegularExpression(pattern: TOKEN_REGEX, options: [])
    let code = try remove_comments(input_text: input_text)
    let matches = regex.matches(
        in: code,
        options: [],
        range: NSRange(location: 0, length: code.count)
    )
    let tokens = matches.map { result in
        (code as NSString).substring(with: result.range)
    }
    return tokens
 }

 /// NOTE: Instance can only be used once!
 public final class PrologParser {
    typealias VariableName = String

    private var _tokens: [String]
    private var _scope: [VariableName: TermDescription]

    public init(input_text: String) throws {
        self._tokens = try parse_tokens_from_string(input_text: input_text)
        self._scope = [:]
    }

    public func parse_rules() throws -> [RuleDescription] {
        var rules = [RuleDescription]()
        while !_tokens.isEmpty {
            _scope = [:]
            rules.append(try _parse_rule())
        }
        return rules
    }

    public func parse_query() throws -> TermDescription {
        _scope = [:]
        return try _parse_term()
    }

    var _current: String! {
        _tokens.first
    }

    func _pop_current() -> String {
        _tokens.removeFirst() // FIXME: this can crash
    }

    func _parse_atom() throws -> String {
        let name = _pop_current()
        let regex = try NSRegularExpression(
            pattern: ATOM_NAME_REGEX,
            options: []
        )
        guard let _regex = regex.firstMatch(
            in: name,
            options: [],
            range: NSRange(location: 0, length: name.count)
        ), _regex.range.location != NSNotFound
        else {
            throw NSError(
                domain: #file,
                code: #line,
                userInfo: [NSLocalizedDescriptionKey: "Invalid Atom Name: " + name]
            )
        }
        return name
    }

    func _parse_term() throws -> TermDescription {
        // If we encounter an opening parenthesis, we know we're dealing with a
        // conjunction, so we process the list of arguments until we hit a closing
        // parenthesis and return the conjunction object.
        if _current == "(" {
            _ = _pop_current()
            let arguments = try _parse_arguments()
            return .conjunction(Conjunction(arguments: arguments))
        }

        let functor = try _parse_atom()

        // If we have a matching variable, we make sure that variables with the same
        // name within a rule always use one variable object (with the exception of
        // the anonymous '_' variable object).
        let regex = try NSRegularExpression(
            pattern: VARIABLE_REGEX,
            options: []
        )
        if let _regex = regex.firstMatch(
            in: functor,
            options: []
            , range: NSRange(location: 0, length: functor.count)
        ), _regex.range.location != NSNotFound {
            guard functor != "_" else {
                return .variable(name: "_")
            }
            guard let variable = _scope[functor] else {
                let variable = TermDescription.variable(name: functor)
                _scope[functor] = variable
                return variable
            }
            return variable
        } else {
            // If there are no arguments to process, return an atom. Atoms are processed
            // as terms without arguments.
            guard _current == "(" else {
                return .atom(.functor(functor))
            }
            _ = _pop_current()
            let arguments = try _parse_arguments()
            return .structure(Structure(arguments: arguments, functor: functor))
        }
    }

    func _parse_arguments() throws -> [TermDescription] {
        var arguments = [TermDescription]()
        // Keep adding the arguments to our list until we encounter an ending
        // parenthesis ')'
        while _current != ")" {
            arguments.append(try _parse_term())
            if _current != "," && _current != ")" {
                throw NSError(
                    domain: #file,
                    code: #line,
                    userInfo: [NSLocalizedDescriptionKey: "Expected , or ) in term but got " + _current]
                )
            }
            if _current == "," {
                _ = _pop_current()
            }
        }
        _ = _pop_current()
        return arguments
    }

    func _parse_rule() throws -> RuleDescription {
        let term = try _parse_term()
        guard case .structure(let head) = term else {
            throw NSError(
                domain: #file,
                code: #line,
                userInfo: [NSLocalizedDescriptionKey: "Expected structure but got " + String(describing: term)]
            )
        }

        if _current == "." {
            _ = _pop_current()
            // We process facts as rules with the tail set to true:
            return RuleDescription(head: head, tail: .atom(.true))
        }

        guard _current == ":-" else {
            throw NSError(
                domain: #file,
                code: #line,
                userInfo: [NSLocalizedDescriptionKey: "Expected :- in rule but got " + _current]
            )
        }

        _ = _pop_current()

        // Process the rule arguments
        var arguments = [TermDescription]()

        while _current != "." {
            arguments.append(try _parse_term())

            if _current != "," && _current != "." {
                throw NSError(
                    domain: #file,
                    code: #line,
                    userInfo: [NSLocalizedDescriptionKey: "Expected , or . in term but got " + _current]
                )
            }
            if _current == "," {
                _ = _pop_current()
            }
        }
        _ = _pop_current()

        // If we have more than one argument, we return a conjunction, otherwise,
        // we process the item as a regular rule containing a head and a tail

        let tail = arguments.count == 1 ? arguments[0] : .conjunction(Conjunction(arguments: arguments))
        return RuleDescription(head: head, tail: tail)
    }
 }
	// Derived from https://github.com/photonlines/Python-Prolog-Interpreter

	import Foundation

	// MARK: - Primitives

	/// Rules are used to define relationships between facts and other rules.They
	/// allow us to make conditional statements about our world. Let's say we want to say
	/// that all humans are mortal. We can do so using the rule below: mortal(X) :-
	/// human(X).
	public struct RuleDescription: Hashable, Sendable {
	public let head: Structure
	public let tail: TermDescription
	}

	/// Prolog has only one data type — the term.
	///
	/// The simplest term is an atom. Atoms can be combined to form compound terms.
	/// Example: are_friends(mark, michael) is a compound term where are_friends is
	/// called a functor and mark and michael are arguments.
	public enum TermDescription: Hashable, Sendable {
	case atom(AtomDescription)
	case conjunction(Conjunction)
	case structure(Structure)
	case variable(name: String)
	}

	public struct Structure: Hashable, Sendable {
	public let arguments: [TermDescription]
	public let functor: String
	}

	public enum AtomDescription: Hashable, Sendable {
	case `true`
	case functor(String)
	}

	public struct Conjunction: Hashable, Sendable {
	public let arguments: [TermDescription]
	}

	// MARK: - Parser

	let TOKEN_REGEX = "[A-Za-z0-9_]+\|:\\-\|[()\\.,]"
	let ATOM_NAME_REGEX = "^[A-Za-z0-9_]+$"
	let VARIABLE_REGEX = "^[A-Z_][A-Za-z0-9_]*$"

	/// Regex to parse comment strings. The first group captures quoted strings (
	/// double and single). The second group captures regular comments ('%' for
	/// single-line or '/* */' for multi-line).
	let COMMENT_REGEX = "(\\\".?\\\"\|\\'.?\\')\|(/\\.?\\/\|%[^\\r\\n]$)"

	/// Return the input text string with all of the comments removed from it.
	func remove_comments(input_text: String) throws -> String {
	// Create a regular expression Pattern object we can use to find and strip out
	// comments. The MULTILINE flag tells Python to treat each line in the string
	// separately, while the DOTALL flag indicates that we can match patterns
	// which span multiple lines (so our multi-line comments '/* */' can be
	// processed)

	let regex = try NSRegularExpression(pattern: COMMENT_REGEX, options: [.anchorsMatchLines, .dotMatchesLineSeparators])

	var offset = 0
	let output_text = NSMutableString(string: input_text)
	regex.enumerateMatches(
	in: input_text,
	options: [],
	range: NSRange(location: 0, length: input_text.count),
	using: { result, _, _ in
	guard let result = result else {
	return
	}
	// If we found a match for our 2nd group, it is a comment, so we remove
	if result.numberOfRanges >= 3 {
	let range = result.range(at: 2)
	if range.location != NSNotFound {
	output_text.deleteCharacters(
	in: NSRange(
	location: range.location - offset,
	length: range.length
	)
	)
	offset += range.length
	}
	}
	// Otherwise, we found a quoted string containing a comment, so we leave
	// it in
	})
	return output_text as String
	}

	/// Convert the input text into a list of tokens we can iterate over / process
	func parse_tokens_from_string(input_text: String) throws -> [String] {
	let regex = try NSRegularExpression(pattern: TOKEN_REGEX, options: [])
	let code = try remove_comments(input_text: input_text)
	let matches = regex.matches(
	in: code,
	options: [],
	range: NSRange(location: 0, length: code.count)
	)
	let tokens = matches.map { result in
	(code as NSString).substring(with: result.range)
	}
	return tokens
	}

	/// NOTE: Instance can only be used once!
	public final class PrologParser {
	typealias VariableName = String

	private var _tokens: [String]
	private var _scope: [VariableName: TermDescription]

	public init(input_text: String) throws {
	self._tokens = try parse_tokens_from_string(input_text: input_text)
	self._scope = [:]
	}

	public func parse_rules() throws -> [RuleDescription] {
	var rules = [RuleDescription]()
	while !_tokens.isEmpty {
	_scope = [:]
	rules.append(try _parse_rule())
	}
	return rules
	}

	public func parse_query() throws -> TermDescription {
	_scope = [:]
	return try _parse_term()
	}

	var _current: String! {
	_tokens.first
	}

	func _pop_current() -> String {
	_tokens.removeFirst() // FIXME: this can crash
	}

	func _parse_atom() throws -> String {
	let name = _pop_current()
	let regex = try NSRegularExpression(
	pattern: ATOM_NAME_REGEX,
	options: []
	)
	guard let _regex = regex.firstMatch(
	in: name,
	options: [],
	range: NSRange(location: 0, length: name.count)
	), _regex.range.location != NSNotFound
	else {
	throw NSError(
	domain: #file,
	code: #line,
	userInfo: [NSLocalizedDescriptionKey: "Invalid Atom Name: " + name]
	)
	}
	return name
	}

	func _parse_term() throws -> TermDescription {
	// If we encounter an opening parenthesis, we know we're dealing with a
	// conjunction, so we process the list of arguments until we hit a closing
	// parenthesis and return the conjunction object.
	if _current == "(" {
	_ = _pop_current()
	let arguments = try _parse_arguments()
	return .conjunction(Conjunction(arguments: arguments))
	}

	let functor = try _parse_atom()

	// If we have a matching variable, we make sure that variables with the same
	// name within a rule always use one variable object (with the exception of
	// the anonymous '_' variable object).
	let regex = try NSRegularExpression(
	pattern: VARIABLE_REGEX,
	options: []
	)
	if let _regex = regex.firstMatch(
	in: functor,
	options: []
	, range: NSRange(location: 0, length: functor.count)
	), _regex.range.location != NSNotFound {
	guard functor != "_" else {
	return .variable(name: "_")
	}
	guard let variable = _scope[functor] else {
	let variable = TermDescription.variable(name: functor)
	_scope[functor] = variable
	return variable
	}
	return variable
	} else {
	// If there are no arguments to process, return an atom. Atoms are processed
	// as terms without arguments.
	guard _current == "(" else {
	return .atom(.functor(functor))
	}
	_ = _pop_current()
	let arguments = try _parse_arguments()
	return .structure(Structure(arguments: arguments, functor: functor))
	}
	}

	func _parse_arguments() throws -> [TermDescription] {
	var arguments = [TermDescription]()
	// Keep adding the arguments to our list until we encounter an ending
	// parenthesis ')'
	while _current != ")" {
	arguments.append(try _parse_term())
	if _current != "," && _current != ")" {
	throw NSError(
	domain: #file,
	code: #line,
	userInfo: [NSLocalizedDescriptionKey: "Expected , or ) in term but got " + _current]
	)
	}
	if _current == "," {
	_ = _pop_current()
	}
	}
	_ = _pop_current()
	return arguments
	}

	func _parse_rule() throws -> RuleDescription {
	let term = try _parse_term()
	guard case .structure(let head) = term else {
	throw NSError(
	domain: #file,
	code: #line,
	userInfo: [NSLocalizedDescriptionKey: "Expected structure but got " + String(describing: term)]
	)
	}

	if _current == "." {
	_ = _pop_current()
	// We process facts as rules with the tail set to true:
	return RuleDescription(head: head, tail: .atom(.true))
	}

	guard _current == ":-" else {
	throw NSError(
	domain: #file,
	code: #line,
	userInfo: [NSLocalizedDescriptionKey: "Expected :- in rule but got " + _current]
	)
	}

	_ = _pop_current()

	// Process the rule arguments
	var arguments = [TermDescription]()

	while _current != "." {
	arguments.append(try _parse_term())

	if _current != "," && _current != "." {
	throw NSError(
	domain: #file,
	code: #line,
	userInfo: [NSLocalizedDescriptionKey: "Expected , or . in term but got " + _current]
	)
	}
	if _current == "," {
	_ = _pop_current()
	}
	}
	_ = _pop_current()

	// If we have more than one argument, we return a conjunction, otherwise,
	// we process the item as a regular rule containing a head and a tail

	let tail = arguments.count == 1 ? arguments[0] : .conjunction(Conjunction(arguments: arguments))
	return RuleDescription(head: head, tail: tail)
	}
	}