Created
November 28, 2024 00:27
-
-
Save hectr/257879ad83bc14d3c5b9d5ee7990a81c to your computer and use it in GitHub Desktop.
Prolog subset parser.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Derived from https://github.com/photonlines/Python-Prolog-Interpreter | |
import Foundation | |
// MARK: - Primitives | |
/// Rules are used to define relationships between facts and other rules.They | |
/// allow us to make conditional statements about our world. Let's say we want to say | |
/// that all humans are mortal. We can do so using the rule below: mortal(X) :- | |
/// human(X). | |
public struct RuleDescription: Hashable, Sendable { | |
public let head: Structure | |
public let tail: TermDescription | |
} | |
/// Prolog has only one data type — the term. | |
/// | |
/// The simplest term is an atom. Atoms can be combined to form compound terms. | |
/// Example: are_friends(mark, michael) is a compound term where are_friends is | |
/// called a functor and mark and michael are arguments. | |
public enum TermDescription: Hashable, Sendable { | |
case atom(AtomDescription) | |
case conjunction(Conjunction) | |
case structure(Structure) | |
case variable(name: String) | |
} | |
public struct Structure: Hashable, Sendable { | |
public let arguments: [TermDescription] | |
public let functor: String | |
} | |
public enum AtomDescription: Hashable, Sendable { | |
case `true` | |
case functor(String) | |
} | |
public struct Conjunction: Hashable, Sendable { | |
public let arguments: [TermDescription] | |
} | |
// MARK: - Parser | |
let TOKEN_REGEX = "[A-Za-z0-9_]+|:\\-|[()\\.,]" | |
let ATOM_NAME_REGEX = "^[A-Za-z0-9_]+$" | |
let VARIABLE_REGEX = "^[A-Z_][A-Za-z0-9_]*$" | |
/// Regex to parse comment strings. The first group captures quoted strings ( | |
/// double and single). The second group captures regular comments ('%' for | |
/// single-line or '/* */' for multi-line). | |
let COMMENT_REGEX = "(\\\".*?\\\"|\\'.*?\\')|(/\\*.*?\\*/|%[^\\r\\n]*$)" | |
/// Return the input text string with all of the comments removed from it. | |
func remove_comments(input_text: String) throws -> String { | |
// Create a regular expression Pattern object we can use to find and strip out | |
// comments. The MULTILINE flag tells Python to treat each line in the string | |
// separately, while the DOTALL flag indicates that we can match patterns | |
// which span multiple lines (so our multi-line comments '/* */' can be | |
// processed) | |
let regex = try NSRegularExpression(pattern: COMMENT_REGEX, options: [.anchorsMatchLines, .dotMatchesLineSeparators]) | |
var offset = 0 | |
let output_text = NSMutableString(string: input_text) | |
regex.enumerateMatches( | |
in: input_text, | |
options: [], | |
range: NSRange(location: 0, length: input_text.count), | |
using: { result, _, _ in | |
guard let result = result else { | |
return | |
} | |
// If we found a match for our 2nd group, it is a comment, so we remove | |
if result.numberOfRanges >= 3 { | |
let range = result.range(at: 2) | |
if range.location != NSNotFound { | |
output_text.deleteCharacters( | |
in: NSRange( | |
location: range.location - offset, | |
length: range.length | |
) | |
) | |
offset += range.length | |
} | |
} | |
// Otherwise, we found a quoted string containing a comment, so we leave | |
// it in | |
}) | |
return output_text as String | |
} | |
/// Convert the input text into a list of tokens we can iterate over / process | |
func parse_tokens_from_string(input_text: String) throws -> [String] { | |
let regex = try NSRegularExpression(pattern: TOKEN_REGEX, options: []) | |
let code = try remove_comments(input_text: input_text) | |
let matches = regex.matches( | |
in: code, | |
options: [], | |
range: NSRange(location: 0, length: code.count) | |
) | |
let tokens = matches.map { result in | |
(code as NSString).substring(with: result.range) | |
} | |
return tokens | |
} | |
/// NOTE: Instance can only be used once! | |
public final class PrologParser { | |
typealias VariableName = String | |
private var _tokens: [String] | |
private var _scope: [VariableName: TermDescription] | |
public init(input_text: String) throws { | |
self._tokens = try parse_tokens_from_string(input_text: input_text) | |
self._scope = [:] | |
} | |
public func parse_rules() throws -> [RuleDescription] { | |
var rules = [RuleDescription]() | |
while !_tokens.isEmpty { | |
_scope = [:] | |
rules.append(try _parse_rule()) | |
} | |
return rules | |
} | |
public func parse_query() throws -> TermDescription { | |
_scope = [:] | |
return try _parse_term() | |
} | |
var _current: String! { | |
_tokens.first | |
} | |
func _pop_current() -> String { | |
_tokens.removeFirst() // FIXME: this can crash | |
} | |
func _parse_atom() throws -> String { | |
let name = _pop_current() | |
let regex = try NSRegularExpression( | |
pattern: ATOM_NAME_REGEX, | |
options: [] | |
) | |
guard let _regex = regex.firstMatch( | |
in: name, | |
options: [], | |
range: NSRange(location: 0, length: name.count) | |
), _regex.range.location != NSNotFound | |
else { | |
throw NSError( | |
domain: #file, | |
code: #line, | |
userInfo: [NSLocalizedDescriptionKey: "Invalid Atom Name: " + name] | |
) | |
} | |
return name | |
} | |
func _parse_term() throws -> TermDescription { | |
// If we encounter an opening parenthesis, we know we're dealing with a | |
// conjunction, so we process the list of arguments until we hit a closing | |
// parenthesis and return the conjunction object. | |
if _current == "(" { | |
_ = _pop_current() | |
let arguments = try _parse_arguments() | |
return .conjunction(Conjunction(arguments: arguments)) | |
} | |
let functor = try _parse_atom() | |
// If we have a matching variable, we make sure that variables with the same | |
// name within a rule always use one variable object (with the exception of | |
// the anonymous '_' variable object). | |
let regex = try NSRegularExpression( | |
pattern: VARIABLE_REGEX, | |
options: [] | |
) | |
if let _regex = regex.firstMatch( | |
in: functor, | |
options: [] | |
, range: NSRange(location: 0, length: functor.count) | |
), _regex.range.location != NSNotFound { | |
guard functor != "_" else { | |
return .variable(name: "_") | |
} | |
guard let variable = _scope[functor] else { | |
let variable = TermDescription.variable(name: functor) | |
_scope[functor] = variable | |
return variable | |
} | |
return variable | |
} else { | |
// If there are no arguments to process, return an atom. Atoms are processed | |
// as terms without arguments. | |
guard _current == "(" else { | |
return .atom(.functor(functor)) | |
} | |
_ = _pop_current() | |
let arguments = try _parse_arguments() | |
return .structure(Structure(arguments: arguments, functor: functor)) | |
} | |
} | |
func _parse_arguments() throws -> [TermDescription] { | |
var arguments = [TermDescription]() | |
// Keep adding the arguments to our list until we encounter an ending | |
// parenthesis ')' | |
while _current != ")" { | |
arguments.append(try _parse_term()) | |
if _current != "," && _current != ")" { | |
throw NSError( | |
domain: #file, | |
code: #line, | |
userInfo: [NSLocalizedDescriptionKey: "Expected , or ) in term but got " + _current] | |
) | |
} | |
if _current == "," { | |
_ = _pop_current() | |
} | |
} | |
_ = _pop_current() | |
return arguments | |
} | |
func _parse_rule() throws -> RuleDescription { | |
let term = try _parse_term() | |
guard case .structure(let head) = term else { | |
throw NSError( | |
domain: #file, | |
code: #line, | |
userInfo: [NSLocalizedDescriptionKey: "Expected structure but got " + String(describing: term)] | |
) | |
} | |
if _current == "." { | |
_ = _pop_current() | |
// We process facts as rules with the tail set to true: | |
return RuleDescription(head: head, tail: .atom(.true)) | |
} | |
guard _current == ":-" else { | |
throw NSError( | |
domain: #file, | |
code: #line, | |
userInfo: [NSLocalizedDescriptionKey: "Expected :- in rule but got " + _current] | |
) | |
} | |
_ = _pop_current() | |
// Process the rule arguments | |
var arguments = [TermDescription]() | |
while _current != "." { | |
arguments.append(try _parse_term()) | |
if _current != "," && _current != "." { | |
throw NSError( | |
domain: #file, | |
code: #line, | |
userInfo: [NSLocalizedDescriptionKey: "Expected , or . in term but got " + _current] | |
) | |
} | |
if _current == "," { | |
_ = _pop_current() | |
} | |
} | |
_ = _pop_current() | |
// If we have more than one argument, we return a conjunction, otherwise, | |
// we process the item as a regular rule containing a head and a tail | |
let tail = arguments.count == 1 ? arguments[0] : .conjunction(Conjunction(arguments: arguments)) | |
return RuleDescription(head: head, tail: tail) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment