Skip to content

Instantly share code, notes, and snippets.

@hectr
Created November 28, 2024 00:27
Show Gist options
  • Save hectr/257879ad83bc14d3c5b9d5ee7990a81c to your computer and use it in GitHub Desktop.
Save hectr/257879ad83bc14d3c5b9d5ee7990a81c to your computer and use it in GitHub Desktop.
Prolog subset parser.
// Derived from https://github.com/photonlines/Python-Prolog-Interpreter
import Foundation
// MARK: - Primitives
/// Rules are used to define relationships between facts and other rules.They
/// allow us to make conditional statements about our world. Let's say we want to say
/// that all humans are mortal. We can do so using the rule below: mortal(X) :-
/// human(X).
public struct RuleDescription: Hashable, Sendable {
public let head: Structure
public let tail: TermDescription
}
/// Prolog has only one data type — the term.
///
/// The simplest term is an atom. Atoms can be combined to form compound terms.
/// Example: are_friends(mark, michael) is a compound term where are_friends is
/// called a functor and mark and michael are arguments.
public enum TermDescription: Hashable, Sendable {
case atom(AtomDescription)
case conjunction(Conjunction)
case structure(Structure)
case variable(name: String)
}
public struct Structure: Hashable, Sendable {
public let arguments: [TermDescription]
public let functor: String
}
public enum AtomDescription: Hashable, Sendable {
case `true`
case functor(String)
}
public struct Conjunction: Hashable, Sendable {
public let arguments: [TermDescription]
}
// MARK: - Parser
let TOKEN_REGEX = "[A-Za-z0-9_]+|:\\-|[()\\.,]"
let ATOM_NAME_REGEX = "^[A-Za-z0-9_]+$"
let VARIABLE_REGEX = "^[A-Z_][A-Za-z0-9_]*$"
/// Regex to parse comment strings. The first group captures quoted strings (
/// double and single). The second group captures regular comments ('%' for
/// single-line or '/* */' for multi-line).
let COMMENT_REGEX = "(\\\".*?\\\"|\\'.*?\\')|(/\\*.*?\\*/|%[^\\r\\n]*$)"
/// Return the input text string with all of the comments removed from it.
func remove_comments(input_text: String) throws -> String {
// Create a regular expression Pattern object we can use to find and strip out
// comments. The MULTILINE flag tells Python to treat each line in the string
// separately, while the DOTALL flag indicates that we can match patterns
// which span multiple lines (so our multi-line comments '/* */' can be
// processed)
let regex = try NSRegularExpression(pattern: COMMENT_REGEX, options: [.anchorsMatchLines, .dotMatchesLineSeparators])
var offset = 0
let output_text = NSMutableString(string: input_text)
regex.enumerateMatches(
in: input_text,
options: [],
range: NSRange(location: 0, length: input_text.count),
using: { result, _, _ in
guard let result = result else {
return
}
// If we found a match for our 2nd group, it is a comment, so we remove
if result.numberOfRanges >= 3 {
let range = result.range(at: 2)
if range.location != NSNotFound {
output_text.deleteCharacters(
in: NSRange(
location: range.location - offset,
length: range.length
)
)
offset += range.length
}
}
// Otherwise, we found a quoted string containing a comment, so we leave
// it in
})
return output_text as String
}
/// Convert the input text into a list of tokens we can iterate over / process
func parse_tokens_from_string(input_text: String) throws -> [String] {
let regex = try NSRegularExpression(pattern: TOKEN_REGEX, options: [])
let code = try remove_comments(input_text: input_text)
let matches = regex.matches(
in: code,
options: [],
range: NSRange(location: 0, length: code.count)
)
let tokens = matches.map { result in
(code as NSString).substring(with: result.range)
}
return tokens
}
/// NOTE: Instance can only be used once!
public final class PrologParser {
typealias VariableName = String
private var _tokens: [String]
private var _scope: [VariableName: TermDescription]
public init(input_text: String) throws {
self._tokens = try parse_tokens_from_string(input_text: input_text)
self._scope = [:]
}
public func parse_rules() throws -> [RuleDescription] {
var rules = [RuleDescription]()
while !_tokens.isEmpty {
_scope = [:]
rules.append(try _parse_rule())
}
return rules
}
public func parse_query() throws -> TermDescription {
_scope = [:]
return try _parse_term()
}
var _current: String! {
_tokens.first
}
func _pop_current() -> String {
_tokens.removeFirst() // FIXME: this can crash
}
func _parse_atom() throws -> String {
let name = _pop_current()
let regex = try NSRegularExpression(
pattern: ATOM_NAME_REGEX,
options: []
)
guard let _regex = regex.firstMatch(
in: name,
options: [],
range: NSRange(location: 0, length: name.count)
), _regex.range.location != NSNotFound
else {
throw NSError(
domain: #file,
code: #line,
userInfo: [NSLocalizedDescriptionKey: "Invalid Atom Name: " + name]
)
}
return name
}
func _parse_term() throws -> TermDescription {
// If we encounter an opening parenthesis, we know we're dealing with a
// conjunction, so we process the list of arguments until we hit a closing
// parenthesis and return the conjunction object.
if _current == "(" {
_ = _pop_current()
let arguments = try _parse_arguments()
return .conjunction(Conjunction(arguments: arguments))
}
let functor = try _parse_atom()
// If we have a matching variable, we make sure that variables with the same
// name within a rule always use one variable object (with the exception of
// the anonymous '_' variable object).
let regex = try NSRegularExpression(
pattern: VARIABLE_REGEX,
options: []
)
if let _regex = regex.firstMatch(
in: functor,
options: []
, range: NSRange(location: 0, length: functor.count)
), _regex.range.location != NSNotFound {
guard functor != "_" else {
return .variable(name: "_")
}
guard let variable = _scope[functor] else {
let variable = TermDescription.variable(name: functor)
_scope[functor] = variable
return variable
}
return variable
} else {
// If there are no arguments to process, return an atom. Atoms are processed
// as terms without arguments.
guard _current == "(" else {
return .atom(.functor(functor))
}
_ = _pop_current()
let arguments = try _parse_arguments()
return .structure(Structure(arguments: arguments, functor: functor))
}
}
func _parse_arguments() throws -> [TermDescription] {
var arguments = [TermDescription]()
// Keep adding the arguments to our list until we encounter an ending
// parenthesis ')'
while _current != ")" {
arguments.append(try _parse_term())
if _current != "," && _current != ")" {
throw NSError(
domain: #file,
code: #line,
userInfo: [NSLocalizedDescriptionKey: "Expected , or ) in term but got " + _current]
)
}
if _current == "," {
_ = _pop_current()
}
}
_ = _pop_current()
return arguments
}
func _parse_rule() throws -> RuleDescription {
let term = try _parse_term()
guard case .structure(let head) = term else {
throw NSError(
domain: #file,
code: #line,
userInfo: [NSLocalizedDescriptionKey: "Expected structure but got " + String(describing: term)]
)
}
if _current == "." {
_ = _pop_current()
// We process facts as rules with the tail set to true:
return RuleDescription(head: head, tail: .atom(.true))
}
guard _current == ":-" else {
throw NSError(
domain: #file,
code: #line,
userInfo: [NSLocalizedDescriptionKey: "Expected :- in rule but got " + _current]
)
}
_ = _pop_current()
// Process the rule arguments
var arguments = [TermDescription]()
while _current != "." {
arguments.append(try _parse_term())
if _current != "," && _current != "." {
throw NSError(
domain: #file,
code: #line,
userInfo: [NSLocalizedDescriptionKey: "Expected , or . in term but got " + _current]
)
}
if _current == "," {
_ = _pop_current()
}
}
_ = _pop_current()
// If we have more than one argument, we return a conjunction, otherwise,
// we process the item as a regular rule containing a head and a tail
let tail = arguments.count == 1 ? arguments[0] : .conjunction(Conjunction(arguments: arguments))
return RuleDescription(head: head, tail: tail)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment