Created
July 1, 2025 10:21
-
-
Save macleginn/9b21839b3177712c586c656986e2aa56 to your computer and use it in GitHub Desktop.
A parser for FOL expressions equipped with node-delimiting braces and labels
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from typing import Optional, Tuple, List, Any | |
class TreeNode: | |
"""Represents a node in the parse tree.""" | |
def __init__(self, rule_name: str, content: Any = None, children: List['TreeNode'] = None): | |
self.rule_name = rule_name | |
self.content = content # For terminal nodes | |
self.children = children or [] | |
def pretty_print(self, indent=0): | |
"""Pretty print the tree structure.""" | |
spaces = " " * indent | |
if self.content is not None: | |
print(f"{spaces}{self.rule_name}: '{self.content}'") | |
else: | |
print(f"{spaces}{self.rule_name}:") | |
for child in self.children: | |
child.pretty_print(indent + 1) | |
class FOLTreeParser: | |
"""Parser for FOL expressions using curly braces syntax.""" | |
def __init__(self): | |
self.pos = 0 | |
self.text = "" | |
def parse(self, expression: str) -> TreeNode: | |
"""Parse a FOL expression and return the parse tree.""" | |
self.text = expression.strip() | |
self.pos = 0 | |
return self._parse_node() | |
def _peek(self) -> str: | |
"""Peek at current character without advancing.""" | |
if self.pos >= len(self.text): | |
return "" | |
return self.text[self.pos] | |
def _advance(self) -> str: | |
"""Get current character and advance position.""" | |
if self.pos >= len(self.text): | |
return "" | |
char = self.text[self.pos] | |
self.pos += 1 | |
return char | |
def _skip_whitespace(self): | |
"""Skip whitespace characters.""" | |
while self.pos < len(self.text) and self.text[self.pos].isspace(): | |
self.pos += 1 | |
def _parse_node(self) -> TreeNode: | |
"""Parse a single node (either structured or terminal).""" | |
self._skip_whitespace() | |
if self._peek() == '{': | |
return self._parse_structured_node() | |
else: | |
return self._parse_terminal() | |
def _parse_structured_node(self) -> TreeNode: | |
"""Parse a structured node: {RuleName: content}""" | |
# Consume opening brace | |
self._advance() # '{' | |
self._skip_whitespace() | |
# Parse rule name | |
rule_name = self._parse_rule_name() | |
self._skip_whitespace() | |
# Consume colon | |
if self._peek() != ':': | |
raise ValueError(f"Expected ':' after rule name, got '{self._peek()}' at position {self.pos}") | |
self._advance() # ':' | |
self._skip_whitespace() | |
# Parse content (can be mix of terminals and structured nodes) | |
children = [] | |
while self._peek() != '}' and self.pos < len(self.text): | |
if self._peek() == '{': | |
# Nested structured node | |
children.append(self._parse_structured_node()) | |
else: | |
# Terminal content | |
terminal = self._parse_terminal_content() | |
if terminal.strip(): # Only add non-empty terminals | |
children.append(TreeNode("Terminal", terminal.strip())) | |
self._skip_whitespace() | |
# Consume closing brace | |
if self._peek() != '}': | |
raise ValueError(f"Expected '}}' to close structured node, got '{self._peek()}' at position {self.pos}") | |
self._advance() # '}' | |
return TreeNode(rule_name, children=children) | |
def _parse_rule_name(self) -> str: | |
"""Parse a rule name (alphanumeric characters).""" | |
start = self.pos | |
while (self.pos < len(self.text) and | |
(self.text[self.pos].isalnum() or self.text[self.pos] in '_')): | |
self.pos += 1 | |
if start == self.pos: | |
raise ValueError(f"Expected rule name at position {self.pos}") | |
return self.text[start:self.pos] | |
def _parse_terminal(self) -> TreeNode: | |
"""Parse a terminal node (single token).""" | |
self._skip_whitespace() | |
start = self.pos | |
# Parse until whitespace or special character | |
while (self.pos < len(self.text) and | |
not self.text[self.pos].isspace() and | |
self.text[self.pos] not in '{}:()'): | |
self.pos += 1 | |
if start == self.pos: | |
# Handle special single characters | |
if self._peek() in '()=∀∃¬∧∨→↔⊤⊥,': | |
self.pos += 1 | |
content = self.text[start:self.pos] | |
return TreeNode("Terminal", content) | |
def _parse_terminal_content(self) -> str: | |
"""Parse terminal content until next '{' or '}'.""" | |
start = self.pos | |
depth = 0 | |
while self.pos < len(self.text): | |
char = self.text[self.pos] | |
if char == '{': | |
if depth == 0: | |
break # Found start of next structured node | |
depth += 1 | |
elif char == '}': | |
if depth == 0: | |
break # Found end of current structured node | |
depth -= 1 | |
self.pos += 1 | |
return self.text[start:self.pos] | |
def demo(): | |
"""Demonstrate the parser with example FOL expressions.""" | |
parser = FOLTreeParser() | |
# Test cases | |
test_expressions = [ | |
"{Atomic: {Predicate: P ( {SingleTerm: {Variable: x}} )}}", | |
"{Universal: ∀ {Variable: x} {Implication: ( {Atomic: {Predicate: P ( {SingleTerm: {Variable: x}} )}} → {Atomic: {Predicate: Q ( {SingleTerm: {Variable: x}} )}} )}}", | |
"{Conjunction: ( {Atomic: {Predicate: P ( {SingleTerm: {Variable: x}} )}} ∧ {Negation: ¬ {Atomic: {Predicate: Q ( {SingleTerm: {Variable: y}} )}}} )}", | |
"{Equality: {Variable: x} = {Function: f ( {SingleTerm: {Variable: y}} )}}", | |
"{Existential: ∃ {Variable: y} {Conjunction: ( {Predicate: Loves ( {MultipleTerm: {Variable: y} , {SingleTerm: {Constant: a}}} )} ∧ {Predicate: IsHuman ( {SingleTerm: {Variable: y}} )} )}}" | |
] | |
for i, expr in enumerate(test_expressions, 1): | |
print(f"\n=== Test Case {i} ===") | |
print(f"Expression: {expr}") | |
print("\nParse Tree:") | |
try: | |
tree = parser.parse(expr) | |
tree.pretty_print() | |
except Exception as e: | |
print(f"Error: {e}") | |
def parse_fol_expression(expression: str) -> TreeNode: | |
"""Convenience function to parse a single FOL expression.""" | |
parser = FOLTreeParser() | |
return parser.parse(expression) | |
if __name__ == "__main__": | |
demo() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment