Created
April 10, 2024 11:15
-
-
Save Kreijstal/ecc9b1b7c11cd47c708eb8971a0bb37c to your computer and use it in GitHub Desktop.
Something minimal: I took it from another website and expanded from there.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import typing as tp | |
import operator | |
# A parser is a callable that takes the input string and returns a
# (parsed value, remaining input) pair.
_ParseResult = tp.Tuple[tp.Any, str]
ParserP = tp.Callable[[str], _ParseResult]
class ParserError(Exception):
    """Error raised by the parsers in this module when input does not match.

    The exception message is ``msg`` followed by ``": "`` and ``content``.
    """

    def __init__(self, msg, content):
        message = f"{msg}: {content}"
        super().__init__(message)
def parse(p: ParserP, s: str) -> tp.Tuple[tp.Any, str]:
    """Run parser ``p`` on ``s`` and return its (value, remaining input) pair."""
    # Unpack and rebuild so the result is always a plain 2-tuple.
    value, remainder = p(s)
    return value, remainder
def anyChar(s):
    """Consume the first character of ``s``.

    Returns ``(first_character, rest_of_s)``.
    Raises ParserError when ``s`` is empty.
    """
    if s:
        return (s[0], s[1:])
    raise ParserError("Expected a character, got empty string", s)
def anyDigit(s):
    """Consume one leading character of ``s`` if ``str.isdigit`` accepts it.

    Returns ``(digit_character, rest_of_s)``.
    Raises ParserError when ``s`` is empty or its first character is not a digit.
    """
    if not s:
        raise ParserError("Expected digit, got empty string", s)
    head = s[0]
    if not head.isdigit():
        raise ParserError(f"Expected digit, got {head}", s)
    return (head, s[1:])
def many1(p: ParserP) -> ParserP:
    """Build a parser that applies ``p`` one or more times.

    The returned parser yields ``("".join(results), remaining_input)``, so
    ``p`` must produce strings.

    Raises ParserError when the very first application of ``p`` fails.
    Repetition stops at end of input, at the first failure of ``p``, or when
    ``p`` succeeds without consuming any input.
    """
    def func(s):
        try:
            # The first match is mandatory.
            first, s = parse(p, s)
        except ParserError as e:
            raise ParserError("many1: no match found at the beginning", s) from e
        result = [first]
        # Keep going while input remains and p keeps matching.
        while s:
            try:
                a, rest = parse(p, s)
            except ParserError:
                break
            if rest == s:
                # p matched without consuming anything; repeating it would
                # never terminate, so treat it as the end of the run.
                break
            result.append(a)
            s = rest
        return ("".join(result), s)
    return func
def oneOf(collection: tp.Set[str]) -> ParserP:
    """Build a parser that consumes one character contained in ``collection``.

    The returned parser yields ``(character, rest_of_input)`` and raises
    ParserError on empty input or when the first character is not in
    ``collection``.
    """
    def func(s):
        if not s:
            raise ParserError("Expected a character, got empty string", s)
        head = s[0]
        if head not in collection:
            raise ParserError("Character not in collection", s)
        return (head, s[1:])
    return func
def optional(parse: ParserP) -> ParserP:
    """Build a parser that never fails with ParserError.

    The returned parser yields whatever ``parse`` returns on success, and
    ``(None, s)`` with the input untouched when ``parse`` raises ParserError.
    """
    def func(s):
        try:
            outcome = parse(s)
        except ParserError:
            return None, s
        return outcome
    return func
def peek(parse: ParserP) -> ParserP:
    """Build a parser that runs ``parse`` without consuming input.

    The returned parser yields ``(value_from_parse, original_input)``.
    Errors raised by ``parse`` propagate unchanged.
    """
    def func(s):
        lookahead, _discarded = parse(s)
        return lookahead, s
    return func
def preceded(prefix: ParserP, parser: ParserP) -> ParserP:
    """Build a parser that runs ``prefix`` then ``parser``.

    The value produced by ``prefix`` is discarded; the returned parser yields
    the value of ``parser`` and the input left after both.
    """
    def func(s):
        _, after_prefix = parse(prefix, s)
        value, remaining = parse(parser, after_prefix)
        return value, remaining
    return func
def recognize(parse: ParserP) -> ParserP:
    """Build a parser that yields the input text consumed by ``parse``.

    The value produced by ``parse`` is discarded. The consumed text is the
    prefix of the input whose length is the difference between the input
    length and the length of the remaining input.
    """
    def func(s):
        _, rest = parse(s)
        consumed_len = len(s) - len(rest)
        return s[:consumed_len], rest
    return func
def satisfy(predicate: tp.Callable[[str], bool]) -> ParserP:
    """Build a parser that consumes one character accepted by ``predicate``.

    The returned parser yields ``(character, rest_of_input)`` and raises
    ParserError on empty input or when ``predicate`` rejects the first
    character.
    """
    def func(s):
        # Guard against empty input before indexing.
        if not s:
            raise ParserError("Input string is empty", s)
        if not predicate(s[0]):
            raise ParserError("Predicate not satisfied", s)
        return s[0], s[1:]
    return func
def delimited(p1: ParserP, p2: ParserP, p3: ParserP) -> ParserP:
    """Build a parser that runs ``p1``, ``p2`` and ``p3`` in sequence.

    Only the value of ``p2`` is kept; the values of ``p1`` and ``p3`` are
    discarded. The returned parser yields that value and the input left
    after all three parsers.
    """
    def func(s):
        _, after_open = parse(p1, s)
        inner, after_inner = parse(p2, after_open)
        _, after_close = parse(p3, after_inner)
        return inner, after_close
    return func
def pair(p1: ParserP, p2: ParserP) -> ParserP:
    """Build a parser that runs ``p1`` then ``p2`` and yields both values.

    The returned parser yields ``((value1, value2), remaining_input)``.
    """
    def func(s):
        first, after_first = parse(p1, s)
        second, after_second = parse(p2, after_first)
        return (first, second), after_second
    return func
def separated_pair(p1: ParserP, sep_parser: ParserP, p2: ParserP) -> ParserP:
    """Build a parser for ``p1``, then ``sep_parser``, then ``p2``.

    The separator's value is discarded; the returned parser yields
    ``((value1, value2), remaining_input)``.
    """
    def func(s):
        left, after_left = parse(p1, s)
        _, after_sep = parse(sep_parser, after_left)
        right, after_right = parse(p2, after_sep)
        return (left, right), after_right
    return func
def terminated(p1: ParserP, p2: ParserP) -> ParserP:
    """Build a parser that runs ``p1`` then ``p2`` and keeps only ``p1``'s value.

    The returned parser yields the value of ``p1`` and the input left after
    both parsers.
    """
    def func(s):
        kept, after_kept = parse(p1, s)
        _, after_terminator = parse(p2, after_kept)
        return kept, after_terminator
    return func
def tupleP(parsers: tp.Tuple[ParserP, ...]) -> ParserP:
    """Build a parser that runs every parser in ``parsers`` in order.

    The returned parser yields ``(tuple_of_values, remaining_input)``.
    A ParserError from any of the parsers propagates.
    """
    def func(s):
        collected = []
        remaining = s
        for parser in parsers:
            value, remaining = parse(parser, remaining)
            collected.append(value)
        return tuple(collected), remaining
    return func
def separatedTupleP(parsers: tp.Tuple[ParserP, ...], separator: ParserP) -> ParserP:
    """Build a parser that runs ``parsers`` in order with ``separator`` between them.

    ``separator`` is required after every parser except the last one and its
    value is discarded. The returned parser yields
    ``(tuple_of_values, remaining_input)``; a ParserError from any parser or
    from the separator propagates.
    """
    def func(s):
        results = []
        last_index = len(parsers) - 1
        for i, p in enumerate(parsers):
            result, s = parse(p, s)
            results.append(result)
            if i < last_index:
                # Not the last parser yet, so a separator must follow.
                _, s = parse(separator, s)
        return tuple(results), s
    return func
def count(count: int, p: ParserP) -> ParserP:
    """Build a parser that applies ``p`` exactly ``count`` times.

    The returned parser yields ``(list_of_values, remaining_input)``.
    A ParserError from any application of ``p`` propagates.
    """
    def func(s):
        collected = []
        remaining = s
        for _ in range(count):
            value, remaining = parse(p, remaining)
            collected.append(value)
        return collected, remaining
    return func
def many0(p: ParserP) -> ParserP:
    """Build a parser that applies ``p`` zero or more times.

    The returned parser yields ``(list_of_values, remaining_input)`` and
    never raises ParserError itself: repetition stops at the first failure of
    ``p``, or when ``p`` succeeds without consuming any input.
    """
    def func(s):
        results = []
        while True:
            try:
                result, rest = parse(p, s)
            except ParserError:
                break
            if rest == s:
                # p matched without consuming anything; repeating it would
                # never terminate, so treat it as the end of the run.
                break
            results.append(result)
            s = rest
        return results, s
    return func
def separated_list0(p: ParserP, sep: ParserP) -> ParserP:
    """Build a parser for zero or more ``p`` items separated by ``sep``.

    The returned parser yields ``(list_of_values, remaining_input)`` and
    never raises ParserError itself. Separator values are discarded.

    A separator is consumed only when another item follows it: if ``sep``
    matches but the next ``p`` fails, the remaining input still starts at
    that separator.
    """
    def func(s):
        results = []
        try:
            first_result, s = parse(p, s)
        except ParserError:
            # No first item at all: empty list, input untouched.
            return results, s
        results.append(first_result)
        while True:
            try:
                _, after_sep = parse(sep, s)
                result, rest = parse(p, after_sep)
            except ParserError:
                # `s` was not reassigned, so a dangling separator stays
                # in the remaining input.
                break
            if rest == s:
                # Separator and item both matched without consuming input;
                # repeating would never terminate.
                break
            results.append(result)
            s = rest
        return results, s
    return func
def separated_list1(p: ParserP, sep: ParserP) -> ParserP:
    """Build a parser for one or more ``p`` items separated by ``sep``.

    Delegates to ``separated_list0`` and raises ParserError when that yields
    an empty list; otherwise yields ``(list_of_values, remaining_input)``.
    """
    def func(s):
        items, rest = parse(separated_list0(p, sep), s)
        if items:
            return items, rest
        raise ParserError("Expected at least one match", rest)
    return func
def alternative(parsers: tp.List[ParserP]) -> ParserP:
    """Build a parser that returns the result of the first matching parser.

    Each parser in ``parsers`` is tried in order on the same input; one that
    raises ParserError is skipped. Raises ParserError when none match.
    """
    def func(s):
        for candidate in parsers:
            try:
                outcome = parse(candidate, s)
            except ParserError:
                # Try the next alternative on the same input.
                continue
            return outcome
        raise ParserError("No alternatives matched", s)
    return func
def take(count: int) -> ParserP:
    """Build a parser that consumes the first ``count`` characters.

    The returned parser yields ``(s[:count], s[count:])`` and raises
    ParserError when the input is shorter than ``count``.
    """
    def func(s):
        if count > len(s):
            raise ParserError(f"Expected at least {count} characters", s)
        head, tail = s[:count], s[count:]
        return head, tail
    return func
def take_until(predicate: tp.Callable[[str], bool]) -> ParserP:
    """Build a parser that consumes characters until ``predicate`` is true.

    The returned parser yields the leading characters for which
    ``predicate`` is false and the rest of the input, starting at the first
    character ``predicate`` accepts. It consumes everything when no character
    is accepted, and it never raises ParserError itself.
    """
    def func(s):
        cut = len(s)
        for index, char in enumerate(s):
            if predicate(char):
                cut = index
                break
        return s[:cut], s[cut:]
    return func
def describe(p: ParserP, tag: str) -> ParserP:
    """Build a parser that labels the result (or the failure) of ``p`` with ``tag``.

    On success the returned parser yields
    ``({'tag': tag, 'result': value}, remaining_input)``.
    On failure it raises a new ParserError whose message is prefixed with
    ``tag``, chained to the original error.
    """
    def func(s):
        try:
            result, s_remain = parse(p, s)
        except ParserError as e:
            # Prefix the message with the tag and keep the original error as
            # the explicit cause.
            raise ParserError(f"{tag} - {e}", s) from e
        # Tag the result so it can be identified in the output tree.
        return ({'tag': tag, 'result': result}, s_remain)
    return func
def alpha(s):
    """Consume one leading alphabetic character of ``s``.

    Returns ``(character, rest_of_s)``.
    Raises ParserError when ``s`` is empty or its first character is not
    alphabetic according to ``str.isalpha``.
    """
    if not s:
        # Report empty input as a ParserError, as the other single-character
        # parsers do, instead of failing on s[0] with IndexError.
        raise ParserError("Expected alphabetic character, got empty string", s)
    if s[0].isalpha():
        return s[0], s[1:]
    raise ParserError("Expected alphabetic character", s)
def mapP(p: ParserP, transform_func: tp.Callable[[tp.Any], tp.Any]) -> ParserP:
    """Build a parser that applies ``transform_func`` to the value of ``p``.

    The returned parser yields ``(transform_func(value), remaining_input)``.
    """
    def func(s):
        value, rest = parse(p, s)
        mapped = transform_func(value)
        return mapped, rest
    return func
def flatMap(p: ParserP, transform_func: tp.Callable[[tp.Any], ParserP]) -> ParserP:
    """Build a parser whose second stage depends on the value of ``p``.

    ``p`` is run first; ``transform_func`` turns its value into another
    parser, which is then run on the remaining input. The returned parser
    yields that second parser's result.
    """
    def func(s):
        value, rest = parse(p, s)
        next_parser = transform_func(value)
        return parse(next_parser, rest)
    return func
def char(c: str) -> ParserP:
    """Build a parser that consumes one character equal to ``c``."""
    def is_expected(x):
        return x == c
    return satisfy(is_expected)
def exact(target: str) -> ParserP:
    """Build a parser that matches ``target`` character by character.

    One ``char`` parser is created per character of ``target`` and they are
    run in sequence; on success the parsed value is ``target`` itself.
    """
    def keep_target(_):
        return target

    sequence = tupleP([char(c) for c in target])
    return mapP(sequence, keep_target)
def _digits_to_int(digits):
    """Join the parsed digits and convert them to an int."""
    return int(''.join(digits))


# Parser for a run of one or more digits, yielding the value as an int.
# NOTE(review): anyDigit tests characters with str.isdigit, which also accepts
# characters (e.g. superscript digits) that int() rejects; such input would
# surface as ValueError rather than ParserError -- confirm whether that matters.
numeric = mapP(many1(anyDigit), _digits_to_int)
def is_whitespace(c: str) -> bool:
    """Return whether ``c`` is whitespace according to ``str.isspace``."""
    verdict = c.isspace()
    return verdict
# Consumes exactly one whitespace character.
whitespace = satisfy(is_whitespace)

# Consumes zero or more whitespace characters and yields them joined into
# one string.
whitespace0 = mapP(many0(whitespace), "".join)

# Consumes one or more whitespace characters and yields them joined into
# one string.
whitespace1 = mapP(many1(whitespace), "".join)
def addNicks(parsefn, nickList) -> ParserP:
    """Extend ``parsefn`` with nickname parsers that stand in for real input.

    ``nickList`` is an iterable of ``(nick, real_value)`` pairs, where
    ``nick`` is a parser and ``real_value`` is input text for ``parsefn``.
    The returned parser tries ``parsefn`` first, then each ``nick`` in order;
    when a ``nick`` matches, its value is replaced by the value ``parsefn``
    produces for that pair's ``real_value``.
    """
    def substitute(real_value):
        # Bind real_value per nickname. A lambda written directly in the
        # comprehension would look the name up at call time and always see
        # the last pair's value.
        return lambda _: parse(parsefn, real_value)[0]

    # One parser per nickname, each mapped to its own real value.
    parsers = [mapP(nick, substitute(real_value)) for nick, real_value in nickList]
    # The original parser takes precedence over the nicknames.
    return alternative([parsefn] + parsers)
# Combinators and parsers defined in this file:
# optional, peek, preceded, recognize, satisfy, delimited, pair, separated_pair, terminated, tupleP, count, many0, many1, separated_list0, separated_list1, alternative, take, take_until, describe, mapP, flatMap, char, exact, numeric, whitespace, whitespace0, whitespace1
# NOTE(review): the triple-quoted string below is a bare expression statement,
# so none of it runs; it appears to be a disabled usage example. The `place=`
# and `rangeadd=` lines inside it have unbalanced parentheses and would need
# fixing before the example could be enabled.
'''
#separated_pair(whitespace0,char("-"),whitespace0)
#separated_pair(whitespace0,char("+"),whitespace0)
place=describe(tupleP([describe(alpha,"building"),describe(numeric,"floor"),describe(preceded(char("-"),numeric),"room number")],"place")
range=tupleP([numeric,preceded(separated_pair(whitespace0,char("-"),whitespace0),numeric)])
rangeadd=tupleP([range,preceded(separated_pair(whitespace0,char("+"),whitespace0),numeric)]
print(parse(range,"01-05"))
print(parse(range,"01-05+07"))
'''
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment