
Blog 2025/4/9

<- previous | index | next ->

A general-purpose symbolic expression parser in Python

Symbolic expressions are a simple syntax for representing trees. You may recognize them as the core of the syntax of Scheme and Common Lisp.
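
For example, (a (b c) d) describes a tree: a node containing the atom a, the subtree (b c), and the atom d. The parser developed below represents it as nested Python lists:

>>> parse_exprs('(a (b c) d)')
['a', ['b', 'c'], 'd']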

This parser starts with a trick from Peter Norvig: surround each ( and ) with extra spaces, which lets you simply .split() the text into tokens.

However, that approach breaks on any string literal that contains spaces.
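
Here's a minimal sketch of the trick and its failure mode (naive_tokenize is just an illustrative name, not part of the parser below):

>>> def naive_tokenize(text):
...     return text.replace('(', ' ( ').replace(')', ' ) ').split()
...
>>> naive_tokenize('(+ 1 (* 2 3))')
['(', '+', '1', '(', '*', '2', '3', ')', ')']
>>> naive_tokenize('(print "hello world")')
['(', 'print', '"hello', 'world"', ')']

Note how the string literal gets torn into two tokens.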

We can fix this by using a string literal regex (see my previous post) to first break the text up into string and non-string chunks, then apply Norvig's .split() to the non-string chunks.
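
Concretely: re.split() with a capturing group keeps the matched text in its output, so the result alternates between non-string and string chunks (this is the same str_regex as in the parser below):

>>> import re
>>> str_regex = r'("(?:[^\"\\]|\\[\s\S])*")'
>>> re.split(str_regex, '(print "hello world")')
['(print ', '"hello world"', ')']

Only the non-string chunks get the paren-padding treatment; each string chunk passes through as a single token.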

Throw in a little recursion to handle nesting, plus a couple of checks to catch any unbalanced parens (demonstrated at the end of this post), and we're done!

Demos:

$ cat exprs1.txt 
(statement (return (string "I said \"hello\" to the cat")))
$ ./parse_exprs.py exprs1.txt 
['statement', ['return', ['string', '"I said \\"hello\\" to the cat"']]]
$ cat exprs2.txt 
(vardecl (type pointer (type char)) (name message) (value (string "I said \"hello\" to the cat")))
$ ./parse_exprs.py exprs2.txt 
['vardecl',
 ['type', 'pointer', ['type', 'char']],
 ['name', 'message'],
 ['value', ['string', '"I said \\"hello\\" to the cat"']]]
#!/usr/bin/env python3
# A general-purpose symbolic expression parser.
# Copyright 2025 Jason Pepas.
# Released under the terms of the MIT license, see https://opensource.org/license/mit

import sys
import re

def string_chunkify(text):
    "break 'text' into string and non-string chunks"
    str_regex = r'("(?:[^\"\\]|\\[\s\S])*")'
    # re.split with a capturing group keeps the string literals in the output.
    return re.split(str_regex, text, flags=re.MULTILINE)

def tokenize_exprs(text):
    "tokenize the symbolic expressions in 'text'"
    tokens = []
    for chunk in string_chunkify(text):
        if chunk.startswith('"'):
            # A string literal is a single token, spaces and all.
            tokens.append(chunk)
        else:
            # This clever trick comes from Peter Norvig!  https://norvig.com/lispy.html
            tokens += (chunk
                       .replace('(', ' ( ')
                       .replace(')', ' ) ')
                       .split())
    return tokens

def parse_exprs(text):
    "parse the symbolic expressions in 'text' and return a syntax tree."
    def parse_expr_tokens(tokens, offset=0):
        "recursively parse the list of tokens into a syntax tree."
        ast = []
        if len(tokens) - offset == 0:
            # Empty input: nothing to parse.
            return (ast, offset)
        i = offset
        if tokens[i] != '(':
            raise Exception("Missing '(' before tokens: %s" % tokens[offset:offset+10])
        i += 1
        while i < len(tokens):
            token = tokens[i]
            if token == '(':
                # A nested expression: recurse, then resume after it.
                (subast, i) = parse_expr_tokens(tokens, i)
                ast.append(subast)
                continue
            if token == ')':
                i += 1
                return (ast, i)
            else:
                ast.append(token)
                i += 1
                continue
        raise Exception("Missing ')' at end of tokens.")
    tokens = tokenize_exprs(text)
    (ast, i) = parse_expr_tokens(tokens)
    if i != len(tokens):
        raise Exception("Leftover tokens: %s" % tokens[i:])
    return ast

if __name__ == "__main__":
    with open(sys.argv[-1]) as fd:
        text = fd.read()
    ast = parse_exprs(text)
    import pprint
    pprint.pprint(ast)
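
And a quick demonstration of the unbalanced-paren checks (a REPL session, with parse_exprs imported from the script above):

>>> parse_exprs('(statement (return')
Traceback (most recent call last):
  ...
Exception: Missing ')' at end of tokens.
>>> parse_exprs('(a) b)')
Traceback (most recent call last):
  ...
Exception: Leftover tokens: ['b', ')']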