Created
September 10, 2021 20:36
-
-
Save Sasszem/308614f96149e5741aeeace72c169936 to your computer and use it in GitHub Desktop.
Optimizing bf2c compiler in python. Possibly not 100% working. Gave up on fixing cuz' I got bored on it.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dataclasses import dataclass, field, replace | |
import typing | |
from collections.abc import Iterator | |
from enum import Enum | |
from collections import defaultdict | |
####################################### | |
# Base classes for storing operations # | |
####################################### | |
NODE = typing.Union[ | |
"Sequence", | |
"CharIn", | |
"CharOut", | |
"CharOutFixed", | |
"PointerMove", | |
"AddSub", | |
"EndLoopToken", | |
"LinearLoop", | |
"SetOperation", | |
] | |
@dataclass | |
class Sequence: | |
"""A sequence of operations. Might contain sub-sequences, thus implementing a tree""" | |
elements: list[NODE] | |
def add(self, element: NODE) -> None: | |
""""add an element to the sequence""" | |
self.elements.append(element) | |
def __init__(self): | |
"""init the sequence as empty""" | |
self.elements = list() | |
def __getitem__(self, key: int) -> NODE: | |
return self.elements[key] | |
def __iter__(self): | |
return iter(self.elements) | |
def last(self) -> typing.Optional[NODE]: | |
"""return last element or None if empty""" | |
if self.elements: | |
return self.elements[-1] | |
return None | |
@dataclass | |
class CharIn: | |
"""a character input operation into a cell""" | |
offset: int = 0 | |
"""offset of the target cell relative to the current pointer""" | |
@dataclass | |
class CharOut: | |
"""character output operation from a cell""" | |
offset: int = 0 | |
"""relative offset from the pointer""" | |
@dataclass | |
class CharOutFixed: | |
"""a fixed known char out operation""" | |
val: int | |
"""value to print""" | |
@dataclass | |
class PointerMove: | |
"""move the pointer""" | |
val: int | |
"""(signed) amout to move""" | |
@dataclass | |
class AddSub: | |
"""add or sub from a cell""" | |
val: int | |
"""amount to add or sub""" | |
offset: int = 0 | |
"""relative offset""" | |
@dataclass | |
class LinearLoop: | |
"""a linear loop""" | |
keys: dict[int, int] = field(default_factory=dict) | |
offset: int = 0 | |
@dataclass | |
class SetOperation: | |
"""set a cell to a known fixed value""" | |
offset: int | |
val: int | |
@dataclass | |
class EndLoopToken: | |
"""End of loop sentinel""" | |
########## | |
# PARSER # | |
########## | |
class BFParser: | |
"""parse BF code into own representation""" | |
def parse_sequence(self, token_iter: Iterator[str]) -> Sequence: | |
"""parse tokens into a sequence""" | |
seq = Sequence() | |
for token in token_iter: | |
node = self.parse_token(token, token_iter) | |
if node: | |
if isinstance(node, EndLoopToken): | |
break | |
seq.add(node) | |
else: | |
raise RuntimeError("Unbalanced loops!") | |
return seq | |
def parse_token(self, token: str, rest: Iterator[str]) -> typing.Optional[NODE]: | |
"""parse a block | |
a single token or call parse_sequence if found a loop""" | |
if token == ".": | |
return CharOut() | |
if token == ",": | |
return CharIn() | |
if token == ">": | |
return PointerMove(1) | |
if token == "<": | |
return PointerMove(-1) | |
if token == "+": | |
return AddSub(1) | |
if token == "-": | |
return AddSub(-1) | |
if token == "[": | |
return self.parse_sequence(rest) | |
if token == "]": | |
return EndLoopToken() | |
return None | |
# any other char should be ignored | |
def parse_source(self, source: str) -> Sequence: | |
"""parse a full source""" | |
# all we need is to wrap in [] and convert to iter | |
tree = self.parse_token("[", iter(source+"]")) | |
assert isinstance(tree, Sequence), "Toplevel is not a sequence!" | |
return tree | |
def parse_file(self, filename: str) -> Sequence: | |
"""parse a file | |
aka open and then parse source""" | |
with open(filename) as file: | |
return self.parse_source(file.read()) | |
class OperationsCombiner: | |
"""Opt: combine multiple consequent operations into one""" | |
def __call__(self, tree: Sequence) -> Sequence: | |
"""run""" | |
ret_seq = Sequence() | |
for node in tree: | |
if isinstance(node, AddSub): | |
last = ret_seq.last() | |
if isinstance(last, AddSub) and last.offset == node.offset: | |
last.val += node.val | |
else: | |
ret_seq.add(node) | |
elif isinstance(node, SetOperation): | |
last = ret_seq.last() | |
if isinstance(last, typing.Set) and last.offset == node.offset: | |
last.val = node.val | |
else: | |
ret_seq.add(node) | |
elif isinstance(node, PointerMove): | |
last = ret_seq.last() | |
if isinstance(last, PointerMove): | |
last.val += node.val | |
else: | |
ret_seq.add(node) | |
elif isinstance(node, Sequence): | |
ret_seq.add(self(node)) | |
elif isinstance(node, (CharOut, CharIn, CharOutFixed, Sequence, LinearLoop)): | |
ret_seq.add(node) | |
else: | |
raise RuntimeError(f"Unexpected node in graph: {node}") | |
return ret_seq | |
class CompboundGenerator: | |
"""Opt: detect and parse linear loops""" | |
def test_ops(self, tree: Sequence) -> bool: | |
return all(isinstance(x, (AddSub, PointerMove)) for x in tree) | |
# todo: update this when adding a new class! | |
def __call__(self, tree: Sequence) -> Sequence | LinearLoop | SetOperation: | |
"""run""" | |
if self.test_ops(tree): | |
# gather values | |
ptr = 0 | |
muls: dict[int, int] = {} | |
for node in tree: | |
if isinstance(node, (PointerMove)): | |
ptr += node.val | |
elif isinstance(node, AddSub): | |
muls[ptr] = muls.get(ptr, 0) + node.val | |
if muls.get(0, 0) == -1: | |
if len(muls) == 1: | |
return SetOperation(0, 0) | |
else: | |
return LinearLoop(muls) | |
ret_seq = Sequence() | |
for node in tree: | |
if isinstance(node, Sequence): | |
ret_seq.add(self(node)) | |
elif isinstance(node, (CharOut, CharIn, CharOutFixed, AddSub, PointerMove, LinearLoop, SetOperation)): | |
ret_seq.add(node) | |
else: | |
raise RuntimeError(f"Unexpected node in graph: {node}") | |
return ret_seq | |
class OffsetGenerator: | |
def __call__(self, tree: Sequence) -> Sequence: | |
ret_seq = Sequence() | |
new_node: NODE | |
offset = 0 | |
for node in tree: | |
if isinstance(node, PointerMove): | |
offset += node.val | |
elif isinstance(node, (AddSub, CharIn, CharOut, SetOperation)): | |
new_node = replace(node) | |
new_node.offset += offset | |
ret_seq.add(new_node) | |
elif isinstance(node, LinearLoop): | |
new_node = LinearLoop( | |
{k+offset: v for k, v in node.keys.items()}, offset=node.offset + offset) | |
ret_seq.add(new_node) | |
elif isinstance(node, Sequence): | |
ret_seq.add(PointerMove(offset)) | |
offset = 0 | |
ret_seq.add(self(node)) | |
elif isinstance(node, (CharOutFixed)): | |
ret_seq.add(node) | |
else: | |
raise RuntimeError(f"Unexpected node: {node}") | |
if offset: | |
ret_seq.add(PointerMove(offset)) | |
return ret_seq | |
class SimpleNopPruner: | |
def __call__(self, tree: Sequence) -> Sequence: | |
ret_seq = Sequence() | |
for node in tree: | |
if isinstance(node, (PointerMove, AddSub)): | |
if node.val: | |
ret_seq.add(node) | |
elif isinstance(node, (Sequence, CharOut, CharOutFixed, CharIn, LinearLoop, SetOperation)): | |
ret_seq.add(node) | |
else: | |
raise RuntimeError(f"Unexpected node: {node}") | |
return ret_seq | |
@dataclass | |
class CWriter: | |
"""generate C source""" | |
indent_level: int = 0 | |
indent_spaces: int = 4 | |
src: str = "" | |
mem_size = 8192 | |
mask = "0x1fff" # todo refactor me | |
def _write(self, text, end="\n"): | |
"""indent and newline""" | |
self.src += f"{self.indent_level*self.indent_spaces*' '}{text}{end}" | |
def _indent(self, amount=1): | |
"""add or sub indent level""" | |
self.indent_level += amount | |
def write_header(self): | |
"""write C boilerplate header""" | |
self._write("#include <stdio.h>") | |
self._write("") | |
self._write(f"char M[{self.mem_size}];") | |
self._write("int main() {") | |
self._indent() | |
self._write("int PT = 0;") | |
self._write("") | |
def write_footer(self): | |
"""write C boilerplate footer""" | |
self._write("") | |
self._write("") | |
self._write("return 0;") | |
self._indent(-1) | |
self._write("}") | |
@classmethod | |
def _plus(cls, num: int) -> str: | |
"""generate +C if C!=0""" | |
if num: | |
return f" {cls._sign(num)} {abs(num)}" | |
return "" | |
@classmethod | |
def _sign(cls, num: int) -> str: | |
"""get sign as a str""" | |
return '+' if num > 0 else '-' | |
def write_code(self, tree: Sequence): | |
"""write a sequence""" | |
# todo: refactor this into the classes maybe? | |
for node in tree: | |
if isinstance(node, AddSub): | |
self._write( | |
f"M[PT{self._plus(node.offset)}] {self._sign(node.val)}= {abs(node.val)};") | |
elif isinstance(node, PointerMove): | |
self._write( | |
f"PT = (PT {self._sign(node.val)} {abs(node.val)}) & {self.mask};") | |
elif isinstance(node, Sequence): | |
self._write("while (M[PT]) {") | |
self._indent() | |
self.write_code(node) | |
self._indent(-1) | |
self._write("}") | |
elif isinstance(node, CharIn): | |
self._write(f"M[PT{self._plus(node.offset)}] = getchar();") | |
elif isinstance(node, CharOut): | |
self._write(f"putchar(M[PT{self._plus(node.offset)}]);") | |
elif isinstance(node, LinearLoop): | |
lin = "".join( | |
f"M[PT{k:+}] {self._sign(v)}= {abs(v)}*M[PT{self._plus(node.offset)}]; " | |
for k, v in node.keys.items() if k-node.offset | |
) + f"M[PT{self._plus(node.offset)}] = 0;" | |
self._write(lin) | |
elif isinstance(node, SetOperation): | |
self._write(f"M[PT{self._plus(node.offset)}] = {node.val};") | |
elif isinstance(node, CharOutFixed): | |
self._write(f"putchar({node.val});") | |
else: | |
raise RuntimeError(f"Unexpected token: {node}") | |
def __call__(self, tree: Sequence) -> str: | |
"""run""" | |
self.write_header() | |
self.write_code(tree) | |
self.write_footer() | |
return self.src | |
class SimpleConstantAnalyzer: | |
def __init__(self): | |
self.toplevel = True | |
def __call__(self, tree: Sequence) -> Sequence: | |
known_values: dict[int, int] = dict() | |
if self.toplevel: | |
self.toplevel = False | |
known_values = defaultdict(int) | |
ret_seq = Sequence() | |
to_remove = set() | |
offset = 0 | |
for node in tree: | |
# prune if we don't know | |
if isinstance(node, (CharIn)): | |
known_values.pop(offset + node.offset, 0) | |
ret_seq.add(node) | |
elif isinstance(node, (Sequence)): | |
known_values = {} | |
offset = 0 | |
ret_seq.add(self(node)) | |
elif isinstance(node, (CharOut)): | |
if node.offset + offset in known_values: | |
ret_seq.add(CharOutFixed( | |
known_values[node.offset + offset])) | |
else: | |
ret_seq.add(node) | |
elif isinstance(node, (AddSub)): | |
if node.offset + offset in known_values: | |
of = node.offset + offset | |
val = known_values[of] | |
ret_seq.add(SetOperation(of, val + node.val)) | |
known_values[of] = val + node.val | |
else: | |
ret_seq.add(node) | |
elif isinstance(node, PointerMove): | |
offset += node.val | |
ret_seq.add(node) | |
elif isinstance(node, LinearLoop): | |
if node.offset + offset in known_values: | |
val = known_values[node.offset + offset] | |
for of, mul in node.keys.items(): | |
ret_seq.add(SetOperation(of+offset, val*mul)) | |
known_values[of+offset] = val*mul | |
known_values[offset + node.offset] = 0 | |
ret_seq.add(SetOperation(node.offset + offset, 0)) | |
else: | |
ret_seq.add(node) | |
for k in node.keys.keys(): | |
known_values.pop(offset + k, 0) | |
known_values[offset + node.offset] = 0 | |
elif isinstance(node, CharOutFixed): | |
ret_seq.add(node) | |
elif isinstance(node, SetOperation): | |
known_values[offset + node.offset] = node.val | |
ret_seq.add(node) | |
else: | |
raise RuntimeError(f"Unexpected node: {node}") | |
return ret_seq | |
class RemoveUselessOps: | |
"""For each set/add operation, find next thing that mutates the cell and check if it's used in between. If not, pop/combine with the next""" | |
def getNextMutate(self, tree: Sequence, index: int) -> int: | |
"""get the next instruction mutating the cell with offset from point""" | |
# offset = tree[index].offset | |
return 0 | |
########### | |
# STARTUP # | |
########### | |
OPT = [ | |
OperationsCombiner(), | |
CompboundGenerator(), | |
OffsetGenerator(), | |
SimpleNopPruner(), | |
SimpleConstantAnalyzer(), | |
] | |
def save_tree(code: Sequence, fname: str): | |
"""save tree into a file""" | |
with open(fname, "w") as file: | |
file.write(str(code)) | |
def run_optimizations(code: Sequence, times=5, need_debug: bool = False) -> Sequence: | |
"""run all optimizations and might save last step diff""" | |
for j in range(times): | |
for i, optimizer in enumerate(OPT): | |
if need_debug and i == len(OPT)-1: | |
save_tree(code, "before") | |
code = optimizer(code) | |
if need_debug: | |
save_tree(code, "after") | |
return code | |
if __name__ == "__main__": | |
import argparse | |
parser = argparse.ArgumentParser() | |
parser.add_argument("filename", help="input brainfuck source file") | |
parser.add_argument("out", nargs="?", default="out.c", | |
help="output file name (default: out.c)") | |
parser.add_argument( | |
"-d", "--diagnose", help="diagnose last optimization step", action="store_true") | |
args = parser.parse_args() | |
mainTree = BFParser().parse_file(args.filename) | |
mainTree = run_optimizations(mainTree, need_debug=args.diagnose) | |
out = CWriter()(mainTree) | |
with open(args.out, "w") as outf: | |
outf.write(out) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment