Last active
March 14, 2024 16:38
-
-
Save aciceri/913aa9667d89af8e2ab45e99e557c2aa to your computer and use it in GitHub Desktop.
Brainfuck to Python bytecode compiler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from sys import exit, stdin | |
from argparse import ArgumentParser, RawDescriptionHelpFormatter, FileType | |
from types import CodeType | |
from dis import dis, opmap | |
import marshal | |
from importlib.util import MAGIC_NUMBER | |
from textwrap import dedent | |
# Command-line parser for the compiler. The description is an ASCII-art banner
# followed by a one-line summary; RawDescriptionHelpFormatter makes argparse
# print it in --help exactly as written instead of re-wrapping it.
# NOTE(review): the banner rows below look as if runs of spaces (and the
# leading indentation) were collapsed at some point - confirm against the
# original file before relying on how the banner renders.
cliParser = ArgumentParser(prog='bf2pyc', | |
formatter_class=RawDescriptionHelpFormatter, | |
description=''' | |
____ __ ____ ____ | |
| __ ) / _|___ \| _ \ _ _ ___ | |
| _ \| |_ __) | |_) | | | |/ __| | |
| |_) | _|/ __/| __/| |_| | (__ | |
|____/|_| |_____|_| \__, |\___| | |
|___/ | |
Simple Brainfuck to Python Bytecode compiler. | |
''') | |
# ---- command-line options -------------------------------------------------
cliParser.add_argument('--version', action='version', version='%(prog)s 0.1')
cliParser.add_argument(
    'input',
    nargs='?',
    type=FileType('r'),
    default=stdin,  # if no file is supplied
    help='brainfuck source file, if omitted the program is read from stdin',
)
cliParser.add_argument(
    '--arraysize',
    action='store',
    dest='arraysize',
    metavar='N',
    type=int,
    default=30000,
    help='size of the brainfuck array',
)
cliParser.add_argument(
    '-o', '--output',
    default='out.pyc',
    dest='outputfile',
    type=str,
    action='store',
    help='output .pyc file path, if omitted is "out.pyc"',
)
cliParser.add_argument(
    '-s', '--show',
    dest='show',
    action='store_true',
    help='show the compiled bytecode without creating the output .pyc file',
)

args = cliParser.parse_args()

# The compiled program allocates ``[0] * arraySize``; with a size below 1 that
# list is empty and the very first cell access would fail at run time, so the
# value is rejected here (usage message, exit status 2) before any input is read.
if args.arraysize < 1:
    cliParser.error('--arraysize must be a positive integer')

source = args.input.read()  # whole brainfuck program as one string
arraySize = args.arraysize  # number of cells of the brainfuck tape
def parse(src):
    """Parse brainfuck source text into a nested-list abstract syntax tree.

    The tree is a list whose items are:

    * an ``int`` for a run of ``+``/``-`` commands: the net increment of the
      run reduced modulo 256 (a single ``-`` is therefore ``255``);
    * the one-character strings ``'>'``, ``'<'``, ``'.'`` and ``','``;
    * for a loop, three consecutive items: ``'['``, a nested list holding the
      parsed loop body, and ``']'``.

    Every other character is treated as a comment and skipped (it does not
    interrupt a run of ``+``/``-``).

    Args:
        src: the brainfuck program as a string.

    Returns:
        The abstract syntax tree described above.

    Raises:
        ValueError: if the square brackets in ``src`` are not balanced.
    """
    stack = []  # positions of the "[" that are still open
    endAt = {}  # position of each "[" -> position of its matching "]"
    for i, char in enumerate(src):
        if char == '[':
            stack.append(i)
        elif char == ']':
            if not stack:
                raise ValueError('unmatched "]" at position %d' % i)
            endAt[stack.pop()] = i
    if stack:
        raise ValueError('unmatched "[" at position %d' % stack[-1])

    def recParse(start, end):
        """Parse ``src[start:end]`` (``end`` is exclusive)."""
        ast = []
        i = start
        while i < end:
            char = src[i]
            if char == '+':
                if ast and isinstance(ast[-1], int):
                    ast[-1] = (ast[-1] + 1) % 256  # extend the current run
                else:
                    ast.append(1)
            elif char == '-':
                if ast and isinstance(ast[-1], int):
                    ast[-1] = (ast[-1] - 1) % 256  # extend the current run
                else:
                    ast.append(255)  # -1 modulo 256
            elif char in ('>', '<', '.', ','):
                ast.append(char)
            elif char == '[':
                ast.append('[')
                ast.append(recParse(i + 1, endAt[i]))
                ast.append(']')
                i = endAt[i]  # resume right after the matching "]"
            i += 1
        return ast

    # The end bound is exclusive, so the whole source is src[0:len(src)];
    # stopping at len(src) - 1 would silently drop the last command.
    return recParse(0, len(src))
def visit(visitor, ast):
    """Call ``visitor`` on every non-list item of ``ast``, depth first.

    Nested lists are descended into at the point where they occur, so the
    items reach ``visitor`` in the same left-to-right order as they appear
    in the tree.
    """
    pending = [iter(ast)]  # innermost level being walked is on top
    while pending:
        for node in pending[-1]:
            if isinstance(node, list):
                # Descend; the parent iterator keeps its position for later.
                pending.append(iter(node))
                break
            visitor(node)
        else:
            # This level is exhausted: go back to the enclosing one.
            pending.pop()
# Prologue of every compiled program, as (opcode name, argument) pairs that
# are flattened into two bytes per instruction.  The indices refer to the
# tuples handed to CodeType further down:
#   consts   = (None, 0..256, '', ('end',), arraySize, ('stdin',))
#   names    = ('print', 'input', 'ord', 'chr', 'sys', 'stdin', 'read')
#   varnames = ('array', 'pointer', 'stdin')
# A constant index above 255 needs an EXTENDED_ARG prefix carrying its high
# byte (EXTENDED_ARG 1 + LOAD_CONST 5 -> consts[261]).
# Taken together the prologue is the equivalent of:
#   from sys import stdin
#   pointer = 0
#   array = [0] * arraySize
instructions = bytearray(
    byte
    for opname, argument in (
        ('LOAD_CONST', 1),       # consts[1] == 0: import level
        ('EXTENDED_ARG', 1),
        ('LOAD_CONST', 5),       # consts[261] == ('stdin',): from-list
        ('IMPORT_NAME', 4),      # names[4] == 'sys'
        ('IMPORT_FROM', 5),      # names[5] == 'stdin'
        ('STORE_FAST', 2),       # varnames[2] == 'stdin'
        ('POP_TOP', 0),          # drop the module object left by IMPORT_NAME
        ('LOAD_CONST', 1),       # 0
        ('STORE_FAST', 1),       # pointer = 0
        ('LOAD_CONST', 1),       # 0
        ('BUILD_LIST', 1),       # [0]
        ('EXTENDED_ARG', 1),
        ('LOAD_CONST', 4),       # consts[260] == arraySize
        ('BINARY_MULTIPLY', 0),  # [0] * arraySize
        ('STORE_FAST', 0),       # array = ...
    )
    for byte in (opmap[opname], argument)
)
addresses = []  # stack with the byte offset of each still-open "[" on the top


def _encode(*pairs):
    """Flatten (opcode name, argument) pairs into a list of byte values."""
    return [byte
            for opname, argument in pairs
            for byte in (opmap[opname], argument)]


def visitor(x):
    """Append to ``instructions`` the bytecode for the AST item ``x``.

    ``x`` is either an int (a run of +/- already reduced modulo 256) or one
    of the strings '.', ',', '<', '>', '[' and ']'; anything else emits
    nothing.  Locals used by the generated code: 0 = array, 1 = pointer,
    2 = stdin.  In the consts tuple, consts[n + 1] == n for n in 0..256,
    consts[258] == '' and consts[259] == ('end',).
    """
    if x == '.':
        # print(chr(array[pointer]), end='')
        instructions.extend(_encode(
            ('LOAD_GLOBAL', 0),        # print
            ('LOAD_GLOBAL', 3),        # chr
            ('LOAD_FAST', 0),
            ('LOAD_FAST', 1),
            ('BINARY_SUBSCR', 0),      # array[pointer]
            ('CALL_FUNCTION', 1),      # chr(...)
            ('EXTENDED_ARG', 1),
            ('LOAD_CONST', 2),         # consts[258] == ''
            ('EXTENDED_ARG', 1),
            ('LOAD_CONST', 3),         # consts[259] == ('end',)
            ('CALL_FUNCTION_KW', 2),   # print(..., end='')
            ('POP_TOP', 0),
        ))
        return

    if x == ',':
        # array[pointer] = ord(stdin.read(1))
        instructions.extend(_encode(
            ('LOAD_GLOBAL', 2),        # ord
            ('LOAD_FAST', 2),          # stdin
            ('LOAD_METHOD', 6),        # .read
            ('LOAD_CONST', 2),         # consts[2] == 1
            ('CALL_METHOD', 1),
            ('CALL_FUNCTION', 1),
            ('LOAD_FAST', 0),
            ('LOAD_FAST', 1),
            ('STORE_SUBSCR', 0),
        ))
        return

    if isinstance(x, int):
        # array[pointer] = (array[pointer] + x) % 256
        const_index = x + 1            # consts[x + 1] == x
        instructions.extend(_encode(
            ('LOAD_FAST', 0),
            ('LOAD_FAST', 1),
            ('BINARY_SUBSCR', 0),
            ('EXTENDED_ARG', (const_index >> 8) & 0xff),
            ('LOAD_CONST', const_index & 0xff),
            ('BINARY_ADD', 0),
            ('EXTENDED_ARG', 1),
            ('LOAD_CONST', 1),         # consts[257] == 256
            ('BINARY_MODULO', 0),
            ('LOAD_FAST', 0),
            ('LOAD_FAST', 1),
            ('STORE_SUBSCR', 0),
        ))
        return

    if x == '<':
        # pointer = pointer - 1
        instructions.extend(_encode(
            ('LOAD_FAST', 1),
            ('LOAD_CONST', 2),         # consts[2] == 1
            ('BINARY_SUBTRACT', 0),
            ('STORE_FAST', 1),
        ))
        return

    if x == '>':
        # pointer = pointer + 1
        instructions.extend(_encode(
            ('LOAD_FAST', 1),
            ('LOAD_CONST', 2),         # consts[2] == 1
            ('BINARY_ADD', 0),
            ('STORE_FAST', 1),
        ))
        return

    if x == '[':
        # The exit address of the loop is not known yet: remember where the
        # loop starts and reserve 6 instructions (12 bytes) that the matching
        # "]" overwrites with the real loop test.
        addresses.append(len(instructions))
        instructions.extend(_encode(*[('NOP', 0)] * 6))
        return

    if x == ']':
        loop_start = addresses.pop()
        # Jump back to the loop test; the target is split over three bytes.
        instructions.extend(_encode(
            ('EXTENDED_ARG', (loop_start >> 16) & 0xff),
            ('EXTENDED_ARG', (loop_start >> 8) & 0xff),
            ('JUMP_ABSOLUTE', loop_start & 0xff),
        ))
        loop_end = len(instructions)   # first byte after the loop
        # Fill the placeholder: leave the loop when array[pointer] is 0.
        instructions[loop_start:loop_start + 12] = _encode(
            ('LOAD_FAST', 0),
            ('LOAD_FAST', 1),
            ('BINARY_SUBSCR', 0),
            ('EXTENDED_ARG', (loop_end >> 16) & 0xff),
            ('EXTENDED_ARG', (loop_end >> 8) & 0xff),
            ('POP_JUMP_IF_FALSE', loop_end & 0xff),
        )
# ---- compile: source text -> AST -> bytecode --------------------------------
ast = parse(source)
visit(visitor, ast)

# The last instructions for every program: ``return None``.
instructions.extend([
    opmap['LOAD_CONST'], 0,    # consts[0] is None
    opmap['RETURN_VALUE'], 0,  # every instruction is two bytes; pad the unused argument
])

# NOTE(review): this 15-positional-argument form matches the CodeType
# constructor of CPython 3.7 (3.8 inserted ``posonlyargcount``), and the
# opcodes emitted above are likewise tied to the interpreter version -
# confirm the supported Python version(s) before changing anything here.
code = CodeType(
    0,                    # argcount
    0,                    # kwonlyargcount
    3,                    # nlocals: array, pointer, stdin
    1000,                 # stacksize
    0,                    # flags
    bytes(instructions),  # codestring
    (None, *range(257), '', ('end',), arraySize, ('stdin',)),  # consts
    ('print', 'input', 'ord', 'chr', 'sys', 'stdin', 'read'),  # names
    ('array', 'pointer', 'stdin'),  # varnames
    args.outputfile,      # filename
    args.outputfile,      # name
    0,                    # firstlineno
    bytes(),              # lnotab
    (),                   # freevars
    (),                   # cellvars
)

if args.show:
    # dis() writes the listing to stdout itself and returns None, so its
    # result must not be passed to print().
    dis(code)  # show the bytecode in a readable format
    exit(0)

with open(args.outputfile, 'wb') as out:
    # The first 16 bytes of the file are the .pyc header.
    out.write(MAGIC_NUMBER)  # this depends on the Python version
    out.write(bytes(12))     # remaining 12 header bytes, all zero
    marshal.dump(code, out)
exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment