Last active
April 23, 2023 05:58
-
-
Save mbillingr/c2bdca4f618974e7e8d1449aba792b41 to your computer and use it in GitHub Desktop.
Implementation of a minimal Forth interpreter/compiler on top of Python 3.5.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" pyrx | |
Implementation of a minimal Forth interpreter/compiler on top of Python 3.6. | |
It is based on Rx [1] and should eventually support Retro [2]. | |
Facts & Features: | |
- New words are compiled to Python bytecode (subroutine threading model). | |
- Dynamically typed: the only data type is the Python object. | |
- Literals are evaluated as Python expressions. | |
- The data stack is a global Python list. | |
- We also have a Python list as heap. | |
- The dictionary is a global Python dictionary, indexing into the heap. | |
To-do: | |
- Implement missing features (?) | |
- REPL | |
- String handling | |
[1] https://github.com/crcx/retro12/blob/master/literate/Rx.md | |
[2] https://github.com/crcx/retro12/blob/master/literate/RetroForth.md | |
""" | |
import types | |
import dis | |
stack = [] # data stack | |
aux_stack = [] # auxiliary stack for compiling control structures | |
codes = {} # lookup table for code objects | |
dic = {} # word dictionary | |
class Heap(list): | |
def __init__(self): | |
self.labels = {} | |
def set_label(self, name, pos): | |
self.labels[name] = pos | |
def get_label(self, name): | |
return self.labels[name] | |
def add(self, value, name=None): | |
pos = len(self) | |
self.append(value) | |
if name is not None: | |
self.set_label(name, pos) | |
return pos | |
heap = Heap() | |
compilation = heap.add([], 'compilation') | |
last_word = heap.add(None, 'last_word') | |
heaptest = heap.add(135, 'heaptest') | |
# helper functions | |
def set_top(*args): | |
"""Replace values on top of the stack.""" | |
for i, a in enumerate(args[::-1]): | |
stack[-1 - i] = a | |
def constant(value): | |
"""return index of constant and add it to list if necessary""" | |
cmp = heap[compilation][-1] | |
try: | |
return cmp[6].index(value) | |
except ValueError: | |
cmp[6] += (value,) | |
return len(cmp[6]) - 1 | |
def name(value): | |
"""return index of name and add it to list if necessary""" | |
cmp = heap[compilation][-1] | |
try: | |
return cmp[7].index(value) | |
except ValueError: | |
cmp[7] += (value,) | |
return len(cmp[7]) - 1 | |
def compile_opcode(name, arg=0): | |
cmp = heap[compilation][-1] | |
cmp[5] += bytes([dis.opmap[name], arg]) | |
def compile_byte(value): | |
heap[compilation][-1][5] += bytes([value]) | |
# word Classes determine the interpretion/compilation behavior of words | |
def macro(): | |
""" | |
macro: | |
intepret: execute code | |
compile: execute code | |
""" | |
pos = stack.pop() | |
exec(pos) | |
def word(): | |
""" | |
word: | |
intepret: execute code | |
compile: append call to code | |
""" | |
code = stack.pop() | |
if heap[compilation]: | |
cc = constant(id(code)) | |
compile_opcode('DUP_TOP_TWO') | |
compile_opcode('LOAD_CONST', cc) | |
compile_opcode('BINARY_SUBSCR') | |
compile_opcode('CALL_FUNCTION', 1) | |
compile_opcode('POP_TOP') | |
else: | |
exec(code) | |
def literal(): | |
""" | |
Literal: | |
intepret: push value to stack | |
compile: create code that pushes value to stack | |
""" | |
stack.pop() | |
if heap[compilation]: | |
value = eval(stack.pop()) | |
compile_opcode('LOAD_GLOBAL', name('stack')) | |
compile_opcode('LOAD_CONST', constant(value)) | |
compile_opcode('BUILD_LIST', 1) | |
compile_opcode('INPLACE_ADD') | |
compile_opcode('STORE_GLOBAL', name('stack')) | |
else: | |
stack[-1] = eval(stack[-1]) | |
def data(): | |
""" | |
Data: | |
intepret: leave value on stack | |
compile: create code that pushes value to stack | |
""" | |
if heap[compilation]: | |
compile_opcode('LOAD_GLOBAL', name('stack')) | |
compile_opcode('LOAD_CONST', constant(stack.pop())) | |
compile_opcode('BUILD_LIST', 1) | |
compile_opcode('INPLACE_ADD') | |
compile_opcode('STORE_GLOBAL', name('stack')) | |
def compiler(): | |
""" | |
compiler: | |
intepret: throw error | |
compile: execute code | |
""" | |
if heap[compilation]: | |
exec(stack.pop()) | |
def prepare_code(wordname): | |
heap[compilation].append([0, 0, 0, 10, 0, bytes(), (None, -1), (), (), | |
"<pyrx>", wordname, 0, bytes()]) | |
compile_opcode('LOAD_GLOBAL', name('exec')) | |
compile_opcode('LOAD_GLOBAL', name('codes')) | |
def interpret(string): | |
for token in string.split(): | |
prefix = 'prefix:' + token[0] | |
if prefix in dic: | |
entry = dic[prefix] | |
stack.append(token[1:]) | |
elif token in dic: | |
# try to look up token in dictionary | |
entry = dic[token] | |
else: | |
# otherwise evaluate token as a Python expression | |
stack.append(token) | |
entry = dic['prefix:#'] | |
stack.append(heap[entry + D_XT_OFFSET]) | |
eval(heap[entry + D_CLS_OFFSET]) | |
# some words cannot be nicely defined in lambdas | |
def op_finalize_word(): | |
compile_opcode('POP_TOP') # clean stack (codes) | |
compile_opcode('POP_TOP') # clean stack (exec) | |
compile_opcode('LOAD_CONST', constant(None)) | |
compile_opcode('RETURN_VALUE') | |
code = types.CodeType(*heap[compilation].pop()) | |
add_word(code.co_name, code, word) | |
def op_repeat(): | |
aux_stack.append(len(heap[compilation][-1][5])) | |
def op_again(): | |
compile_opcode('JUMP_ABSOLUTE', aux_stack.pop()) | |
def op_0break(): | |
compile_opcode('LOAD_GLOBAL', name('stack')) | |
compile_opcode('LOAD_CONST', constant(-1)) | |
compile_opcode('BINARY_SUBSCR') | |
target = len(heap[compilation][-1][5]) + 12 | |
compile_opcode('POP_JUMP_IF_TRUE', target) | |
compile_opcode('LOAD_GLOBAL', name('stack')) | |
compile_opcode('LOAD_ATTR', name('pop')) | |
compile_opcode('CALL_FUNCTION') | |
compile_opcode('POP_TOP') | |
compile_opcode('RETURN_VALUE') | |
def op_open_quot(): | |
prepare_code('') | |
def op_close_quot(): | |
compile_opcode('POP_TOP') # clean stack (codes) | |
compile_opcode('POP_TOP') # clean stack (exec) | |
compile_opcode('LOAD_CONST', constant(None)) | |
compile_opcode('RETURN_VALUE') | |
code = types.CodeType(*heap[compilation].pop()) | |
codes[id(code)] = code | |
if heap[compilation]: | |
compile_opcode('LOAD_GLOBAL', name('stack')) | |
compile_opcode('LOAD_CONST', constant(code)) | |
compile_opcode('BUILD_LIST', 1) | |
compile_opcode('INPLACE_ADD') | |
compile_opcode('STORE_FAST', name('stack')) | |
else: | |
stack.append(code) | |
def op_if(): | |
code = stack.pop() | |
flag = stack.pop() | |
if flag: | |
exec(code) | |
def op_ifnot(): | |
code = stack.pop() | |
flag = stack.pop() | |
if not flag: | |
exec(code) | |
def op_choice(): | |
code0 = stack.pop() | |
code1 = stack.pop() | |
flag = stack.pop() | |
exec(code1 if flag else code0) | |
def op_deref(): | |
stack.append(heap[dic[stack.pop()] + D_XT_OFFSET]) | |
data() | |
def add_word(name, xt, cls): | |
if isinstance(xt, types.FunctionType): | |
xt = xt.__code__ | |
pos = len(heap) | |
heap.append(heap[last_word]) | |
heap.append(xt) | |
heap.append(cls.__code__) | |
heap.append(name) | |
dic[name] = pos | |
codes[id(xt)] = xt | |
heap[last_word] = pos | |
return pos | |
D_PREV_OFFSET = 0 | |
D_XT_OFFSET = 1 | |
D_CLS_OFFSET = 2 | |
D_NAME_OFFSET = 3 | |
def init_rx(): | |
# compilation | |
add_word(';', op_finalize_word, macro) | |
add_word(',', lambda: compile_byte(stack.pop()), word) | |
# prefixes | |
add_word('prefix::', lambda: prepare_code(stack.pop()), macro) | |
add_word('prefix:#', lambda: None, literal) | |
add_word('prefix:&', op_deref, macro) | |
# stack operations | |
add_word('dup', lambda: stack.append(stack[-1]), word) | |
add_word('drop', lambda: stack.pop(), word) | |
add_word('swap', lambda: set_top(stack[-1], stack[-2]), word) | |
# memory operations | |
add_word('fetch', lambda: set_top(heap[stack[-1]]), word) | |
add_word('store', lambda: heap.__setitem__(stack.pop(), stack.pop()), word) | |
# arithmetic operations | |
add_word('+', lambda: set_top(stack[-2] + stack.pop()), word) | |
add_word('-', lambda: set_top(stack[-2] - stack.pop()), word) | |
add_word('*', lambda: set_top(stack[-2] * stack.pop()), word) | |
add_word('/', lambda: set_top(stack[-2] / stack.pop()), word) | |
# control structures | |
add_word('repeat', op_repeat, compiler) | |
add_word('again', op_again, compiler) | |
add_word('0;', op_0break, compiler) | |
add_word('[', op_open_quot, macro) | |
add_word(']', op_close_quot, macro) | |
add_word('call', lambda: exec(stack.pop()), word) | |
# conditionals | |
add_word('if', op_if, word) | |
add_word('-if', op_ifnot, word) | |
add_word('choose', op_choice, word) | |
# dictionary | |
add_word('d:link', lambda: stack.append(last_word), word) | |
add_word('d:class', lambda: stack.append(stack.pop() + D_CLS_OFFSET), word) | |
add_word('d:xt', lambda: stack.append(stack.pop() + D_XT_OFFSET), word) | |
add_word('d:lookup', lambda: stack.append(dic[stack.pop()]), word) | |
# classes | |
add_word('class:macro', macro, word) | |
add_word('class:word', word, word) | |
add_word('class:data', data, word) | |
# variables | |
add_word('Compiler', compilation, data) | |
add_word('Heaptest', heaptest, data) | |
add_word('Dictionary', last_word, data) | |
# data types | |
add_word('s:to-number', lambda: stack.append(int(stack.pop())), word) | |
def init_retro(): | |
"""build retro 12 words on top of Rx.""" | |
interpret(':d:last &Dictionary fetch ;') | |
interpret(':reclass d:last d:class store ;') | |
interpret(':immediate &class:macro reclass ;') | |
interpret(':data &class:data reclass ;') | |
add_word('depth', lambda: stack.append(len(stack)), word) | |
interpret(':prefix:@ d:lookup d:xt fetch class:data &fetch class:word ; immediate') | |
interpret(':prefix:! d:lookup d:xt fetch class:data &store class:word ; immediate') | |
interpret(':compiling? @Compiler ;') | |
# we inline instruction names instead of opcode numbers | |
interpret(':prefix:` compiling? [ , ] [ drop ] choose ; immediate') | |
# TODO... | |
def assert_stack(*args): | |
assert len(stack) == len(args) | |
for s, a in zip(stack, args): | |
if a != assert_stack.ignore: | |
assert s == a | |
assert_stack.ignore = lambda: None | |
def test_interpret(string, *args): | |
interpret(string) | |
assert_stack(*args) | |
stack.clear() | |
print('passed:', string) | |
init_rx() | |
# No REPL yet, so here are a few usage examples: | |
test_interpret('5', 5) | |
test_interpret('#5', 5) | |
test_interpret(':x #5 ;') | |
test_interpret(':x #5 ; x', 5) | |
test_interpret('1 2 swap', 2, 1) | |
test_interpret('1 2 -', -1) | |
test_interpret('84 2 /', 42) | |
test_interpret(':x 1 ;') | |
test_interpret(':x 1 ; x', 1) | |
test_interpret(':sqr dup * ;') | |
test_interpret('2 3 + sqr', 25) | |
test_interpret('pow(2,3.14)', 2 ** 3.14) | |
test_interpret('"hello" chr(32) "world!" + +', 'hello world!') | |
test_interpret(':0add repeat 0; + swap again ;') | |
test_interpret('False 1 2 3 4 5 0add', 15) | |
test_interpret(':the-answer 42 ;') | |
test_interpret('the-answer the-answer', 42, 42) | |
test_interpret('[ 1 2 3 + * ]', assert_stack.ignore) | |
test_interpret('[ 3 2 1 + * ] call', 9) | |
test_interpret(':nest 3 [ sqr dup sqr swap drop ] call ;') | |
test_interpret('nest', 81) | |
test_interpret('False [ 1 ] -if', 1) | |
test_interpret('True [ 2 ] if', 2) | |
test_interpret('False [ 1 ] if') | |
test_interpret('True [ 2 ] -if') | |
test_interpret('False [ 3 ] [ 4 ] choose', 4) | |
test_interpret('True [ 3 ] [ 4 ] choose', 3) | |
init_retro() | |
# | |
# License | |
# | |
# pyrx is Copyright (c) 2017, Martin Billinger | |
# | |
# Permission to use, copy, modify, and/or distribute this software for any | |
# purpose with or without fee is hereby granted, provided that the above | |
# copyright notice and this permission notice appear in all copies. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION | |
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | |
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment