Created
January 11, 2021 21:30
-
-
Save jeshan/b70f334f73ebe7465e8ec40d0ad2cd89 to your computer and use it in GitHub Desktop.
Markus Triska's example interpreter and compiler: Source: https://www.metalevel.at/tist/interp.pl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
Interpreter and compiler for a simple imperative language. | |
Written May 2006 by Markus Triska ([email protected]) | |
Public domain code. Tested with Scryer Prolog. | |
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
:- use_module(library(clpz)). | |
:- use_module(library(assoc)). | |
:- use_module(library(pio)). | |
:- use_module(library(dcgs)). | |
:- use_module(library(lists)). | |
:- use_module(library(format)). | |
:- use_module(library(charsio)). | |
% interpreter | |
run(AST) :- | |
env_new(Env), | |
interpret(AST, Env, _). | |
interpret(print(P), Env, Env) :- | |
eval(P, Env, Value), | |
format("~w\n", [Value]). | |
interpret(sequence(A,B), Env0, Env) :- | |
interpret(A, Env0, Env1), | |
( A = return(_) -> | |
Env = Env1 | |
; interpret(B, Env1, Env) | |
). | |
interpret(call(Name, Arg), Env0, Env0) :- | |
eval(Arg, Env0, ArgVal), | |
env_func_body(Env0, Name, ArgName, Body), | |
env_clear_variables(Env0, Env1), | |
env_put_var(ArgName, ArgVal, Env1, Env2), | |
interpret(Body, Env2, _). | |
interpret(function(Name,Arg,Body), Env0, Env) :- | |
env_put_func(Name, Arg, Body, Env0, Env). | |
interpret(if(Cond,Then,Else), Env0, Env) :- | |
eval(Cond, Env0, Value), | |
( Value #\= 0 -> | |
interpret(Then, Env0, Env) | |
; interpret(Else, Env0, Env) | |
). | |
interpret(assign(Var, Expr), Env0, Env) :- | |
eval(Expr, Env0, Value), | |
env_put_var(Var, Value, Env0, Env). | |
interpret(while(Cond, Body), Env0, Env) :- | |
eval(Cond, Env0, Value), | |
( Value #\= 0 -> | |
interpret(Body, Env0, Env1), | |
interpret(while(Cond, Body), Env1, Env) | |
; Env = Env0 | |
). | |
interpret(return(Expr), Env0, Value) :- | |
eval(Expr, Env0, Value). | |
interpret(nop, Env, Env). | |
eval(bin(Op,A,B), Env, Value) :- | |
eval(A, Env, VA), | |
eval(B, Env, VB), | |
eval_(Op, VA, VB, Value). | |
eval(v(V), Env, Value) :- | |
env_get_var(Env, V, Value). | |
eval(n(N), _, N). | |
eval(call(Name, Arg), Env0, Value) :- | |
eval(Arg, Env0, ArgVal), | |
env_func_body(Env0, Name, ArgName, Body), | |
env_clear_variables(Env0, Env1), | |
env_put_var(ArgName, ArgVal, Env1, Env2), | |
interpret(Body, Env2, Value). | |
eval_(+, A, B, V) :- V #= A + B. | |
eval_(-, A, B, V) :- V #= A - B. | |
eval_(*, A, B, V) :- V #= A * B. | |
eval_(/, A, B, V) :- V #= A // B. | |
eval_(=, A, B, V) :- goal_truth(A #= B, V). | |
eval_(>, A, B, V) :- goal_truth(A #> B, V). | |
eval_(<, A, B, V) :- goal_truth(A #< B, V). | |
goal_truth(Goal, V) :- ( Goal -> V = 1 ; V = 0). | |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
% access and modify the environment | |
env_new(E-E) :- empty_assoc(E). | |
env_put_func(Name, Arg, Body, Vars0-Funcs0, Vars0-Funcs) :- | |
put_assoc(Name, Funcs0, Arg-Body, Funcs). | |
env_func_body(_-Funcs, Name, ArgName, Body) :- | |
get_assoc(Name, Funcs, ArgName-Body). | |
env_put_var(Name, Value, Vars0-Funcs0, Vars-Funcs0) :- | |
put_assoc(Name, Vars0, Value, Vars). | |
env_get_var(Vars-_, Name, Value) :- get_assoc(Name, Vars, Value). | |
env_clear_variables(_-Funcs0, E-Funcs0) :- empty_assoc(E). | |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
% compile AST to virtual machine instructions VMs | |
ast_vminstrs(AST, VMs) :- | |
initial_state(S0), | |
phrase(compilation(AST), [S0], [S]), | |
state_vminstrs(S, VMs). | |
initial_state(s([],[],[],0)). | |
state_vminstrs(s(Is0,Fs,_,_), Is) :- | |
reverse([halt|Is0], Is1), | |
maplist(resolve_calls(Fs), Is1, Is). | |
resolve_calls(Fs, I0, I) :- | |
( I0 = call(Name) -> | |
memberchk(Name-Adr, Fs), | |
I = call(Adr) | |
; I = I0 | |
). | |
state(S), [S] --> [S]. | |
state(S0, S), [S] --> [S0]. | |
current_pc(PC) --> state(s(_,_,_,PC)). | |
vminstr(I) --> | |
state(s(Is,Fs,Vs,PC0), s([I|Is],Fs,Vs,PC)), | |
{ I =.. Ls, | |
length(Ls, L), % length of instruction including arguments | |
PC #= PC0 + L }. | |
start_function(Name, Arg) --> | |
state(s(Is,Fs,_,PC), s(Is,[Name-PC|Fs],[Arg-0],PC)). | |
num_variables(Num) --> | |
state(s(_,_,Vs,_)), | |
{ length(Vs, Num0), | |
Num #= Num0 - 1 }. % don't count parameter | |
variable_offset(Name, Offset) --> | |
state(s(Is,Fs,Vs0,PC), s(Is,Fs,Vs,PC)), | |
{ ( memberchk(Name-Offset, Vs0) -> | |
Vs = Vs0 | |
; Vs0 = [_-Curr|_], | |
Offset #= Curr + 1, | |
Vs = [Name-Offset|Vs0] | |
) }. | |
compilation(nop) --> []. | |
compilation(print(P)) --> | |
compilation(P), | |
vminstr(print). | |
compilation(sequence(A,B)) --> | |
compilation(A), | |
compilation(B). | |
compilation(call(Name,Arg)) --> | |
compilation(Arg), | |
vminstr(call(Name)). | |
compilation(function(Name,Arg,Body)) --> | |
vminstr(jmp(Skip)), | |
start_function(Name, Arg), | |
vminstr(alloc(NumVars)), | |
compilation(Body), | |
num_variables(NumVars), | |
current_pc(Skip). | |
compilation(if(Cond,Then,Else)) --> | |
{ Cond = bin(Op,A,B) }, | |
compilation(A), | |
compilation(B), | |
condition(Op, Adr1), | |
compilation(Then), | |
vminstr(jmp(Adr2)), | |
current_pc(Adr1), | |
compilation(Else), | |
current_pc(Adr2). | |
compilation(assign(Var,Expr)) --> | |
variable_offset(Var, Offset), | |
compilation(Expr), | |
vminstr(pop(Offset)). | |
compilation(while(Cond,Body)) --> | |
current_pc(Head), | |
{ Cond = bin(Op,A,B) }, | |
compilation(A), | |
compilation(B), | |
condition(Op, Break), | |
compilation(Body), | |
vminstr(jmp(Head)), | |
current_pc(Break). | |
compilation(return(Expr)) --> | |
compilation(Expr), | |
vminstr(ret). | |
compilation(bin(Op,A,B)) --> | |
compilation(A), | |
compilation(B), | |
{ op_vminstr(Op, VI) }, | |
vminstr(VI). | |
compilation(n(N)) --> | |
vminstr(pushc(N)). | |
compilation(v(V)) --> | |
variable_offset(V, Offset), | |
vminstr(pushv(Offset)). | |
op_vminstr(+, add). | |
op_vminstr(-, sub). | |
op_vminstr(*, mul). | |
op_vminstr(/, div). | |
condition(=, Adr) --> vminstr(jne(Adr)). | |
condition(<, Adr) --> vminstr(jge(Adr)). | |
condition(>, Adr) --> vminstr(jle(Adr)). | |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
% map virtual machine instructions to a list of integer codes | |
vminstrs_ints([]) --> []. | |
vminstrs_ints([I|Is]) --> | |
vminstr_ints(I), | |
vminstrs_ints(Is). | |
vminstr_ints(halt) --> [0]. | |
vminstr_ints(alloc(A)) --> [1,A]. | |
vminstr_ints(pushc(C)) --> [2,C]. | |
vminstr_ints(pushv(V)) --> [3,V]. | |
vminstr_ints(pop(V)) --> [4,V]. | |
vminstr_ints(add) --> [5]. | |
vminstr_ints(sub) --> [6]. | |
vminstr_ints(mul) --> [7]. | |
vminstr_ints(div) --> [8]. | |
vminstr_ints(jmp(Adr)) --> [9,Adr]. | |
vminstr_ints(jne(Adr)) --> [10,Adr]. | |
vminstr_ints(jge(Adr)) --> [11,Adr]. | |
vminstr_ints(jle(Adr)) --> [12,Adr]. | |
vminstr_ints(call(Adr)) --> [13,Adr]. | |
vminstr_ints(print) --> [14]. | |
vminstr_ints(ret) --> [15]. | |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
% lexical analysis - split input sequence into tokens | |
tokens(Ts) --> | |
whitespace, | |
tokens(Ts). | |
tokens([T|Ts]) --> | |
tok(T), | |
!, % single solution: longest input match | |
tokens(Ts). | |
tokens([]) --> "". | |
tok('{') --> "{". | |
tok('}') --> "}". | |
tok(';') --> ";". | |
tok(',') --> ",". | |
tok('(') --> "(". | |
tok(')') --> ")". | |
tok(rop(=)) --> "==". | |
tok(rop(<)) --> "<". | |
tok(rop(>)) --> ">". | |
tok(aop(+)) --> "+". | |
tok(aop(-)) --> "-". | |
tok(mop(*)) --> "*". | |
tok(mop(/)) --> "/". | |
tok(=) --> "=". | |
tok(ID_or_KW) --> | |
ident(Cs), | |
{ atom_chars(I, Cs), ( keyword(I) -> ID_or_KW = I ; ID_or_KW = id(I) ) }. | |
tok(num(N)) --> number(Cs), { number_chars(N, Cs) }. | |
ident([C|Cs]) --> letter(C), identr(Cs). | |
identr([C|Cs]) --> letter(C), identr(Cs). | |
identr([C|Cs]) --> digit(C), identr(Cs). | |
identr([]) --> []. | |
number([C|Cs]) --> digit(C), number(Cs). | |
number([C]) --> digit(C). | |
letter(C) --> [C], { char_type(C, alpha) }. | |
digit(C) --> [C], { char_type(C, decimal_digit) }. | |
whitespace --> [C], { char_type(C, whitespace) }. | |
keyword(K) :- memberchk(K, [if,else,while,return,print]). | |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
% syntax analysis - generate abstract syntax tree (AST) from tokens | |
tokens_ast(Tokens, AST) :- | |
phrase(program(AST), Tokens). | |
program(nop) --> []. | |
program(P) --> func_or_print(FP), program_r(FP, P). | |
program_r(P, P) --> []. | |
program_r(P0, sequence(P0, P1)) --> func_or_print(FP), program_r(FP, P1). | |
func_or_print(F) --> func(F). | |
func_or_print(print(P)) --> stm(print(P)). | |
func(function(Name,Arg,Body)) --> | |
[id(Name)], ['('], [id(Arg)], [')'], block_(Body). | |
stms(S) --> stm(S1), stmr(S1, S). | |
stms(nop) --> []. | |
stmr(S1, sequence(S1, S)) --> stm(S2), stmr(S2, S). | |
stmr(S, S) --> []. | |
stm(call(Name, Arg)) --> [id(Name)], ['('], exp(Arg), [')'], [';']. | |
stm(assign(Id, E)) --> [id(Id)], ['='], exp(E), [';']. | |
stm(if(Cond,S1,S2)) --> [if], cond(Cond), stm(S1), [else], stm(S2). | |
stm(while(Cond, S)) --> [while], cond(Cond), stm(S). | |
stm(return(E)) --> [return], exp(E), [';']. | |
stm(print(E)) --> [print], exp(E), [';']. | |
stm(S) --> block_(S). | |
stm(nop) --> [';']. | |
block_(S) --> ['{'], stms(S), ['}']. | |
cond(bin(Op,A,B)) --> ['('], exp(A), [rop(Op)], exp(B), [')']. | |
exp(E) --> term(E1), expr(E1, E). | |
expr(E1, E) --> [aop(Op)], term(E2), expr(bin(Op, E1, E2), E). | |
expr(E, E) --> []. | |
term(E) --> factor(E1), termr(E1, E). | |
termr(E1, E) --> [mop(Op)], factor(E2), termr(bin(Op, E1, E2), E). | |
termr(E, E) --> []. | |
factor(n(N)) --> [num(N)]. | |
factor(v(Id)) --> [id(Id)]. | |
factor(call(Name, Arg)) --> [id(Name)], ['('], exp(Arg), [')']. | |
factor(E) --> ['('], exp(E), [')']. | |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
% AST type definition | |
is_program(nop). | |
is_program(sequence(A,B)) :- | |
( (A = print(E), is_exp(E)) ; is_function(A) ), | |
is_program(B). | |
is_function(function(Name,Arg,Body)) :- | |
atom(Name), | |
atom(Arg), | |
is_stm(Body). | |
is_stm(print(E)) :- | |
is_exp(E). | |
is_stm(sequence(S1,S2)) :- | |
is_stm(S1), | |
is_stm(S2). | |
is_stm(call(Name, Arg)) :- | |
atom(Name), | |
is_exp(Arg). | |
is_stm(if(Cond,Then,Else)) :- | |
is_exp(Cond), | |
is_stm(Then), | |
is_stm(Else). | |
is_stm(while(Cond,Body)) :- | |
is_exp(Cond), | |
is_stm(Body). | |
is_stm(return(E)) :- | |
is_exp(E). | |
is_stm(nop). | |
is_stm(assign(Id, E)) :- | |
atom(Id), | |
is_exp(E). | |
is_exp(n(N)) :- | |
number(N). | |
is_exp(v(V)) :- | |
atom(V). | |
is_exp(call(Id, E)) :- | |
atom(Id), | |
is_exp(E). | |
is_exp(bin(Op,E1,E2)) :- | |
member(Op, [=,#,>,<,+,-,*,/]), | |
is_exp(E1), | |
is_exp(E2). | |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
string_ast(String, AST) :- | |
phrase(tokens(Tokens), String), | |
tokens_ast(Tokens, AST). | |
run_file(File) :- | |
( phrase_from_file(tokens(Tokens), File) -> | |
format("\n\ntokens:\n\n~w\n", [Tokens]), | |
( tokens_ast(Tokens, AST) -> | |
% is_program(AST), % type check | |
format("\nAST:\n\n~w\n", [AST]), | |
ast_vminstrs(AST, VMs), | |
format("\n\nVM code:\n\n", []), | |
foldl(display_vminstr, VMs, 0, _), | |
phrase(vminstrs_ints(VMs), Ints), | |
format("\nintcode:\n\n~w\n\n", [Ints]), | |
format("program output:\n\n", []), | |
run(AST), | |
halt | |
; format("syntax error\n", []) | |
) | |
; format("lexical error", []) | |
). | |
display_vminstr(Cmd, N0, N1) :- | |
format("~t~w~5|: ", [N0]), | |
Cmd =.. Ls, | |
length(Ls, L), | |
( L = 1 -> | |
format("~w\n", Ls) | |
; format("~w ~w\n", Ls) | |
), | |
N1 #= N0 + L. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment