Last active
March 23, 2022 07:09
-
-
Save DmitrySoshnikov/afda459222e96e6002ac to your computer and use it in GitHub Desktop.
Educational Stack-based VM with recursion example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Educational Stack-based VM. | |
* | |
* See also: | |
* - A simplified example without recursion: https://gist.github.com/DmitrySoshnikov/76e1cfb930df8d36145c | |
* - Register-based VM example: https://gist.github.com/DmitrySoshnikov/6407781 | |
* | |
* by Dmitry Soshnikov <[email protected]> | |
* http://dmitrysoshnikov.com | |
* MIT Stye License (C) 2015 | |
*/ | |
var _stack = []; // main evaluation stack | |
var _pc = 0; // program counter | |
var _regs = {$a: 0, $b: 0, $c: 0, $d: 0}; // generic registers | |
var _zf; // zero flag (used in `cmp` instruction) | |
var $sp = 0; // stack pointer | |
var $fp = 0; // frame pointer | |
var _running = false; // whether the machine is running | |
// Checks whether a value is a user-level register. | |
function isReg(v) { | |
return v[0] === '$'; | |
} | |
// Gets the address of the label, or just returns | |
// the address if it's an actual address. | |
function toAddr(v) { | |
if (typeof v === 'string') { | |
return _labels[v + ':']; | |
} | |
return v; | |
} | |
// --------------------------------------------------------------- | |
// 1. Opcodes (directly in assembly mnemonics for brevity). | |
// | |
// In real machine this would be actual binary codes of | |
// instructions corresponding to the assembly mnemonics. | |
// --------------------------------------------------------------- | |
var ops = { | |
// Stops the execution. | |
halt: function() { | |
_running = false; | |
}, | |
// Moves contents to a register. | |
mov: function(r, v) { | |
_regs[r] = eval(v); | |
}, | |
// Sums two registers, result is in the first one. | |
add: function(r1, r2) { | |
_regs[r1] = _regs[r1] + _regs[r2]; | |
}, | |
// Pushes on the stack advancing the stack pointer. | |
push: function(v) { | |
// If a register is pushed, get its value. | |
if (isReg(v)) { | |
v = _regs[v]; | |
} | |
_stack.push(eval(v)); | |
$sp = _stack.length - 1; | |
}, | |
// Pops a value from top of the stack. If the register | |
// is passed, stores value there. | |
pop: function(r) { | |
var v = _stack.pop(); | |
$sp = _stack.length - 1; | |
// Pop may provide a register to store the value. | |
if (r) { | |
_regs[r] = v; | |
} | |
return v; | |
}, | |
/** | |
* Calls a procedure. | |
* Caller pushes params on the stack, and the | |
* `call` in addition does the routine work by saving | |
* the frame pointer (`$fp`), and saving the return address. | |
* The return value is always passed in the `$a` (accumulator) register. | |
* All together it's an Activation Record (AR, aka a "stack frame"): | |
* | |
* AR: | |
* | |
* ----------- | |
* param1 | |
* param2 Caller's responsibility | |
* ... | |
* paramN | |
* ----------- | |
* fp Callee's responsibility | |
* ret addr (pc + 1) | |
* ----------- | |
*/ | |
call: function(proc) { | |
this.push($fp); // Save caller's frame pointer | |
$fp = $sp - 1; // And set it to proc params | |
this.push(_pc + 1); // Save the return address (follows the `call`) | |
this.jmp(toAddr(proc)); // actual control transfer to procedure | |
}, | |
/** | |
* This version uses "Callee clean-up" convention, meaning the callee | |
* removes all passed params from the stack. The `n` param says how many | |
* params (bytes in real machines) to remove from the stack. As the result | |
* of this instruction the whole AR is popped from the stack, and the control is | |
* transferred back to the caller. | |
* | |
* Notice, since the callee cleans everything up, this convention doesn't | |
* support passing of variant number of params. | |
*/ | |
ret: function(n) { | |
n = n || 0; // number of params (bytes) to pop | |
// Return address is restored. | |
_pc = this.pop(); | |
// Don't advance PC in main cycle since we just changed it. | |
_shouldAdvancePc = false; | |
// Params unwind. | |
while (n > 0) { | |
this.pop(); | |
n--; | |
} | |
// Caller's fp is restored. | |
$fp = this.pop(); | |
}, | |
// Unconditional jump to a specific address. | |
jmp: function(addr) { | |
_pc = addr; | |
// Tell CPU don't advance pc since we just changed it in `jmp`. | |
_shouldAdvancePc = false; | |
}, | |
// Compares two operands and updates `_zf` (zero flag). | |
cmp: function(v1, v2) { | |
v1 = isReg(v1) ? _regs[v1] : eval(v1); | |
v2 = isReg(v2) ? _regs[v2] : eval(v2); | |
// Compare is just updating the zero flag: | |
// We support here only the simplest version: | |
// "equal/not-euqal", without "greater", etc ops. | |
// 1 - operands are equal | |
// 0 - not equal | |
_zf = (v1 === v2) ? 1 : 0; | |
}, | |
// Conditional jump if previously compared operands were equal. | |
je: function(addr) { | |
if (_zf === 1) { | |
this.jmp(toAddr(addr)); | |
} | |
}, | |
// The same as `je`, but if compared operands were not equal. | |
jne: function(addr) { | |
if (_zf === 0) { | |
this.jmp(toAddr(addr)); | |
} | |
}, | |
}; | |
// --------------------------------------------------------------- | |
// 2. Assembler. In real world has two passes: at first collects all labels, | |
// in the second replaces labels with addresses and actually translates | |
// to machine code. Here we only will collect the labels, and remove them | |
// from the program, skipping the translation to byte-code for brevity | |
// (the program runner will work directly with assembly mnemonics) | |
// --------------------------------------------------------------- | |
// A map from label id to address in the program. | |
var _labels = {}; | |
function assemble(program) { | |
var translated = []; | |
var labelsCount = 0; | |
for (var i = 0; i < program.length; i++) { | |
// if it's a label, record the address. | |
if (program[i].slice(-1) === ':') { | |
_labels[program[i]] = i - labelsCount; | |
labelsCount++; | |
continue; | |
} | |
translated.push(program[i]); | |
} | |
return translated; | |
} | |
// --------------------------------------------------------------- | |
// 3. CPU | |
// Main CPU cycle of running the program: "fetch, decode, eval". | |
// --------------------------------------------------------------- | |
var _shouldAdvancePc = true; | |
var CPU = { | |
execute: function(program) { | |
//return; | |
_pc = _labels['main:']; // jump to the entry point of the program | |
_running = true; | |
while (_running) { | |
var instr = program[_pc]; // "fetch!" | |
_shouldAdvancePc = true; | |
// ... | |
// here should be decode step, but we skip it | |
// for brevity since work directly with assembler | |
// ... | |
ops[instr[0]].apply(ops, instr.slice(1)); // "eval!" | |
// If it's not unconditional jump (which modifies the pc itself) | |
// just go to the next instruction. | |
if (_shouldAdvancePc) { | |
_pc++; | |
} | |
} | |
} | |
}; | |
// --------------------------------------------------------------- | |
// 4. Test program | |
// --------------------------------------------------------------- | |
var program = [ | |
// The `sum` function (label). | |
'sum:', | |
['mov', '$a', '_stack[$fp]'], | |
['mov', '$b', '_stack[$fp - 1]'], | |
['add', '$a', '$b'], | |
['cmp', '$a', 40], // on first run `$a` is 30, so this is false | |
['je', '.sumret'], // return from recursive call, when `$a` is 40 | |
['push', '$a'], // otherwise, push again params, | |
['push', '$b'], | |
['call', 'sum'], // and do the recusive `sum` call | |
'.sumret:', | |
['ret', 2], | |
// Main entry point. | |
'main:', | |
// Call the `sum` function. | |
// Params are passed (pushed onto the stack) | |
['push', 10], | |
['push', 20], | |
['call', 'sum'], | |
// Exit. | |
['halt'] | |
]; | |
// Debug output of a program. | |
function dumpProgram(program) { | |
program.forEach(function(instr) { | |
if (Array.isArray(instr)) { // actual instruction | |
console.log(' ', instr[0], instr.slice(1).join(', ')); | |
} else if (instr[0] == '.') { // local label | |
console.log(instr); | |
} else { | |
console.log('\n', instr); // label | |
} | |
}); | |
} | |
// Print the example program. | |
console.log('Execute the program:'); | |
dumpProgram(program); | |
// Run the program. | |
CPU.execute(assemble(program)); | |
// Check the result in the accumulator, 40. | |
console.log('\nAccumulator result ($a):', _regs.$a, '\n'); | |
// Execute the program: | |
// | |
// sum: | |
// mov $a, _stack[$fp] | |
// mov $b, _stack[$fp - 1] | |
// add $a, $b | |
// cmp $a, 40 | |
// je .sumret | |
// push $a | |
// push $b | |
// call sum | |
// .sumret: | |
// ret 2 | |
// | |
// main: | |
// push 10 | |
// push 20 | |
// call sum | |
// halt | |
// | |
// Accumulator result ($a): 40 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment