The content below is a copy-paste-modify-add job on existing content. The original content was written by bakpakin. Errors are likely mine.
(Links to source reference commit 0277187)
struct JanetVM {
/* Top level dynamic bindings */
JanetTable *top_dyns;
/* Cache the core environment */
JanetTable *core_env;
/* How many VM stacks have been entered */
int stackn;
/* If this flag is true, suspend on function calls and backwards jumps.
* When this occurs, this flag will be reset to 0. */
int auto_suspend;
/* The current running fiber on the current thread.
* Set and unset by janet_run. */
JanetFiber *fiber;
JanetFiber *root_fiber;
/* The current pointer to the inner most jmp_buf. The current
* return point for panics. */
jmp_buf *signal_buf;
Janet *return_reg;
/* The global registry for c functions. Used to store meta-data
* along with otherwise bare c function pointers. */
JanetCFunRegistry *registry;
size_t registry_cap;
size_t registry_count;
int registry_dirty;
/* Registry for abstract abstract types that can be marshalled.
* We need this to look up the constructors when unmarshalling. */
JanetTable *abstract_registry;
/* Immutable value cache */
const uint8_t **cache;
uint32_t cache_capacity;
uint32_t cache_count;
uint32_t cache_deleted;
uint8_t gensym_counter[8];
/* Garbage collection */
void *blocks;
size_t gc_interval;
size_t next_collection;
size_t block_count;
int gc_suspend;
/* GC roots */
Janet *roots;
size_t root_count;
size_t root_capacity;
/* Scratch memory */
JanetScratch **scratch_mem;
size_t scratch_cap;
size_t scratch_len;
/* Random number generator */
JanetRNG rng;
/* Traversal pointers */
JanetTraversalNode *traversal;
JanetTraversalNode *traversal_top;
JanetTraversalNode *traversal_base;
...
/* Virtual registers
*
* One instruction word
* CC | BB | AA | OP
* DD | DD | DD | OP
* EE | EE | AA | OP
*/
#define A ((*pc >> 8) & 0xFF)
#define B ((*pc >> 16) & 0xFF)
#define C (*pc >> 24)
#define D (*pc >> 8)
#define E (*pc >> 16)
Each instruction is a 32-bit integer, meaning that the instruction set is a constant-width RISC instruction set like MIPS. The opcode of each instruction is the least significant byte of the instruction. The highest bit of this leading byte is reserved for debugging purpose, so there are 128 possible opcodes encodable with this scheme. Not all of these possible opcodes are defined, and undefined opcodes will trap the interpreter and emit a debug signal. Note that this means an unknown opcode is still valid bytecode, it will just put the interpreter into a debug state when executed.
X - Payload bits
O - Opcode bits
4 3 2 1
+----+----+----+----+
| XX | XX | XX | OO |
+----+----+----+----+
Using 8 bits for the opcode leaves 24 bits for the payload, which may or may not be utilized. There are a few instruction variants that divide these payload bits.
0 arg - Used for noops, returning nil, or other instructions that take no arguments. The payload is essentially ignored.
Bytecode structure - XX | XX | XX | OP
- X (ignored)
Example instructions
JOP_NOOP
--JINT_0
-- (does nothing)JOP_RETURN_NIL
--JINT_0
-- (returning nil)
1 arg - All payload bits correspond to a single value, usually a signed or unsigned integer. Used for instructions of 1 argument, like returning a value, yielding a value to the parent fiber, or doing a (relative) jump.
Bytecode structure - DD | DD | DD | OP
- D (24 bits)
Example instructions
JOP_RETURN
--JINT_S
-- (returning a value)JOP_JMP
--JINT_L
-- (doing a relative jump)
yielding a value to the parent fiber
appears to be a sequence of opcodes, not just one. See yield_asm
in src/core/corelib.c
.
2 arg - Payload is split into byte 2 and bytes 3 and 4. The first argument is the 8-bit value from byte 2, and the second argument is the 16-bit value from bytes 3 and 4 (instruction >> 16). Used for instructions of two arguments, like move, normal function calls, conditionals, etc.
Bytecode structure - EE | EE | AA | OP
- A (8 bits)
- E (16 bits)
Example instructions
JOP_MOVE_FAR
,JOP_MOVE_NEAR
--JINT_SS
-- (move)JOP_CALL
--JINT_SS
-- (normal function call)JOP_JUMP_IF
,JOP_JUMP_IF_*
--JINT_SL
-- (conditionals)
3 arg - Bytes 2, 3, and 4 each correspond to an 8-bit argument. Used for arithmetic operations, emitting a signal, etc.
Bytecode structure - CC | BB | AA | OP
- A (8 bits)
- B (8 bits)
- C (8 bits)
Example instructions
JOP_ADD
,JOP_SUBTRACT
, etc. --JINT_SSS
-- (arithmetic operations)JOP_SIGNAL
--JINT_SSU
-- (emitting a signal)
Further details, see:
- All opcodes for the bytecode interpreter -
JanetOpCode
enum - Various types of instructions -
JanetInstructionType
- Look up table for instructions -
JanetInstructionType
janet_instructions
enum - The Janet Abstract Machine
/* Interpreter state */
register Janet *stack;
register uint32_t *pc;
register JanetFunction *func;
/* A stack slot */
struct JanetSlot {
Janet constant; /* If the slot has a constant value */
int32_t index;
int32_t envindex; /* 0 is local, positive number is an upvalue */
uint32_t flags;
};
/* A stack frame on the fiber. Is stored along with the stack values. */
struct JanetStackFrame {
JanetFunction *func;
uint32_t *pc;
JanetFuncEnv *env;
int32_t prevframe;
int32_t flags;
};
/* A function */
struct JanetFunction {
JanetGCObject gc;
JanetFuncDef *def;
JanetFuncEnv *envs[];
};
/* A function definition. Contains information needed to instantiate closures. */
struct JanetFuncDef {
JanetGCObject gc;
int32_t *environments; /* Which environments to capture from parent. */
Janet *constants;
JanetFuncDef **defs;
uint32_t *bytecode;
uint32_t *closure_bitset; /* Bit set indicating which slots can be referenced by closures. */
/* Various debug information */
JanetSourceMapping *sourcemap;
JanetString source;
JanetString name;
int32_t flags;
int32_t slotcount; /* The amount of stack space required for the function */
int32_t arity; /* Not including varargs */
int32_t min_arity; /* Including varargs */
int32_t max_arity; /* Including varargs */
int32_t constants_length;
int32_t bytecode_length;
int32_t environments_length;
int32_t defs_length;
};
/* A function environment */
struct JanetFuncEnv {
JanetGCObject gc;
union {
JanetFiber *fiber;
Janet *values;
} as;
int32_t length; /* Size of environment */
int32_t offset; /* Stack offset when values still on stack. If offset is <= 0, then
environment is no longer on the stack. */
};
/* A lightweight green thread in janet. Does not correspond to
* operating system threads. */
struct JanetFiber {
JanetGCObject gc; /* GC Object stuff */
int32_t flags; /* More flags */
int32_t frame; /* Index of the stack frame */
int32_t stackstart; /* Beginning of next args */
int32_t stacktop; /* Top of stack. Where values are pushed and popped from. */
int32_t capacity; /* How big is the stack memory */
int32_t maxstack; /* Arbitrary defined limit for stack overflow */
JanetTable *env; /* Dynamic bindings table (usually current environment). */
Janet *data; /* Dynamically resized stack memory */
JanetFiber *child; /* Keep linked list of fibers for restarting pending fibers */
Janet last_value; /* Last returned value from a fiber */
...
};
stack = fiber->data + fiber->frame; \
janet_stack_frame(stack)->pc = pc;
pc = janet_stack_frame(stack)->pc; \
(src/core/vm.c - JOP_CALL | JOP_TAILCALL)
pc = func->def->bytecode;
#define janet_stack_frame(s) ((JanetStackFrame *)((s) - JANET_FRAME_SIZE))
/* Number of Janets a frame takes up in the stack
* Should be constant across architectures */
#define JANET_FRAME_SIZE 4
func = janet_stack_frame(stack)->func;
some vm macros
vm_next
- decode opcode from pc (the LSB of the pc) and start next vm loop iterationvm_pcnext
- update pc to next instruction, thenvm_next()
vm_commit
- save pc to fiber(?)vm_restore
- load values for stack, pc, and func from fiber(?)vm_return
- returnsig
, withval
injanet_vm_return_reg
+vm_commit()
vm_return_no_restore
- returnsig
, withval
injanet_vm_return_reg
run_vm
(vm main loop) is called by:
janet_call
- (can be) called by
peg_rule
(src/core/peg.c) - (can be) called by
janet_method_invoke
(src/core/vm.c)- called by
call_nonfn
(src/core/vm.c) - called by
janet_binop_call
(src/core/vm.c) - (can be) called by
janet_mcall
(src/core/vm.c)
- called by
- (can be) called by
janet_continue_no_check
- (can be) called by
janet_continue
(src/core/vm.c) - (can be) called by
janet_continue_no_signal
(src/core/vm.c) - (can be) called by
run_vm
when handlingJOP_RESUME
(src/core/vm.c)
- (can be) called by
A Janet fiber is the type used to represent multiple concurrent processes in Janet. It is basically a wrapper around the idea of a stack. The stack is divided into a number of stack frames (JanetStackFrame * in C), each of which contains information such as the function that created the stack frame, the program counter for the stack frame, a pointer to the previous frame, and the size of the frame. Each stack frame also is paired with a number of registers.
X: Slot
X
X - Stack Top, for next function call.
-----
Frame next
-----
X
X
X
X
X
X
X - Stack 0
-----
Frame 0
-----
X
X
X - Stack -1
-----
Frame -1
-----
X
X
X
X
X - Stack -2
-----
Frame -2
-----
...
...
...
-----
Bottom of stack
Fibers also have an incomplete stack frame for the next function call on top of their stacks. Making a function call involves pushing arguments to this temporary stack, and then invoking either the CALL or TCALL instructions. Arguments for the next function call are pushed via the PUSH, PUSH2, PUSH3, and PUSHA instructions.
An example of this might be seen in the implementation of do_apply
in src/core/cfuns.c
.
The stack of a fiber will grow as large as needed, although by default Janet will limit the maximum size of a fiber's stack. The maximum stack size can be modified on a per-fiber basis.
The slots in the stack are exposed as virtual registers to instructions. They can hold any Janet value.
The slots in the stack are exposed as virtual registers to instructions. They can hold any Janet value.
The '$' prefix indicates that an instruction parameter is acting as a virtual register (slot).
Instruction | Signature | Description |
---|---|---|
add | (add dest lhs rhs) | $dest = $lhs + $rhs |
bapakin: Janet's compiler uses a bitset of live registers, called slots (see regalloc.c) each slot is just an index into the stackframe, and can be any non-negative integer up to 0xFFFF
bakpakin: most instructions expect slots only up to 255, so there is a difference between "short" slots, and "long" slots. The register allocator leaves a couple slots under 256 always free so we can use JOP_MOVENEAR and JOP_MOVEFAR to handle register spilling. The slots are freed when the variable they represent goes out of lexcial scope, so it does a crude form of liveness analysis
andrewchambers: so when you call a function the frame is allocated at the max slot size?
bakpakin: Yep
andrewchambers: I guess the short slots are more like traditional registers
bakpakin: Exactly. i think near and far are the terms used in the code more
bakbakin: As for how much pressure there is on the slots, it seems nothing in the core library uses more than 50 slots, and most functions use less than 10.
andrewchambers: A fiber has its own stack which has variable slots. that is where function local vars and defs go. and there is the constant table which is part of the function.
andrewchambers: ldc 0 0 means load constant 0 into slot 0 ret 0 means return slot 0
Instruction | Signature | Description |
---|---|---|
ldc | (ldc dest index) | $dest = constants[index] |
andrewchambers: slots are part of the fiber
Janet bytecode operates on an array of identical registers that can hold any Janet value (
Janet *
in C). Most instructions have a destination register, and 1 or 2 source registers. Registers are simply indices into the stack frame, which can be thought of as a constant sized array.
/* Get a register less than 256 for temporary use. */
int32_t janetc_allocnear(...
/* Convert a slot to a temporary 1 byte register */
static int32_t janetc_regnear(...
/* Convert a slot to a two byte register */
static int32_t janetc_regfar(...
/* Fiber signals */
typedef enum {
JANET_SIGNAL_OK,
JANET_SIGNAL_ERROR,
JANET_SIGNAL_DEBUG,
JANET_SIGNAL_YIELD,
JANET_SIGNAL_USER0,
JANET_SIGNAL_USER1,
JANET_SIGNAL_USER2,
JANET_SIGNAL_USER3,
JANET_SIGNAL_USER4,
JANET_SIGNAL_USER5,
JANET_SIGNAL_USER6,
JANET_SIGNAL_USER7,
JANET_SIGNAL_USER8,
JANET_SIGNAL_USER9
} JanetSignal;
/* Interpreter main loop */
static JanetSignal run_vm(JanetFiber *fiber, Janet in) {
- The Janet Abstract Machine
- The Implementation of Lua 5.0
- Interesting things about the Lua interpreter
- Crafting Interpreters - Chapter 15 Design Note: Register-Based Bytecode
- Why Register VM and not Stack VM?
- Register allocation page at Wikipedia
- Stack Based vs Register Based Virtual Machine Architecture...
- Virtual Machines for Abstraction...
- Language Implementation Patterns