Last active
August 17, 2018 08:20
-
-
Save bit-hack/5f4ee12e52acdc6ceccf52be743ca192 to your computer and use it in GitHub Desktop.
A small weakly typed virtual machine core in c++
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <list> | |
#include <set> | |
#include <string> | |
#include <unordered_map> | |
#include <vector> | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
enum object_type_t { e_none, e_integer, e_string, e_boolean, e_array }; | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
struct object_t { | |
object_t(object_type_t type) : type(type) {} | |
virtual ~object_t() {} | |
template <typename type_t> type_t *cast() { | |
assert(type_t::TYPE == type); | |
return static_cast<type_t *>(this); | |
} | |
template <typename type_t> const type_t *cast() const { | |
assert(type_t::TYPE == type); | |
return static_cast<const type_t *>(this); | |
} | |
const object_type_t type; | |
}; | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
struct object_integer_t : public object_t { | |
static const object_type_t TYPE = e_integer; | |
object_integer_t() : object_t(TYPE) {} | |
object_integer_t(const int32_t value) : object_t(TYPE), value(value) {} | |
int32_t value; | |
}; | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
struct object_string_t : public object_t { | |
static const object_type_t TYPE = e_string; | |
object_string_t() : object_t(TYPE) {} | |
object_string_t(const std::string &value) : object_t(TYPE), value(value) {} | |
std::string value; | |
}; | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
struct object_boolean_t : public object_t { | |
static const object_type_t TYPE = e_boolean; | |
object_boolean_t() : object_t(TYPE) {} | |
object_boolean_t(const bool value) : object_t(TYPE), value(value) {} | |
bool value; | |
}; | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
struct object_array_t : public object_t { | |
static const object_type_t TYPE = e_array; | |
object_array_t() : object_t(TYPE) {} | |
std::vector<object_t *> value; | |
}; | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
enum opcode_type_t { | |
// binary opcodes | |
e_add, // A + B | |
e_sub, // A - B | |
e_div, // A / B | |
e_mul, // A * B | |
e_mod, // A % B | |
e_and, // A && B | |
e_or, // A || B | |
e_lt, // A < B | |
e_lte, // A <= B | |
e_eq, // A == B | |
e_gte, // A >= B | |
e_gt, // A > B | |
// e_index, // A[B] | |
// e_call, // A(..., ..., ...) | |
// unary opcodes | |
e_uneg, // -A | |
e_unot, // !A | |
// e_uinc, // ++A | |
// e_udec, // --A | |
e_yield, // yield timeslice | |
}; | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
struct vm_t; | |
typedef void (*opcode_t)(vm_t &, object_t *, object_t *); | |
typedef std::pair<object_type_t, object_type_t> object_type_pair_t; | |
struct object_type_pair_hash_t { | |
std::size_t operator()(const object_type_pair_t t) const { | |
const uint32_t a = t.first; | |
const uint32_t b = t.second; | |
std::hash<uint32_t> hash; | |
return hash(a ^ (b << 8)); // fudge | |
} | |
}; | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
struct gc_t { | |
gc_t(vm_t &vm) : vm(vm) {} | |
template <class T, class... Types> object_t *alloc(Types &&... args) { | |
// check allocation derives from object_t | |
static_assert(std::is_base_of<object_t, T>::value, | |
"type must derive from object_t"); | |
// perform the allocation and call constructor | |
object_t *obj = new T(std::forward<Types>(args)...); | |
// add to 'allocated' list | |
_allocs.push_back(obj); | |
// this object is alive until the next collection | |
_alive.insert(obj); | |
return obj; | |
} | |
void check_in(object_t *obj) { _alive.insert(obj); } | |
void collect() { | |
std::set<object_t *> mark; | |
std::vector<object_t *> sweep; | |
// insert known alive objects | |
for (object_t *o : _alive) { | |
sweep.push_back(o); | |
} | |
// mark & sweep all objects | |
while (!sweep.empty()) { | |
// pop next item to enumerate | |
object_t *o = sweep.back(); | |
sweep.pop_back(); | |
// mark it as seen and valid | |
mark.insert(o); | |
// expand all children | |
_enumerate(o, mark, sweep); | |
} | |
// delete unreachable objects | |
_delete_unmarked(mark); | |
// clear | |
_alive.clear(); | |
} | |
protected: | |
void _delete_unmarked(std::set<object_t *> &mark) { | |
for (auto itt = _allocs.begin(); itt != _allocs.end();) { | |
if (mark.count(*itt)) { | |
++itt; | |
} else { | |
// release the object | |
delete *itt; | |
itt = _allocs.erase(itt); | |
} | |
} | |
} | |
void _enumerate(object_t *o, std::set<object_t *> &mark, std::vector<object_t *> &sweep) const { | |
// enumerate this object if needed | |
switch (o->type) { | |
case e_array: | |
for (object_t *obj: o->cast<object_array_t>()->value) | |
if (mark.count(obj) == 0) { | |
sweep.push_back(obj); | |
} | |
break; | |
} | |
} | |
vm_t &vm; | |
std::list<object_t *> _allocs; | |
std::set<object_t *> _alive; | |
}; | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
struct vm_t { | |
vm_t() : gc(*this) {} | |
void opcode_add(opcode_type_t type, object_type_pair_t pair, opcode_t op) { | |
// create a new type pair map if needed | |
if (!opcodes.count(type)) { | |
opcodes[type] = new std::unordered_map<object_type_pair_t, opcode_t, | |
object_type_pair_hash_t>; | |
} | |
// insert opcode into type code map | |
(*opcodes[type])[pair] = op; | |
} | |
object_t *object_pop() { | |
assert(!stack.empty()); | |
object_t *out = stack.back(); | |
stack.pop_back(); | |
return out; | |
} | |
void object_push(object_t *obj) { stack.push_back(obj); } | |
void exec(opcode_type_t op) { | |
const bool unary = is_unary(op); | |
auto i = opcodes.find(op); | |
if (i == opcodes.end()) { | |
assert("no map for opcode category"); | |
} | |
assert(i->second); | |
const auto &map = *i->second; | |
if (stack.size() < (unary ? 1u : 2u)) { | |
assert(!"not enough items on the stack"); | |
} | |
// note: backwards order of pops | |
object_t *b = unary ? nullptr : object_pop(); | |
object_t *a = object_pop(); | |
assert(a && (b || unary)); | |
const auto pair = object_type_pair_t{a->type, (b ? b->type : e_none)}; | |
auto j = map.find(pair); | |
if (j == map.end()) { | |
assert("no opcodes for type pair"); | |
} | |
// dispatch to opcode handler | |
assert(j->second); | |
j->second(*this, a, b); | |
} | |
gc_t gc; | |
protected: | |
bool is_unary(opcode_type_t op) const { | |
switch (op) { | |
case e_uneg: | |
case e_unot: | |
return true; | |
default: | |
return false; | |
} | |
} | |
bool is_binary(opcode_type_t op) const { return !is_unary(op); } | |
typedef std::unordered_map<object_type_pair_t, opcode_t, | |
object_type_pair_hash_t> | |
type_pair_opcode_map_t; | |
// the object stack | |
std::vector<object_t *> stack; | |
// opcode_type -> {type, type} -> function pointer | |
std::unordered_map<opcode_type_t, type_pair_opcode_map_t *> opcodes; | |
}; | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
void opcode_add_integer_integer(vm_t &vm, object_t *a, object_t *b) { | |
// integer + integer | |
const auto *ai = a->cast<object_integer_t>(); | |
const auto *bi = b->cast<object_integer_t>(); | |
const auto sum = ai->value + bi->value; | |
vm.object_push(vm.gc.alloc<object_integer_t>(sum)); | |
} | |
void opcode_add_string_integer(vm_t &vm, object_t *a, object_t *b) { | |
// string + integer | |
const auto *as = a->cast<object_string_t>(); | |
const auto *bi = b->cast<object_integer_t>(); | |
const std::string sum = as->value + std::to_string(bi->value); | |
vm.object_push(vm.gc.alloc<object_string_t>(sum)); | |
} | |
void opcode_add_string_string(vm_t &vm, object_t *a, object_t *b) { | |
// string + string | |
const auto *as = a->cast<object_string_t>(); | |
const auto *bs = b->cast<object_string_t>(); | |
const std::string sum = as->value + bs->value; | |
vm.object_push(vm.gc.alloc<object_string_t>(sum)); | |
} | |
void opcode_uneg_integer(vm_t &vm, object_t *a, object_t * /* unused */) { | |
// -integer | |
const auto *ai = a->cast<object_integer_t>(); | |
vm.object_push(vm.gc.alloc<object_integer_t>(-ai->value)); | |
} | |
void opcode_unot_boolean(vm_t &vm, object_t *a, object_t * /* unused */) { | |
// !boolean | |
const auto *ab = a->cast<object_boolean_t>(); | |
vm.object_push(vm.gc.alloc<object_boolean_t>(!ab->value)); | |
} | |
void opcode_lt_integer_integer(vm_t &vm, object_t *a, object_t *b) { | |
// integer < integer | |
const auto *ai = a->cast<object_integer_t>(); | |
const auto *bi = b->cast<object_integer_t>(); | |
vm.object_push(vm.gc.alloc<object_boolean_t>(ai < bi)); | |
} | |
void opcode_lte_integer_integer(vm_t &vm, object_t *a, object_t *b) { | |
// integer <= integer | |
const auto *ai = a->cast<object_integer_t>(); | |
const auto *bi = b->cast<object_integer_t>(); | |
vm.object_push(vm.gc.alloc<object_boolean_t>(ai <= bi)); | |
} | |
void opcode_eq_integer_integer(vm_t &vm, object_t *a, object_t *b) { | |
// integer == integer | |
const auto *ai = a->cast<object_integer_t>(); | |
const auto *bi = b->cast<object_integer_t>(); | |
vm.object_push(vm.gc.alloc<object_boolean_t>(ai == bi)); | |
} | |
void opcode_gte_integer_integer(vm_t &vm, object_t *a, object_t *b) { | |
// integer >= integer | |
const auto *ai = a->cast<object_integer_t>(); | |
const auto *bi = b->cast<object_integer_t>(); | |
vm.object_push(vm.gc.alloc<object_boolean_t>(ai >= bi)); | |
} | |
void opcode_gt_integer_integer(vm_t &vm, object_t *a, object_t *b) { | |
// integer > integer | |
const auto *ai = a->cast<object_integer_t>(); | |
const auto *bi = b->cast<object_integer_t>(); | |
vm.object_push(vm.gc.alloc<object_boolean_t>(ai > bi)); | |
} | |
void opcode_eq_string_string(vm_t &vm, object_t *a, object_t *b) { | |
// string == string | |
const auto *as = a->cast<object_string_t>(); | |
const auto *bs = b->cast<object_string_t>(); | |
vm.object_push(vm.gc.alloc<object_boolean_t>(as->value == bs->value)); | |
} | |
void opcode_and_boolean_boolean(vm_t &vm, object_t *a, object_t *b) { | |
// boolean && boolean | |
const auto *as = a->cast<object_boolean_t>(); | |
const auto *bs = b->cast<object_boolean_t>(); | |
vm.object_push(vm.gc.alloc<object_boolean_t>(as->value && bs->value)); | |
} | |
void opcode_or_boolean_boolean(vm_t &vm, object_t *a, object_t *b) { | |
// boolean || boolean | |
const auto *as = a->cast<object_boolean_t>(); | |
const auto *bs = b->cast<object_boolean_t>(); | |
vm.object_push(vm.gc.alloc<object_boolean_t>(as->value || bs->value)); | |
} | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
void register_opcodes(vm_t &vm) { | |
vm.opcode_add(e_add, object_type_pair_t{e_integer, e_integer}, | |
opcode_add_integer_integer); | |
vm.opcode_add(e_add, object_type_pair_t{e_string, e_integer}, | |
opcode_add_string_integer); | |
vm.opcode_add(e_add, object_type_pair_t{e_string, e_string}, | |
opcode_add_string_string); | |
vm.opcode_add(e_lt, object_type_pair_t{e_integer, e_integer}, | |
opcode_lt_integer_integer); | |
vm.opcode_add(e_lte, object_type_pair_t{e_integer, e_integer}, | |
opcode_lte_integer_integer); | |
vm.opcode_add(e_eq, object_type_pair_t{e_integer, e_integer}, | |
opcode_eq_integer_integer); | |
vm.opcode_add(e_gte, object_type_pair_t{e_integer, e_integer}, | |
opcode_gte_integer_integer); | |
vm.opcode_add(e_gt, object_type_pair_t{e_integer, e_integer}, | |
opcode_gt_integer_integer); | |
vm.opcode_add(e_eq, object_type_pair_t{e_string, e_string}, | |
opcode_eq_string_string); | |
vm.opcode_add(e_and, object_type_pair_t{e_boolean, e_boolean}, | |
opcode_eq_string_string); | |
vm.opcode_add(e_or, object_type_pair_t{e_boolean, e_boolean}, | |
opcode_eq_string_string); | |
// register a unary opcodes | |
vm.opcode_add(e_uneg, object_type_pair_t{e_integer, e_none}, | |
opcode_uneg_integer); | |
vm.opcode_add(e_unot, object_type_pair_t{e_boolean, e_none}, | |
opcode_unot_boolean); | |
} | |
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- | |
int main() { | |
vm_t vm; | |
register_opcodes(vm); | |
{ | |
// push two objects on the stack | |
vm.object_push(vm.gc.alloc<object_string_t>("Hello World")); | |
vm.object_push(vm.gc.alloc<object_integer_t>(1234)); | |
// do an add | |
vm.exec(e_add); | |
// check the result | |
const auto *obj = vm.object_pop(); | |
const auto *str = obj->cast<object_string_t>(); | |
printf("%s\n", str->value.c_str()); | |
} | |
{ | |
// push an object on the stack | |
vm.object_push(vm.gc.alloc<object_integer_t>(12345)); | |
// unary minus | |
vm.exec(e_uneg); | |
// check the result | |
const auto *obj = vm.object_pop(); | |
const auto *val = obj->cast<object_integer_t>(); | |
printf("%d\n", val->value); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment