Skip to content

Instantly share code, notes, and snippets.

@bit-hack
Last active August 17, 2018 08:20
Show Gist options
  • Save bit-hack/5f4ee12e52acdc6ceccf52be743ca192 to your computer and use it in GitHub Desktop.
Save bit-hack/5f4ee12e52acdc6ceccf52be743ca192 to your computer and use it in GitHub Desktop.
A small weakly typed virtual machine core in c++
#include <assert.h>
#include <list>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
enum object_type_t { e_none, e_integer, e_string, e_boolean, e_array };
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
struct object_t {
object_t(object_type_t type) : type(type) {}
virtual ~object_t() {}
template <typename type_t> type_t *cast() {
assert(type_t::TYPE == type);
return static_cast<type_t *>(this);
}
template <typename type_t> const type_t *cast() const {
assert(type_t::TYPE == type);
return static_cast<const type_t *>(this);
}
const object_type_t type;
};
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
struct object_integer_t : public object_t {
static const object_type_t TYPE = e_integer;
object_integer_t() : object_t(TYPE) {}
object_integer_t(const int32_t value) : object_t(TYPE), value(value) {}
int32_t value;
};
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
struct object_string_t : public object_t {
static const object_type_t TYPE = e_string;
object_string_t() : object_t(TYPE) {}
object_string_t(const std::string &value) : object_t(TYPE), value(value) {}
std::string value;
};
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
struct object_boolean_t : public object_t {
static const object_type_t TYPE = e_boolean;
object_boolean_t() : object_t(TYPE) {}
object_boolean_t(const bool value) : object_t(TYPE), value(value) {}
bool value;
};
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
struct object_array_t : public object_t {
static const object_type_t TYPE = e_array;
object_array_t() : object_t(TYPE) {}
std::vector<object_t *> value;
};
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
enum opcode_type_t {
// binary opcodes
e_add, // A + B
e_sub, // A - B
e_div, // A / B
e_mul, // A * B
e_mod, // A % B
e_and, // A && B
e_or, // A || B
e_lt, // A < B
e_lte, // A <= B
e_eq, // A == B
e_gte, // A >= B
e_gt, // A > B
// e_index, // A[B]
// e_call, // A(..., ..., ...)
// unary opcodes
e_uneg, // -A
e_unot, // !A
// e_uinc, // ++A
// e_udec, // --A
e_yield, // yield timeslice
};
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
struct vm_t;
typedef void (*opcode_t)(vm_t &, object_t *, object_t *);
typedef std::pair<object_type_t, object_type_t> object_type_pair_t;
struct object_type_pair_hash_t {
std::size_t operator()(const object_type_pair_t t) const {
const uint32_t a = t.first;
const uint32_t b = t.second;
std::hash<uint32_t> hash;
return hash(a ^ (b << 8)); // fudge
}
};
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
struct gc_t {
gc_t(vm_t &vm) : vm(vm) {}
template <class T, class... Types> object_t *alloc(Types &&... args) {
// check allocation derives from object_t
static_assert(std::is_base_of<object_t, T>::value,
"type must derive from object_t");
// perform the allocation and call constructor
object_t *obj = new T(std::forward<Types>(args)...);
// add to 'allocated' list
_allocs.push_back(obj);
// this object is alive until the next collection
_alive.insert(obj);
return obj;
}
void check_in(object_t *obj) { _alive.insert(obj); }
void collect() {
std::set<object_t *> mark;
std::vector<object_t *> sweep;
// insert known alive objects
for (object_t *o : _alive) {
sweep.push_back(o);
}
// mark & sweep all objects
while (!sweep.empty()) {
// pop next item to enumerate
object_t *o = sweep.back();
sweep.pop_back();
// mark it as seen and valid
mark.insert(o);
// expand all children
_enumerate(o, mark, sweep);
}
// delete unreachable objects
_delete_unmarked(mark);
// clear
_alive.clear();
}
protected:
void _delete_unmarked(std::set<object_t *> &mark) {
for (auto itt = _allocs.begin(); itt != _allocs.end();) {
if (mark.count(*itt)) {
++itt;
} else {
// release the object
delete *itt;
itt = _allocs.erase(itt);
}
}
}
void _enumerate(object_t *o, std::set<object_t *> &mark, std::vector<object_t *> &sweep) const {
// enumerate this object if needed
switch (o->type) {
case e_array:
for (object_t *obj: o->cast<object_array_t>()->value)
if (mark.count(obj) == 0) {
sweep.push_back(obj);
}
break;
}
}
vm_t &vm;
std::list<object_t *> _allocs;
std::set<object_t *> _alive;
};
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
struct vm_t {
vm_t() : gc(*this) {}
void opcode_add(opcode_type_t type, object_type_pair_t pair, opcode_t op) {
// create a new type pair map if needed
if (!opcodes.count(type)) {
opcodes[type] = new std::unordered_map<object_type_pair_t, opcode_t,
object_type_pair_hash_t>;
}
// insert opcode into type code map
(*opcodes[type])[pair] = op;
}
object_t *object_pop() {
assert(!stack.empty());
object_t *out = stack.back();
stack.pop_back();
return out;
}
void object_push(object_t *obj) { stack.push_back(obj); }
void exec(opcode_type_t op) {
const bool unary = is_unary(op);
auto i = opcodes.find(op);
if (i == opcodes.end()) {
assert("no map for opcode category");
}
assert(i->second);
const auto &map = *i->second;
if (stack.size() < (unary ? 1u : 2u)) {
assert(!"not enough items on the stack");
}
// note: backwards order of pops
object_t *b = unary ? nullptr : object_pop();
object_t *a = object_pop();
assert(a && (b || unary));
const auto pair = object_type_pair_t{a->type, (b ? b->type : e_none)};
auto j = map.find(pair);
if (j == map.end()) {
assert("no opcodes for type pair");
}
// dispatch to opcode handler
assert(j->second);
j->second(*this, a, b);
}
gc_t gc;
protected:
bool is_unary(opcode_type_t op) const {
switch (op) {
case e_uneg:
case e_unot:
return true;
default:
return false;
}
}
bool is_binary(opcode_type_t op) const { return !is_unary(op); }
typedef std::unordered_map<object_type_pair_t, opcode_t,
object_type_pair_hash_t>
type_pair_opcode_map_t;
// the object stack
std::vector<object_t *> stack;
// opcode_type -> {type, type} -> function pointer
std::unordered_map<opcode_type_t, type_pair_opcode_map_t *> opcodes;
};
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
void opcode_add_integer_integer(vm_t &vm, object_t *a, object_t *b) {
// integer + integer
const auto *ai = a->cast<object_integer_t>();
const auto *bi = b->cast<object_integer_t>();
const auto sum = ai->value + bi->value;
vm.object_push(vm.gc.alloc<object_integer_t>(sum));
}
void opcode_add_string_integer(vm_t &vm, object_t *a, object_t *b) {
// string + integer
const auto *as = a->cast<object_string_t>();
const auto *bi = b->cast<object_integer_t>();
const std::string sum = as->value + std::to_string(bi->value);
vm.object_push(vm.gc.alloc<object_string_t>(sum));
}
void opcode_add_string_string(vm_t &vm, object_t *a, object_t *b) {
// string + string
const auto *as = a->cast<object_string_t>();
const auto *bs = b->cast<object_string_t>();
const std::string sum = as->value + bs->value;
vm.object_push(vm.gc.alloc<object_string_t>(sum));
}
void opcode_uneg_integer(vm_t &vm, object_t *a, object_t * /* unused */) {
// -integer
const auto *ai = a->cast<object_integer_t>();
vm.object_push(vm.gc.alloc<object_integer_t>(-ai->value));
}
void opcode_unot_boolean(vm_t &vm, object_t *a, object_t * /* unused */) {
// !boolean
const auto *ab = a->cast<object_boolean_t>();
vm.object_push(vm.gc.alloc<object_boolean_t>(!ab->value));
}
void opcode_lt_integer_integer(vm_t &vm, object_t *a, object_t *b) {
// integer < integer
const auto *ai = a->cast<object_integer_t>();
const auto *bi = b->cast<object_integer_t>();
vm.object_push(vm.gc.alloc<object_boolean_t>(ai < bi));
}
void opcode_lte_integer_integer(vm_t &vm, object_t *a, object_t *b) {
// integer <= integer
const auto *ai = a->cast<object_integer_t>();
const auto *bi = b->cast<object_integer_t>();
vm.object_push(vm.gc.alloc<object_boolean_t>(ai <= bi));
}
void opcode_eq_integer_integer(vm_t &vm, object_t *a, object_t *b) {
// integer == integer
const auto *ai = a->cast<object_integer_t>();
const auto *bi = b->cast<object_integer_t>();
vm.object_push(vm.gc.alloc<object_boolean_t>(ai == bi));
}
void opcode_gte_integer_integer(vm_t &vm, object_t *a, object_t *b) {
// integer >= integer
const auto *ai = a->cast<object_integer_t>();
const auto *bi = b->cast<object_integer_t>();
vm.object_push(vm.gc.alloc<object_boolean_t>(ai >= bi));
}
void opcode_gt_integer_integer(vm_t &vm, object_t *a, object_t *b) {
// integer > integer
const auto *ai = a->cast<object_integer_t>();
const auto *bi = b->cast<object_integer_t>();
vm.object_push(vm.gc.alloc<object_boolean_t>(ai > bi));
}
void opcode_eq_string_string(vm_t &vm, object_t *a, object_t *b) {
// string == string
const auto *as = a->cast<object_string_t>();
const auto *bs = b->cast<object_string_t>();
vm.object_push(vm.gc.alloc<object_boolean_t>(as->value == bs->value));
}
void opcode_and_boolean_boolean(vm_t &vm, object_t *a, object_t *b) {
// boolean && boolean
const auto *as = a->cast<object_boolean_t>();
const auto *bs = b->cast<object_boolean_t>();
vm.object_push(vm.gc.alloc<object_boolean_t>(as->value && bs->value));
}
void opcode_or_boolean_boolean(vm_t &vm, object_t *a, object_t *b) {
// boolean || boolean
const auto *as = a->cast<object_boolean_t>();
const auto *bs = b->cast<object_boolean_t>();
vm.object_push(vm.gc.alloc<object_boolean_t>(as->value || bs->value));
}
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
void register_opcodes(vm_t &vm) {
vm.opcode_add(e_add, object_type_pair_t{e_integer, e_integer},
opcode_add_integer_integer);
vm.opcode_add(e_add, object_type_pair_t{e_string, e_integer},
opcode_add_string_integer);
vm.opcode_add(e_add, object_type_pair_t{e_string, e_string},
opcode_add_string_string);
vm.opcode_add(e_lt, object_type_pair_t{e_integer, e_integer},
opcode_lt_integer_integer);
vm.opcode_add(e_lte, object_type_pair_t{e_integer, e_integer},
opcode_lte_integer_integer);
vm.opcode_add(e_eq, object_type_pair_t{e_integer, e_integer},
opcode_eq_integer_integer);
vm.opcode_add(e_gte, object_type_pair_t{e_integer, e_integer},
opcode_gte_integer_integer);
vm.opcode_add(e_gt, object_type_pair_t{e_integer, e_integer},
opcode_gt_integer_integer);
vm.opcode_add(e_eq, object_type_pair_t{e_string, e_string},
opcode_eq_string_string);
vm.opcode_add(e_and, object_type_pair_t{e_boolean, e_boolean},
opcode_eq_string_string);
vm.opcode_add(e_or, object_type_pair_t{e_boolean, e_boolean},
opcode_eq_string_string);
// register a unary opcodes
vm.opcode_add(e_uneg, object_type_pair_t{e_integer, e_none},
opcode_uneg_integer);
vm.opcode_add(e_unot, object_type_pair_t{e_boolean, e_none},
opcode_unot_boolean);
}
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
int main() {
vm_t vm;
register_opcodes(vm);
{
// push two objects on the stack
vm.object_push(vm.gc.alloc<object_string_t>("Hello World"));
vm.object_push(vm.gc.alloc<object_integer_t>(1234));
// do an add
vm.exec(e_add);
// check the result
const auto *obj = vm.object_pop();
const auto *str = obj->cast<object_string_t>();
printf("%s\n", str->value.c_str());
}
{
// push an object on the stack
vm.object_push(vm.gc.alloc<object_integer_t>(12345));
// unary minus
vm.exec(e_uneg);
// check the result
const auto *obj = vm.object_pop();
const auto *val = obj->cast<object_integer_t>();
printf("%d\n", val->value);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment