Last active
August 29, 2015 14:00
-
-
Save mraleph/11093692 to your computer and use it in GitHub Desktop.
tl;dr I want to be able to write `obj.td.lookup()` instead of `obj.ptr().td.handle().lookup()` but Rust's Deref trait does not work for that.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is an experiment to express within Rust typesystem a handle-system for | |
// a managed runtime similar to ones used internally by V8 and Dart VM. | |
// | |
// In short: managed runtime needs to know about any pointer into its heap. If | |
// implementation language (e.g. C++) does not provide stack walking | |
// capabilities we have to explicitly tell GC about each and every pointer we | |
// are working with so that it can discover and update them when necessary. | |
// | |
extern crate libc; | |
// Tagged pointer into the heap. For simplicity use LSB tagging scheme: | |
// | |
// If LSB is set it's a pointer else it's an integer shifted by 1 (aka smi). | |
// | |
struct TaggedPointer<T> { value: int } | |
impl<T> TaggedPointer<T> { | |
fn isSmi(&self) -> bool { | |
(self.value & 1) == 0 | |
} | |
fn untag<T>(&self) -> &mut T { | |
assert!(!self.isSmi()); | |
unsafe { &mut *std::cast::transmute::<int, *mut T>(self.value - 1) } | |
} | |
} | |
fn tag<T>(ptr: *mut T) -> TaggedPointer<T> { | |
unsafe { | |
TaggedPointer { value: std::cast::transmute::<*mut T, int>(ptr) + 1 } | |
} | |
} | |
// Type used to mark fields that contain known smi's, e.g. string length. | |
struct Smi; | |
impl TaggedPointer<Smi> { | |
fn toInt(&self) -> int { self.value >> 1 } | |
} | |
// -- HEAP OBJECTS ----------------------------------------------------------- | |
// All kinds of objects that can be allocated in the heap.
enum InstanceType {
    TypeDescriptorType,
    String,
}
// First word of every object allocated in the heap points to | |
// TypeDescriptor for this object. | |
struct HeapObject { | |
td: TaggedPointer<TypeDescriptor> | |
} | |
// TypeDescriptor itself is a HeapObject so it also has a header. | |
// NOTE(mraleph): here I would prefer to either structurally inherit HeapObject | |
// or at least use Go-like anonymous field to allow transparent access to | |
// the header. | |
struct TypeDescriptor { | |
header: HeapObject, | |
instance_type: InstanceType | |
} | |
// This is the header of the String object. In addition to TypeDescriptor | |
// it has length and hash fields. | |
// NOTE(mraleph): here I would like to be able to encode variable length | |
// array that follows: | |
// | |
// struct String { | |
// ..., | |
// data: u8[] | |
// } | |
// | |
struct String { | |
header: HeapObject, | |
length: TaggedPointer<Smi>, | |
hash: TaggedPointer<Smi> | |
} | |
// -- HANDLES ---------------------------------------------------------------- | |
// NOTE(mraleph): Handles are allocated in a region of memory that is known to | |
// the GC and hold a tagged pointer into the heap. There are two ways to | |
// represent that. In V8 handles are a pointer to the location which stores | |
// a pointer into the heap and they are passed everywhere by value: | |
// | |
// struct Handle<T> { location: *mut TaggedPointer<T> } | |
// | |
// Notice `mut` - GC can move the objects so it has to update the pointer hence | |
// mutability. In the same way there is never such a thing as &T or *T for a
// heap allocated T - GC can always come and move some object that T is pointing | |
// to (and there is always at least one object T is pointing to - its | |
// TypeDescriptor). | |
// | |
// However, trying to use such a Handle<T> runs into issues: first of all, it
// is impossible to define a meaningful Deref trait for it. Ideally for a given | |
// Handle we want to write code like `a.header.td` without actually explicitly | |
// unwrapping handle and untagging pointer `a`. Unfortunately Deref trait is not | |
// flexible enough for this: | |
// | |
// impl<T> Deref<T> for Handle<T> { | |
// fn deref<'a>(&'a self) -> &'a T { | |
// // Have to return a value of &'a T here which makes no sense. | |
// // Can only be &'a mut T! | |
// } | |
// } | |
// | |
// Here is what the same impl could have looked like if the Deref trait were
// flexible about its return type:
// | |
// impl<T> Deref<T> for Handle<T> { | |
// fn deref<'a>(&'a self) -> &'a mut T { | |
// unsafe { (*self.location).untag() } | |
// } | |
// } | |
// | |
// Unfortunately this will not compile. Thus instead of `a.header.td` we have | |
// to write `a.ptr().header.td`. | |
// | |
// Another issue that seems impossible to tackle within Rust type-system is | |
// automatic wrapping of TaggedPointer<T> into Handle<T> prior to dereference. | |
// Ideally we would like to provide implementation like this | |
// | |
// impl<T> Deref<Handle<T>> for TaggedPointer<T> { | |
// fn deref<'a>(&'a self) -> Handle<T> { | |
// wrap_in_handle(*self) | |
// } | |
// } | |
// | |
// But this will not compile because deref has to return a reference (&'a ...), not a value.
// | |
// It is possible to build a slightly different Handle system where one level | |
// of indirection is shifted out of Handle<T> and pass around &mut Root<T> | |
// where Root is just a wrapper around TaggedPointer | |
// | |
// struct Root<T> { ptr: TaggedPointer<T> }
// | |
// But while this handle system allows us to write a well-typed DerefMut
// implementation: | |
// | |
// impl<T> DerefMut<Root<T>> for TaggedPointer<T> { | |
// // We shifted &mut out of Handle just to be able to type | |
// // this function :-( | |
// fn deref_mut<'a>(&'a mut self) -> &'a mut Root<T> { | |
// let root = allocate_root(); | |
// root.ptr = TaggedPointer { value: self.value };
// root | |
// } | |
// } | |
// | |
// fn allocate_root<'a, T>() -> &'a mut Root<T> { | |
// unsafe { | |
// &mut *(libc::malloc(std::mem::size_of::<Root<T>>() as libc::size_t) as *mut Root<T>) | |
// } | |
// } | |
// | |
// It still does not allow us to provide any meaningful implementation of Deref.
// | |
struct Handle<T> { | |
location: *mut TaggedPointer<T> | |
} | |
impl<T> Handle<T> { | |
// Unwrap handle to get down to the real pointer into the heap. | |
fn ptr(&self) -> &mut T { | |
unsafe { (*self.location).untag() } | |
} | |
fn raw(&self) -> TaggedPointer<T> { | |
unsafe { *self.location } | |
} | |
} | |
// Handle allocation helpers | |
// NOTE(mraleph): in general handles are allocated in some separete scoped data | |
// structure, which goes away as soon as certain scope is left. This is just | |
// a stab to make the code compile and run. | |
// Ideally Handle should have its region-based lifetime encoded into the | |
// type. But Rust does not seem to provide such capabilities. | |
fn allocate_handle<'a, T>() -> Handle<T> { | |
unsafe { | |
Handle { location: libc::malloc(std::mem::size_of::<TaggedPointer<T>>() as libc::size_t) as *mut TaggedPointer<T> } | |
} | |
} | |
fn to_handle<'a, T>(ptr: TaggedPointer<T>) -> Handle<T> { | |
let h = allocate_handle(); | |
unsafe { *h.location = ptr; } | |
h | |
} | |
// Handle coercion helper. | |
impl<T> Handle<T> { | |
fn asH<U>(&self) -> Handle<U> { | |
Handle { location: unsafe { std::cast::transmute(self.location) } } | |
} | |
} | |
// Behavior of heap allocated objects is defined on handles. | |
// Defining this behavior on objects themselves would lead to unsafe code: | |
// | |
// impl TypeDescriptor { | |
// fn lookup(&mut self, name: Handle<String>) -> ~PropertyDesc { | |
// // Self here is a raw pointer! If GC happens to move it | |
// // then the code might crash. | |
// } | |
// } | |
// | |
impl Handle<TypeDescriptor> { | |
fn lookup(&self, name: Handle<String>) -> ~PropertyDesc { | |
/* Just a stub */ | |
println!("looking up in td = {:x}", self.raw().value); | |
~PropertyDesc | |
} | |
} | |
struct PropertyDesc; | |
impl PropertyDesc { | |
fn get(&self, obj: Handle<HeapObject>) -> TaggedPointer<HeapObject> { | |
TaggedPointer { value: 0 } | |
} | |
} | |
fn Get(val: Handle<HeapObject>, name: Handle<String>) -> TaggedPointer<HeapObject> { | |
// NOTE(mraleph): to_handle and ptr() dance looks very ugly :-( | |
let desc = to_handle(val.ptr().td).lookup(name); | |
desc.get(val) | |
} | |
// -- HEAP ALLOCATION (stubs) ------------------------------------------------ | |
fn Allocate<T>() -> TaggedPointer<T> { | |
unsafe { | |
tag(libc::malloc(std::mem::size_of::<T>() as libc::size_t) as *mut T) | |
} | |
} | |
fn AllocateTD() -> TaggedPointer<TypeDescriptor> { | |
let td = to_handle(Allocate::<TypeDescriptor>()); | |
td.raw() | |
} | |
fn AllocateMetaTD() -> TaggedPointer<TypeDescriptor> { | |
let td = to_handle(Allocate::<TypeDescriptor>()); | |
td.ptr().header.td = td.raw(); | |
td.raw() | |
} | |
fn AllocateString() -> TaggedPointer<String> { | |
let s = to_handle(Allocate::<String>()); | |
s.ptr().header.td = AllocateTD(); | |
s.raw() | |
} | |
fn main() { | |
// NOTE(mraleph): writing | |
// | |
// let meta_td = AllocateMetaTD() | |
// | |
// would lead to unsafe code. Unclear how to fix it :-/ | |
let meta_td = to_handle(AllocateMetaTD()); | |
println!("meta_td = {:x}, meta_td.td = {:x}", | |
meta_td.raw().value, | |
meta_td.ptr().header.td.value); | |
// Some meaningless code just to test. | |
Get(meta_td.asH::<HeapObject>(), to_handle(AllocateString())); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment