Skip to content

Instantly share code, notes, and snippets.

@mbillingr
Created September 10, 2019 07:47
Show Gist options
  • Select an option

  • Save mbillingr/c47846d25f3dbb4d7acfde8cee705dbb to your computer and use it in GitHub Desktop.

Select an option

Save mbillingr/c47846d25f3dbb4d7acfde8cee705dbb to your computer and use it in GitHub Desktop.
A toy Mark-and-Sweep garbage collector with some serious issues
use std::collections::HashSet;
fn main() {
let mut gc = Memory::new();
gc.set_base(estimate_stack_pointer_address());
let s = Box::new(Box::new(inner_main(&mut gc))); // Disaster!
gc.collect_stack();
inner_main(&mut gc);
gc.collect_stack();
println!("==============");
println!("{:?}", **s as usize);
println!("==============");
lisp_main(&mut gc);
println!("==============");
gc.collect_stack();
}
#[inline(never)]
fn inner_main(gc: &mut Memory) -> *const TestStruct {
let x = gc.alloc_default::<TestStruct>();
for _ in 0..3 {
let ptr = gc.allocate(42);
println!("{}", ptr as usize);
}
println!("{:?}", x as usize);
return x
}
#[derive(Debug)]
struct TestStruct {
data: String,
}
impl Default for TestStruct {
fn default() -> Self {
TestStruct {
data: "Hello, World!".into(),
}
}
}
#[inline(never)]
fn lisp_main(gc: &mut Memory) {
let x = lisp_run(gc);
gc.collect_stack();
let x = lisp_run(gc);
gc.collect_stack();
let x = lisp_run(gc);
gc.collect_stack();
let x = lisp_run(gc);
//gc.collect_stack();
println!("{:?}", x);
gc.collect_stack();
}
#[inline(never)]
fn lisp_run(gc: &mut Memory) -> Sexp {
use Sexp::*;
let mut x = Eol;
for i in 0..3 {
x = cons(Int(i), x, gc);
}
//x = Eol;
//gc.collect_stack();
x
}
fn cons(car: Sexp, cdr: Sexp, gc: &mut Memory) -> Sexp {
Sexp::Pair(gc.alloc_copy((car, cdr)))
}
#[derive(Copy, Clone)]
enum Sexp {
Eol,
Int(i64),
Pair(*mut (Sexp, Sexp)),
}
impl Drop for TestStruct {
fn drop(&mut self) {
println!("dropping {}: {:?}", self as *const _ as usize, self)
}
}
impl std::fmt::Debug for Sexp {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Sexp::Eol => write!(f, "'()"),
Sexp::Int(i) => write!(f, "{}", i),
Sexp::Pair(ptr) => unsafe {
write!(f, "{:p}: ({:?} . {:?})", *ptr, (**ptr).0, (**ptr).1)
}
}
}
}
struct Memory {
heap: Vec<Chunk>,
stack_base: usize,
}
impl Memory {
fn new() -> Self {
Memory {
heap: vec![],
stack_base: 0,
}
}
fn set_base(&mut self, stack_base: usize) {
self.stack_base = stack_base;
}
fn alloc_default<T: Default + Drop + 'static>(&mut self) -> *mut T {
let data = T::default();
let chunk = Chunk::Drop(std::mem::size_of::<T>(), Box::new(data));
let ptr = chunk.as_ptr();
self.heap.push(chunk);
assert_eq!(ptr, self.heap.last().unwrap().as_ptr());
ptr
}
fn alloc_copy<T: Copy>(&mut self, value: T) -> *mut T {
let ptr = self.allocate(std::mem::size_of::<T>()) as *mut T;
unsafe {
*ptr = value;
}
ptr
}
fn allocate(&mut self, n_bytes: usize) -> *mut u8 {
let chunk = Chunk::new(n_bytes);
let ptr = chunk.as_ptr();
self.heap.push(chunk);
assert_eq!(ptr, self.heap.last().unwrap().as_ptr());
ptr
}
fn collect_stack(&mut self) {
let current_sp = estimate_stack_pointer_address();
let stack_start = current_sp.min(self.stack_base);
let stack_end = current_sp.max(self.stack_base);
// pretend the stack is an array of aligned pointers
let stack_len = (stack_end - stack_start) / std::mem::size_of::<usize>() + 1;
let stack = unsafe { std::slice::from_raw_parts(stack_start as *const usize, stack_len) };
println!("stack: {:?}", stack);
self.collect(stack)
}
fn collect(&mut self, roots: &[usize]) {
let reachable = self.mark(roots);
self.sweep(reachable);
}
fn mark(&mut self, roots: &[usize]) -> HashSet<usize> {
let mut reachable = HashSet::new();
for root in roots {
//println!("{}", root);
mark_chunks(*root, &self.heap, &mut reachable);
}
reachable
}
fn sweep(&mut self, reachable: HashSet<usize>) {
let heap = std::mem::replace(&mut self.heap, vec![]);
self.heap = heap
.into_iter()
.inspect(|chunk| {
if !reachable.contains(&chunk.start_address()) {
println!("sweep: {:p} {}", chunk.start_address() as *const u8, chunk.start_address())
}
})
.filter(|chunk| reachable.contains(&chunk.start_address()))
.collect();
}
}
enum Chunk {
Raw(Vec<u8>),
Drop(usize, Box<dyn Drop>),
}
impl Chunk {
fn new(n_bytes: usize) -> Self {
Chunk::Raw(vec![0; n_bytes])
}
fn as_ptr<T>(&self) -> *mut T {
match self {
Chunk::Raw(bytes) => &bytes[0] as *const u8 as *mut T,
Chunk::Drop(_, data) => &**data as *const _ as *mut T,
}
}
fn start_address(&self) -> usize {
self.as_ptr::<u8>() as usize
}
fn end_address(&self) -> usize {
match self {
Chunk::Raw(bytes) => self.start_address() + bytes.len(),
Chunk::Drop(size, _) => self.start_address() + size,
}
}
fn mark(&self, address: usize, all_chunks: &[Chunk], reachable: &mut HashSet<usize>) {
//println!("{}..{} ? {}", self.start_address(), self.end_address(), address);
if reachable.contains(&self.start_address()) {
return;
}
if address >= self.start_address() && address < self.end_address() {
reachable.insert(self.start_address());
println!("mark: {:p} {}", self.start_address() as *const u8, address);
for sub_addr in self.slice_of_pointers() {
mark_chunks(*sub_addr as usize, all_chunks, reachable);
}
}
}
fn slice_of_pointers(&self) -> &[*const u8] {
let mut start = self.start_address();
let align = std::mem::align_of::<*const u8>();
start += (align - start % align) % align;
let len = self.end_address().saturating_sub(start) / std::mem::size_of::<*const u8>();
unsafe { std::slice::from_raw_parts(start as *const _, len) }
}
}
fn mark_chunks(address: usize, all_chunks: &[Chunk], reachable: &mut HashSet<usize>) {
for chunk in all_chunks {
chunk.mark(address, all_chunks, reachable);
}
}
#[inline(never)]
fn estimate_stack_pointer_address() -> usize {
let mut sp = 0;
sp = &sp as *const _ as usize;
sp
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment