Forked from DmitrySoshnikov/writing-a-mark-sweep-garbage-collector.cpp
Last active
November 26, 2023 05:38
-
-
Save agentcooper/425c7eec022fa806f4db537c171cba70 to your computer and use it in GitHub Desktop.
Writing a Mark-Sweep Garbage Collector
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Writing a Mark-Sweep Garbage Collector in C++. | |
* | |
* This is a source code for the Lecture 9 from the | |
* Essentials of Garbage Collectors course: | |
* | |
* http://dmitrysoshnikov.com/courses/essentials-of-garbage-collectors/ | |
* | |
* See full details in: | |
* | |
* http://dmitrysoshnikov.com/compilers/writing-a-mark-sweep-garbage-collector/ | |
* | |
* by Dmitry Soshnikov <[email protected]> | |
* | |
* MIT License (C) 2020 | |
*/ | |
#include <iostream> | |
#include <memory> | |
#include <map> | |
#include <vector> | |
#include <setjmp.h> | |
template <typename... T> void print(const T &...t) { | |
(void)std::initializer_list<int>{(std::cout << t << "", 0)...}; | |
std::cout << "\n"; | |
} | |
struct Node; | |
struct Traceable; | |
struct ObjectHeader; | |
/** | |
* Objects tracing information: allocation pointer to header. | |
*/ | |
static std::map<Traceable *, ObjectHeader> traceInfo; | |
/** | |
* Object header. | |
*/ | |
struct ObjectHeader { | |
bool marked; | |
size_t size; | |
}; | |
template <typename T> void printVector(std::vector<T> const &input) { | |
print("\n{"); | |
for (int i = 0; i < input.size(); i++) { | |
print(" ", input.at(i), ", "); | |
} | |
print("}\n"); | |
} | |
/** | |
* The `Traceable` struct is used as a base class | |
* for any object which should be managed by GC. | |
*/ | |
struct Traceable { | |
ObjectHeader &getHeader() { return traceInfo.at(this); } | |
static void *operator new(size_t size) { | |
// Allocate a block: | |
void *object = ::operator new(size); | |
// Create an object header for it: | |
auto header = ObjectHeader{.marked = false, .size = size}; | |
traceInfo.insert(std::make_pair((Traceable *)object, header)); | |
// Result is the actual block: | |
return object; | |
} | |
virtual ~Traceable(){}; | |
}; | |
/** | |
* Traceable Node structure. | |
* | |
* Contains the name of the node, and the | |
* pointers to left and right sub-nodes, | |
* forming a tree. | |
*/ | |
struct Node : public Traceable { | |
char name; | |
Node *left; | |
Node *right; | |
Node(char name, Node *left = nullptr, Node *right = nullptr) | |
: name(name), left(left), right(right) { | |
print("Constructing Node ", name); | |
} | |
virtual ~Node() { print("Destroying Node ", name); } | |
}; | |
void dump(const char *label) { | |
print("\n------------------------------------------------"); | |
print(label); | |
print("\n{"); | |
for (const auto &it : traceInfo) { | |
auto node = reinterpret_cast<Node *>(it.first); | |
print(" [", node->name, "] ", it.first, ": {.marked = ", it.second.marked, | |
", .size = ", it.second.size, "}, "); | |
} | |
print("}\n"); | |
} | |
/** | |
* Go through object fields, and see if we have any | |
* which are recorded in the `traceInfo`. | |
*/ | |
std::vector<Traceable *> getPointers(Traceable *object) { | |
auto p = (uint8_t *)object; | |
auto end = (p + object->getHeader().size); | |
std::vector<Traceable *> result; | |
while (p < end) { | |
auto address = (Traceable *)*(uintptr_t *)p; | |
if (traceInfo.count(address) != 0) { | |
result.emplace_back(address); | |
} | |
p++; | |
} | |
return result; | |
} | |
/** | |
* Frame pointer. | |
*/ | |
intptr_t *__rbp; | |
/** | |
* Stack pointer. | |
*/ | |
intptr_t *__rsp; | |
/** | |
* Main frame stack begin. | |
*/ | |
intptr_t *__stackBegin; | |
#define __READ_RBP() __asm__ volatile("mov %0, x29" : "=r"(__rbp)) | |
#define __READ_RSP() __asm__ volatile("mov %0, sp" : "=r"(__rsp)) | |
/** | |
* Initializes address of the main frame. | |
*/ | |
void gcInit() { | |
// `main` frame pointer: | |
__READ_RBP(); | |
__stackBegin = (intptr_t *)*__rbp; | |
} | |
/** | |
* Traverses the stacks to obtain the roots. | |
*/ | |
std::vector<Traceable *> getRoots() { | |
std::vector<Traceable *> result; | |
// Some local variables (roots) can be stored in registers. | |
// Use `setjmp` to push them all onto the stack. | |
jmp_buf jb; | |
setjmp(jb); | |
__READ_RSP(); | |
auto rsp = (uint8_t *)__rsp; | |
auto top = (uint8_t *)__stackBegin; | |
while (rsp < top) { | |
auto address = (Traceable *)*(uintptr_t *)rsp; | |
if (traceInfo.count(address) != 0) { | |
result.emplace_back(address); | |
} | |
rsp++; | |
} | |
return result; | |
} | |
/** | |
* Mark phase. | |
*/ | |
void mark() { | |
auto worklist = getRoots(); | |
while (!worklist.empty()) { | |
auto o = worklist.back(); | |
worklist.pop_back(); | |
auto &header = o->getHeader(); | |
if (!header.marked) { | |
header.marked = true; | |
for (const auto &p : getPointers(o)) { | |
worklist.push_back(p); | |
} | |
} | |
} | |
} | |
/** | |
* Sweep phase. | |
*/ | |
void sweep() { | |
auto it = traceInfo.begin(); | |
while (it != traceInfo.end()) { | |
if (it->second.marked) { | |
it->second.marked = false; | |
++it; | |
} else { | |
delete it->first; | |
it = traceInfo.erase(it); | |
} | |
} | |
} | |
/** | |
* Mark-Sweep GC. | |
*/ | |
void gc() { | |
mark(); | |
dump("After mark:"); | |
sweep(); | |
dump("After sweep:"); | |
} | |
/* | |
Graph: | |
A -- Root | |
/ \ | |
B C | |
/ \ | |
D E | |
/ \ | |
F G | |
\ | |
H | |
*/ | |
Node *createGraph() { | |
auto H = new Node('H'); | |
auto G = new Node('G', nullptr, H); | |
auto F = new Node('F'); | |
auto E = new Node('E', F, G); | |
auto D = new Node('D'); | |
auto C = new Node('C', D, E); | |
auto B = new Node('B'); | |
auto A = new Node('A', B, C); | |
return A; // Root | |
} | |
int main(int argc, char const *argv[]) { | |
gcInit(); | |
auto A = createGraph(); | |
// Full alive graph: | |
dump("Allocated graph:"); | |
// Detach the whole right sub-tree: | |
A->right = nullptr; | |
// Run GC: | |
gc(); | |
// Manually destroy remaining stuff | |
delete A->left; | |
delete A; | |
return 0; | |
} | |
/* | |
------------------------------------------------ | |
Allocated graph: | |
{ | |
[H] 0x7fed26401770: {.marked = 0, .size = 24}, | |
[G] 0x7fed264017d0: {.marked = 0, .size = 24}, | |
[F] 0x7fed26401830: {.marked = 0, .size = 24}, | |
[E] 0x7fed26401890: {.marked = 0, .size = 24}, | |
[D] 0x7fed264018f0: {.marked = 0, .size = 24}, | |
[C] 0x7fed26401950: {.marked = 0, .size = 24}, | |
[B] 0x7fed264019b0: {.marked = 0, .size = 24}, | |
[A] 0x7fed26401a10: {.marked = 0, .size = 24}, | |
} | |
------------------------------------------------ | |
After mark: | |
{ | |
[H] 0x7fed26401770: {.marked = 0, .size = 24}, | |
[G] 0x7fed264017d0: {.marked = 0, .size = 24}, | |
[F] 0x7fed26401830: {.marked = 0, .size = 24}, | |
[E] 0x7fed26401890: {.marked = 0, .size = 24}, | |
[D] 0x7fed264018f0: {.marked = 0, .size = 24}, | |
[C] 0x7fed26401950: {.marked = 0, .size = 24}, | |
[B] 0x7fed264019b0: {.marked = 1, .size = 24}, | |
[A] 0x7fed26401a10: {.marked = 1, .size = 24}, | |
} | |
------------------------------------------------ | |
After sweep: | |
{ | |
[B] 0x7fed264019b0: {.marked = 0, .size = 24}, | |
[A] 0x7fed26401a10: {.marked = 0, .size = 24}, | |
} | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This version runs on Apple silicon.
Other changes (see the gist diff):
sweep
traceInfo
map values to be a struct, not a pointer to a structCompile:
Run and verify that there are no leaks:
# leaks utility is a part of Xcode leaks --noContent --atExit -- ./a.out