Let's compare Clang and Cuik/TB a bit to get a picture of what I need to do.
Given this C code:
void max_array(size_t n, float* x, float* y) {
for (size_t i = 0; i < n; i++) {
x[i] = x[i] > y[i] ? x[i] : y[i];
}
}
Let's compare Clang and Cuik/TB a bit to get a picture of what I need to do.
Given this C code:
void max_array(size_t n, float* x, float* y) {
for (size_t i = 0; i < n; i++) {
x[i] = x[i] > y[i] ? x[i] : y[i];
}
}
| // We do a little too much trolling... | |
| // | |
| // Ever wanted to pretend divisions by zero just didn't happen? here you go... | |
| // this is memes, don't try to make this work with C because the optimizer will | |
| // fight you on it. You can apply it to your own language if you really wanted. | |
| #include <stdint.h> | |
| #include <stdio.h> | |
| #define WIN32_LEAN_AND_MEAN | |
| #include <windows.h> |
| // Compile with clang or MSVC (WINDOWS ONLY RN) | |
| // | |
| // Implementing a POC green threads system using safepoints to show how cheap and simple it can | |
| // be done, all you need to do is call SAFEPOINT_POLL in your own language at the top of every | |
| // loop and function body (you can loosen up on this depending on the latency of pausing you're | |
| // willing to pay). Safepoint polling is made cheap because it's a load without a use site | |
| // which means it doesn't introduce a stall and pays a sub-cycle cost because of it (wastes resources | |
| // sure but doesn't block up the rest of execution). | |
| // | |
| // # safepoint poll |
During IPO (interprocedural optimization) you want to handle cases like specializing a function without its early exits: the early exits should be hoisted outside of the function, and inlining decisions should be made once that's done.
void foo(int* ptr) {
if (ptr != NULL) {
...
}
}
I've asked Cliff Click about the caveats after a pause; he's mentioned before that the OS will give you incorrect data:
"after a pause" has a few meanings...
| #define WIN32_LEAN_AND_MEAN | |
| #include <windows.h> | |
| #include <stdlib.h> | |
| #include <stdint.h> | |
| #include <stdio.h> | |
| typedef struct __attribute__((__packed__)) { | |
| // lea RCX, [next] | |
| // or lea RDI, [next] | |
| uint8_t rex; // 0x48 |
| Complete stuff: | |
| https://xmonader.github.io/letsbuildacompiler-pretty/ | |
| Lexers + DFAs: | |
| https://gist.github.com/pervognsen/218ea17743e1442e59bb60d29b1aa725 | |
| Parsing: | |
| https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing | |
| Backend: |
This is a trick mentioned by Cliff Click from his time at Azul: if you've got a stack, you've got a cheap thread-local buffer.
// 2MiB aligned 2MiB stack (size of a large page on x86)
enum { STACK_SIZE = 2*1024*1024 };
char* stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_HUGE_2MB | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
because now we can store thread locals at the base and always locate the base by chopping off the low bits:
mov rax, rsp
and rax, -0x200000
| inspect = require "inspect" | |
| function sm_parse(source) | |
| local states = {} | |
| for line in source:gmatch("[^\r\n]+") do | |
| local tokens = {} | |
| for w in line:gmatch("[^%s]+") do tokens[#tokens + 1] = w end | |
| -- yell at the user about missing arrows |
| #define _CRT_SECURE_NO_WARNINGS | |
| #include <stdlib.h> | |
| #include <stdio.h> | |
| #include <stdint.h> | |
| #include <stddef.h> | |
| #include <string.h> | |
| #include <stdbool.h> | |
| #include <time.h> | |
| #define WIN32_LEAN_AND_MEAN |