Last active
January 8, 2021 06:23
-
-
Save rrbutani/64614f4527a29f28eccd9ae5dcadd000 to your computer and use it in GitHub Desktop.
varargs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// runtime-length-varargs.c | |
#include <stddef.h> | |
#include <stdint.h> | |
#include <stdarg.h> | |
#include <stdio.h> | |
// In C:
//
// Prints `len`, then the `len` variadic `int` arguments that follow it on one
// line, and returns `len` converted to `int`.
//
// The caller must pass at least `len` `int` arguments after `len`.
int foo(size_t len, ...) {
    va_list args;
    va_start(args, len);

    printf("len: %zu\n", len);
    // `len` is unsigned, so the index is too (avoids a signed/unsigned compare).
    for (size_t i = 0; i < len; i++) {
        printf("%d ", va_arg(args, int));
    }
    printf("\n");

    // Every va_start must be paired with a va_end before the function returns.
    va_end(args);

    return (int)len;
}
#ifndef __x86_64__ | |
#error "These hacks are x86_64 specific; sorry!" | |
#endif | |
// Note: we're assuming that the target function (`foo`) uses the SYS-V ABI | |
// calling convention. If it does not the below will probably result in a | |
// corrupted stack. | |
__attribute__((noinline)) | |
int abi_crimes(size_t len, const int* buf) { | |
// The SYS-V ABI (for x86_64) will pass int sized args in these registers | |
// before using the stack: | |
// - rdi (used for len) | |
// - rsi | |
// - rdx | |
// - rcx | |
// - r8d | |
// - r9d | |
// I'm sure it's possible to be more clever than this (i.e. don't run the | |
// instructions that clobber registers that we don't need to use) but this | |
// will do for now. | |
long rsi = (len >= 1) ? buf[0] : 0, | |
rdx = (len >= 2) ? buf[1] : 0, | |
rcx = (len >= 3) ? buf[2] : 0, | |
r8 = (len >= 4) ? buf[3] : 0, | |
r9 = (len >= 5) ? buf[4] : 0; | |
const size_t NUM_REG_PASSED_ARGS = 5; | |
// GCC supports VLAs in structs (inside functions)! | |
// | |
// Not that we needed more reasons _not_ to do this but Clang hates this: | |
// » variable length array in structure' extension will never be supported | |
__attribute((packed)) | |
struct stack_frame { | |
// Gotta pad to a multiple of 16 bytes; that means len needs to be | |
// rounded up to the next multiple of 2: | |
// | |
// We subtract 5 from len so that we skip things we're going to pass via | |
// the stack. | |
// | |
// 8 bytes per val for the SYS-V ABI so we use `long`: | |
long arr[((len - NUM_REG_PASSED_ARGS) + 1) & (~1)]; | |
}; | |
// the SYS-V ABI calls for a 16 byte alignment for the stack pointer | |
__attribute__ ((aligned (16))) struct stack_frame frame; | |
for (int i = NUM_REG_PASSED_ARGS; i < len; i++) { | |
frame.arr[i - NUM_REG_PASSED_ARGS] = buf[i]; | |
} | |
uint64_t backup_sp; | |
void* stack_frame_ptr = &frame; | |
int ret; | |
// This is dangerous for a lot of reasons but one of them is that we're | |
// assuming that there's nothing after `frame` on our stack that's of any | |
// value. | |
// | |
// We have no real way to make sure the compiler won't (for example) stick | |
// the local variable `i` after frame on our stack but since we've asked to | |
// never be inlined this is okay; we can guarantee that we will never access | |
// anything on our own stack frame after calling the inner function. | |
asm ( | |
"mov %[len], %%rdi\n\t" | |
"mov %[rsi], %%rsi\n\t" | |
"mov %[rdx], %%rdx\n\t" | |
"mov %[rcx], %%rcx\n\t" | |
"mov %[r8], %%r8\n\t" | |
"mov %[r9], %%r9\n\t" | |
"mov %%rsp, %[backup_sp]\n\t" | |
"mov %[frame], %%rsp\n\t" | |
"call foo\n\t" | |
"mov %[backup_sp], %%rsp\n\t" | |
// backup_sp is early clobber | |
: [backup_sp] "=&r" (backup_sp) | |
, [ret] "=a" (ret) | |
: [len] "g" (len) // we put this in rdi | |
, [frame] "g" (stack_frame_ptr) | |
, [rsi] "g" (rsi) | |
, [rdx] "g" (rdx) | |
, [rcx] "g" (rcx) | |
, [r8] "g" (r8) | |
, [r9] "g" (r9) | |
: "rdi" | |
// From the SYS-V ABI: | |
, "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11" | |
); | |
return ret; | |
} | |
// If you just pass in `struct args` gcc produces code that doesn't do the right | |
// thing for the args that are passed in registers (i.e. the corresponding | |
// registers just aren't set correctly before `foo` is called; instead the | |
// entirety of `args` is put on the stack). | |
// | |
// Not completely sure but I think the SYS-V ABI says that if you try to pass a | |
// struct by value, its fields can be put in registers; the entire thing doesn't | |
// have to be passed at once on the stack (which is what happens here). Maybe it | |
// has something to do with the array? | |
// | |
// Update: seems like it. https://godbolt.org/z/13snMj | |
// | |
// In any case, we just go ahead and handle the first 5 args ourselves. | |
// Number of variadic args that travel in registers (rsi, rdx, rcx, r8, r9);
// `len` itself takes rdi. Used as the immediate operand in `foo_wrapped`.
const size_t NUM_REG_PASSED_ARGS = 5; | |
// Very tightly coupled to `less_criminal_but_probably_still_ub` and very likely | |
// to break. | |
__attribute((naked)) | |
// not specifying void in the arg list is intentional; we can't list the actual | |
// args since gcc will (annoyingly) go generate a prelude for them if we do even | |
// though the point of a "naked" function is that it won't | |
int foo_wrapped() { | |
// this is a little wasteful; we could patch the return value and use a | |
// jump here instead of a call but it's fine | |
// | |
// `less_criminal_but_probably_still_ub` pushes 3 registers after the actual | |
// stack params; we've got to save these somewhere: | |
asm( | |
// The pointer to the backup location is _after_ the args. | |
// | |
// So we need to get len, multiply it by 8, add it to 0x18 and the stack | |
// pointer and then the address at _that_ location will be our backup | |
// location. | |
// i.e.: `*(%rsp + 0x18 + 8 * len)` | |
// "mov 0x18(%rsp,%rdi,8), %r10\t\n" | |
// | |
// Actually it's: `*(%rsp + 0x18 + 8 * max((len - 5), 0))` | |
// | |
// len, the first arg, is stored in rdi | |
"xor %%r10, %%r10\t\n" | |
"mov %%rdi, %%rax\t\n" | |
"sub %[N], %%rax\t\n" | |
"cmovs %%r10, %%rax\t\n" // Not sure why the `cmov 0, %rax` form segfaults... | |
"mov 0x18(%%rsp,%%rax,8), %%r10\t\n" | |
// Now we can store the registers: | |
"pop %%rax\n\t" | |
"mov %%rax, 0x00(%%r10)\n\t" | |
"pop %%rax\n\t" | |
"mov %%rax, 0x08(%%r10)\n\t" | |
"pop %%rax\n\t" | |
"mov %%rax, 0x10(%%r10)\n\t" | |
// And then, call foo: | |
"call foo\n\t" | |
// Finally, we want to push those three things back on the stack and | |
// return. | |
// | |
// Since `foo` could have clobbered r10 and definitely clobbered rax | |
// (return value) we need to reconstruct our pointer to the backup | |
// location. | |
// | |
// Note that we can't assume that `foo` did not clobber `rdi` (len) so | |
// we're using the fact that `foo` returns `len` to get it from `rax`. | |
// | |
// We could maybe use the frame pointer so that we don't have to rely on | |
// the function we're passing to returning us the length (but then we | |
// maybe break with -fomit-frame-pointer). | |
"xor %%r10, %%r10\n\t" | |
"mov %%rax, %%rdi\n\t" | |
"sub %[N], %%rdi\n\t" | |
"cmovs %%r10, %%rdi\n\t" | |
"mov 0(%%rsp,%%rdi,8), %%r10\n\t" | |
"mov 0x10(%%r10), %%rdi\n\t" | |
"push %%rdi\n\t" | |
"mov 0x08(%%r10), %%rdi\n\t" | |
"push %%rdi\n\t" | |
"mov 0x00(%%r10), %%rdi\n\t" | |
"push %%rdi\n\t" | |
"ret\n\t" | |
: | |
: [N] "N" (NUM_REG_PASSED_ARGS) | |
); | |
} | |
// Calls `foo_wrapped` with `len`, the first five elements of `buf` (0 standing
// in for any that do not exist) and a by-value `struct args` holding the
// remaining elements followed by a pointer to a three-word scratch area.
//
//   len: number of ints in `buf`.
//   buf: the ints to forward; elements [0, len) are read.
//
// Returns whatever `foo_wrapped` returns.
//
// NOTE(review): `foo_wrapped` hard-codes stack offsets that match the code GCC
// emits for this function, so even innocuous-looking changes here can break it.
int less_criminal_but_probably_actually_worse(size_t len, const int* buf) { | |
const size_t NUM_REG_PASSED_ARGS = 5; | |
// `gcc` doesn't subtract the stack pointer by the right amount before
// calling `foo`. Maybe for alignment reasons?
//
// Still seems like a bug though; two garbage params end up being passed to
// `foo` on the stack.
//
// Actually I think it's that the code that gcc emits to copy the stuff to
// the stack doesn't communicate with the register allocator; it doesn't
// seem to know that storing things to the stack after copying over `args`
// (it literally invokes memcpy) will mess up the stack pointer.
//
// Layout of the by-value argument: the spilled ints (one 8-byte slot each),
// then the pointer `foo_wrapped` uses to find the scratch area.
struct args { | |
size_t buf[(len >= NUM_REG_PASSED_ARGS) ? (len - NUM_REG_PASSED_ARGS) : 0]; | |
size_t* backup; | |
}; | |
// Scratch area where `foo_wrapped` parks the three words it pops off the
// stack; the initial values are placeholders.
size_t backup[3] = { 0xDEADBEEF, 0xDEADBEEF, 0xDEADBEEF }; | |
struct args args; | |
args.backup = backup; | |
// Everything past the fifth element goes into the by-value struct.
for (int i = NUM_REG_PASSED_ARGS; i < len; i++) | |
args.buf[i - NUM_REG_PASSED_ARGS] = buf[i]; | |
return foo_wrapped( | |
len, | |
(len >= 1) ? buf[0] : 0, | |
(len >= 2) ? buf[1] : 0, | |
(len >= 3) ? buf[2] : 0, | |
(len >= 4) ? buf[3] : 0, | |
(len >= 5) ? buf[4] : 0, | |
args | |
); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// runtime-length-varargs.cc | |
#include <array> | |
#include <cstdint> | |
#include <type_traits> | |
#include <vector> | |
// Entry points defined in runtime-length-varargs.c, declared here with C
// linkage. Both take an element count and a pointer to that many ints.
extern "C" int abi_crimes(std::size_t len, const int* buf); | |
extern "C" int less_criminal_but_probably_actually_worse(std::size_t len, const int* buf); | |
// Hands a contiguous container to a `(length, pointer)` style callee.
//
//   func: callable invoked as `func(a.size(), a.data())`; its result is
//         converted to `int`.
//   a:    any container exposing `size()` and `data()`.
//
// A constraint on the element type was sketched but left disabled:
//   typename std::enable_if_t<
//       std::is_same<int, decltype(std::declval<Arr>()[0])>::value
//   >* = nullptr
template <typename Func, typename Arr>
inline int foo_forwarder(Func func, const Arr& a) {
    const int result = func(a.size(), a.data());
    return result;
}
// uses `abi_crimes` | |
template <typename Arr> | |
int foo_1(const Arr& a) { | |
return foo_forwarder(abi_crimes, a); | |
} | |
// uses `less_criminal_but_probably_actually_worse` | |
template <typename Arr> | |
int foo_2(const Arr& a) { | |
return foo_forwarder(less_criminal_but_probably_actually_worse, a); | |
} | |
#define LEN 30
#define foo foo_2

// Exercises the selected `foo` variant: once with a fixed 12-element array,
// then with vectors of every length from 0 up to (but not including) LEN, each
// filled with 0, 1, 2, ...
int main() {
    const std::array<int, 12> arr = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
    foo(arr);

    for (int count = 0; count < LEN; ++count) {
        std::vector<int> values(count);
        int next = 0;
        for (int& slot : values) {
            slot = next++;
        }
        foo(values);
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// If the length is known at compile-time: | |
#include <cassert> | |
#include <cstdarg> | |
#include <iostream> | |
#include <utility> | |
#include <vector> | |
// The C-style variadic target:
//
// Prints `len`, then the `len` variadic `int` arguments that follow it on one
// line. The caller must pass at least `len` ints after `len`.
inline void foo_inner(int len, ...) {
    va_list args;
    va_start(args, len);

    std::cout << "len: " << len << std::endl;
    for (int i = 0; i < len; i++) {
        const int next = va_arg(args, int);
        std::cout << next << " ";
    }
    std::cout << std::endl;

    // Every va_start must be paired with a va_end before the function returns.
    va_end(args);
}
// Downside is you need wrapper functions, one per length. This one forwards
// exactly ten elements of `arr` to `foo_inner`.
void foo_ten(int arr[10]) {
    const int* const v = arr;
    foo_inner(10,
              v[0], v[1], v[2], v[3], v[4],
              v[5], v[6], v[7], v[8], v[9]);
}
// But we can kind of generate this with some template foo:
// (mostly lifted from https://stackoverflow.com/a/34994591)
//
// Invokes `func(N, arr[I0], arr[I1], ...)` for the indices carried by the
// `std::index_sequence` argument and returns the result.
template <std::size_t N, typename F, typename T, std::size_t... Is>
inline auto splat_inner(F func, const T* arr, std::index_sequence<Is...>) {
    return func(N, *(arr + Is)...);
}
// for vectors: | |
template <std::size_t N, typename F, typename T> | |
inline auto splat(F func, const std::vector<T>& v) { | |
assert(v.size() >= N); | |
// This is illegal for vector specializations like vector<bool>. | |
return splat_inner<N>(func, v.data(), std::make_index_sequence<N>()); | |
} | |
// for arrays: | |
template <std::size_t N, typename F, typename T> | |
inline auto splat(F func, const T (&arr)[N]) { | |
return splat_inner<N>(func, &arr[0], std::make_index_sequence<N>()); | |
} | |
// Wrapper: | |
template <std::size_t N, typename Arr = int(&)[N]> | |
void foo(const Arr& arr) { | |
return splat<N, decltype(foo_inner), int>(foo_inner, arr); | |
} | |
// Demonstrates the compile-time-length wrapper with a built-in array and with
// a vector.
int main() {
    // A named array instead of the `(int[4]){...}` compound literal, which is
    // a GNU extension rather than ISO C++.
    const int first[4] = {1, 2, 3, 4};
    foo<4>(first);

    const std::vector<int> v = {10, 9, 8, 7, 6, 5};
    foo<6>(v);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment