Skip to content

Instantly share code, notes, and snippets.

@schaumb
Created April 20, 2022 16:05
Show Gist options
  • Select an option

  • Save schaumb/01aa44263df0d752b0617f39d9365c6d to your computer and use it in GitHub Desktop.

Select an option

Save schaumb/01aa44263df0d752b0617f39d9365c6d to your computer and use it in GitHub Desktop.
make_temporary_for_overwrite, make_function_scoped_for_overwrite
#include <type_traits>
#if __has_include(<alloca.h>)
#include <alloca.h>
#else
#define alloca _alloca
#endif
namespace bxlx {
namespace detail {
// Compile-time probe: true when sizeof(T) is a valid expression, false otherwise.
// Primary template, used whenever the specialization below is SFINAE'd away.
template <typename T, typename = void>
[[maybe_unused]] constexpr static bool is_complete_v = false;
// Specialization that is viable only if sizeof(T) can be formed.
template <typename T>
[[maybe_unused]] constexpr static bool is_complete_v<T, std::void_t<decltype(sizeof(T))>> = true;
// Forward declarations so the classes below can befriend the factory
// functions before those functions are defined.
// Wrapper's second parameter U is the type that gets placement-new'd:
// T[1] when T is an array type, T itself otherwise.
template<typename T, typename U = std::conditional_t<std::is_array_v<T>, T[1], T>>
class Wrapper;
template<typename T>
class Ptr;
// Enabled only for complete T. The default argument creates the Wrapper<T>
// temporary that provides the storage.
template<typename T>
constexpr std::enable_if_t<is_complete_v<T>, T*> make_temporary_for_overwrite(detail::Wrapper<T>&& v = detail::Wrapper<T>());
// Enabled only for complete, trivially destructible T.
template<typename T, typename U = std::conditional_t<std::is_array_v<T>, T[1], T>>
std::enable_if_t<is_complete_v<T> && std::is_trivially_destructible_v<T>, T*> make_function_scoped_for_overwrite(Ptr<T>&& p);
// Ends the lifetime of *p without releasing its storage.
// For array types every element is destroyed, front to back, by recursion.
template<typename V>
constexpr static void destroy_at(V* p) {
    if constexpr (!std::is_array_v<V>) {
        p->~V();
    } else {
        for (auto &item : *p) {
            // Fully qualified call: argument-dependent lookup is not performed,
            // so this overload is always the one that recurses.
            ::bxlx::detail::destroy_at(std::addressof(item));
        }
    }
}
// Raw, suitably aligned storage for one U, together with the code that
// creates an object in it and destroys that object again.
// All members are private; make_temporary_for_overwrite<T> is the only friend.
template<typename T, typename U>
class Wrapper {
constexpr inline Wrapper() noexcept = default;
// Byte buffer sized and aligned for U; left uninitialized.
alignas(alignof(U)) unsigned char v[sizeof(U)];
// Default-initializes a U inside the buffer with global placement new and
// returns the resulting address as T*.
constexpr inline T* createNGet() {
return ::new (v) U;
}
friend constexpr inline std::enable_if_t<is_complete_v<T>, T*> make_temporary_for_overwrite<T>(detail::Wrapper<T>&&);
// Destroys the stored object unconditionally, so this assumes createNGet()
// has already been called on this instance.
~Wrapper() {
auto p = std::launder(reinterpret_cast<T*>(v));
::bxlx::detail::destroy_at(p);
// NOTE(review): this selects the (void*, void*) placement form of operator
// delete, which presumably does nothing — confirm whether the call is needed.
::operator delete(p, v);
}
};
// Creates a default-initialized T inside the Wrapper passed in `v` and returns
// its address. The object lives in `v`, so the pointer is only usable while
// that Wrapper is alive.
template<typename T>
constexpr inline std::enable_if_t<is_complete_v<T>, T*> make_temporary_for_overwrite(detail::Wrapper<T>&& v) {
    T* const constructed = v.createNGet();
    return constructed;
}
// Deleted fallback: it is the only viable overload when T is incomplete, so
// such a call is rejected at compile time.
template<typename T>
constexpr std::enable_if_t<!is_complete_v<T>, T*> make_temporary_for_overwrite() = delete;
template<typename T>
class Ptr {
constexpr explicit inline Ptr(T* val) noexcept : val(val) {}
T* val;
template<typename U>
friend inline std::enable_if_t<is_complete_v<U> && std::is_trivially_destructible_v<U>, U*> make_function_scoped_for_overwrite(Ptr<U>&&);
};
// Returns a pointer to a default-initialized object that was placement-new'd
// into memory obtained from alloca(). The function body only unwraps the
// pointer; all the work happens in the default argument.
template<typename T, typename U>
inline std::enable_if_t<is_complete_v<T> && std::is_trivially_destructible_v<T>, T*> make_function_scoped_for_overwrite(
// Default argument: alloca()s sizeof(Wrapper<U>) bytes and constructs a U there.
// NOTE(review): presumably relies on the default argument being evaluated in
// the caller, so that the alloca() memory belongs to the caller's frame — confirm.
// NOTE(review): no alignment adjustment is made; assumes alloca()'s result is
// sufficiently aligned for U — confirm for over-aligned types.
Ptr<T>&& p = Ptr<T>{new (alloca(sizeof(Wrapper<U>))) U}
) {
return p.val;
}
// Deleted fallback: participates when T is incomplete or not trivially
// destructible, so such calls are rejected at compile time.
template<typename T>
constexpr std::enable_if_t<!std::is_trivially_destructible_v<T> || !is_complete_v<T>, T*> make_function_scoped_for_overwrite() = delete;
} // detail
using detail::make_temporary_for_overwrite;
using detail::make_function_scoped_for_overwrite;
} // bxlx
@schaumb

schaumb commented Dec 9, 2025

Copy link
Copy Markdown
Author
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <memory>
#include <new>
#include <ranges>
#include <type_traits>
#include <utility>
#include <vector>

#if __has_include(<alloca.h>)
#include <alloca.h>
#else
#define alloca _alloca
#endif


namespace bxlx {
namespace detail {
    // Compile-time probe: true when sizeof(T) is a valid expression, false otherwise.
    // Primary template, used whenever the specialization below is SFINAE'd away.
    template <typename T, typename = void>
    [[maybe_unused]] constexpr static bool is_complete_v = false;
    // Specialization that is viable only if sizeof(T) can be formed.
    template <typename T>
    [[maybe_unused]] constexpr static bool is_complete_v<T, std::void_t<decltype(sizeof(T))>> = true;

    /// Stateless callable that runs std::destroy_at on the pointer it is given.
    /// It contains no deallocation call, only the destructor invocation.
    template<class T>
    struct destroyer {
        __attribute__((always_inline)) static void operator()(T* ptr)
        {
            // The template argument is deduced from `ptr`, which yields T again.
            std::destroy_at(ptr);
        }
    };

    /// Handle to a single T that was constructed in storage provided elsewhere.
    /// The pointer is kept in a std::unique_ptr whose deleter is destroyer<T>,
    /// so that deleter is what runs when the handle is reset or destroyed.
    template<class T>
    struct function_scoped_ptr {
        /// Creates an empty handle; the unique_ptr member is default-constructed.
        __attribute__((always_inline)) consteval explicit function_scoped_ptr() noexcept {}
        /// Adopts the object `ptr` refers to by storing its address.
        __attribute__((always_inline)) explicit function_scoped_ptr(T&& ptr) noexcept : ptr{std::addressof(ptr)} {};
        __attribute__((always_inline)) ~function_scoped_ptr() = default;
        // Move construction is disabled while move assignment stays available.
        function_scoped_ptr(function_scoped_ptr&&) = delete;
        __attribute__((always_inline)) function_scoped_ptr& operator=(function_scoped_ptr&&) noexcept = default; 

        /// Forwards to the unique_ptr member's operator->.
        __attribute__((always_inline)) decltype(auto) operator->(this auto&& self) 
        {
            return self.ptr.operator->();
        }

        /// Forwards to the unique_ptr member's operator*.
        __attribute__((always_inline)) decltype(auto) operator*(this auto&& self)
        {
            return self.ptr.operator*();
        }

        /// Returns whatever operator* returns (the pointee, not a raw pointer),
        /// so like operator* it requires a non-empty handle.
        __attribute__((always_inline)) decltype(auto) get(this auto&& self) {
            return *self;
        }

        /// True when the handle currently refers to an object.
        /// NOTE(review): the conversion is implicit — confirm that is intended.
        __attribute__((always_inline)) operator bool() const noexcept {
            return static_cast<bool>(ptr);
        }

        /// True when the handle is empty.
        __attribute__((always_inline)) bool operator!() const noexcept {
            return !ptr;
        }

        std::unique_ptr<T, destroyer<T>> ptr;
    };

    /// Specialization for bounded arrays T[N]: a handle that can be iterated
    /// and indexed. An empty handle behaves like an empty range.
    template<class T, std::size_t N>
    struct function_scoped_ptr<T[N]> {
        /// Creates an empty handle; the unique_ptr member is default-constructed.
        __attribute__((always_inline)) consteval explicit function_scoped_ptr() noexcept {}

        /// Adopts the array `arg` refers to by storing its address.
        __attribute__((always_inline)) explicit function_scoped_ptr(T(&& arg)[N]) noexcept
            : ptr{&arg}
            {}

        /// Pointer to the first element, or nullptr when the handle is empty.
        __attribute__((always_inline)) decltype(auto) begin(this auto&& self) {
            return self.ptr ? std::begin(*self.ptr) : nullptr;
        }

        /// Pointer one past the last element, or nullptr when the handle is empty.
        __attribute__((always_inline)) decltype(auto) end(this auto&& self) {
            return self.ptr ? std::end(*self.ptr) : nullptr;
        }

        /// N for a non-empty handle, 0 for an empty one.
        __attribute__((always_inline)) decltype(auto) size(this auto&& self) {
            return self.ptr ? N : 0;
        }

        /// Indexes the stored array. There is no emptiness or bounds check, so
        /// the handle must be non-empty and the index in range.
        __attribute__((always_inline)) decltype(auto) operator[](this auto&& self, auto&& arg)
        {
            return (*self.ptr)[std::forward<decltype(arg)>(arg)];
        }

        /// True when the handle currently refers to an array.
        /// NOTE(review): the conversion is implicit — confirm that is intended.
        __attribute__((always_inline)) operator bool() const noexcept {
            return static_cast<bool>(ptr);
        }

        /// True when the handle is empty.
        __attribute__((always_inline)) bool operator!() const noexcept {
            return !ptr;
        }

        std::unique_ptr<T[N], destroyer<T[N]>> ptr;
    };

    // Deduction guide: constructing from an rvalue array T(&&)[N] without
    // explicit template arguments deduces function_scoped_ptr<T[N]>.
    // NOTE(review): `noexcept` does not appear in the standard grammar for
    // deduction guides — confirm that every targeted compiler accepts it.
    template<class T, std::size_t N>
    function_scoped_ptr(T (&&)[N]) noexcept -> function_scoped_ptr<T[N]>;


    /// Carries the address at which one U may be constructed.
    ///
    /// The default argument calls alloca() for sizeof(U) + alignof(U) bytes.
    /// The extra alignof(U) bytes are slack that lets the constructor round the
    /// address up to U's alignment.
    ///
    /// @param res start of the buffer. With the default argument it has
    ///            sizeof(U) + alignof(U) bytes. A caller-supplied pointer that
    ///            is already aligned for U is stored unchanged.
    template<class U>
    struct AllocaAddress {
        __attribute__((always_inline)) explicit AllocaAddress(void*&& res = alloca(sizeof(U) + alignof(U)))
            : addr(res) {
            // std::align advances `addr` to the next alignof(U) boundary. With
            // sizeof(U) + alignof(U) bytes available the adjustment always fits.
            // On failure std::align leaves `addr` untouched.
            std::size_t space = sizeof(U) + alignof(U);
            static_cast<void>(std::align(alignof(U), sizeof(U), addr, space));
        }

        /// Address suitably aligned for U.
        void* addr;
    };

    /// Default-initializes a T at the address carried by `alloca_res` and
    /// returns it as an rvalue reference. Nothing in this function destroys
    /// the object afterwards.
    /// Overload for non-array, complete, trivially default constructible T.
    template<typename T>
        requires(!std::is_array_v<T> && is_complete_v<T> && std::is_trivially_default_constructible_v<T>)
    [[nodiscard]] __attribute__((always_inline)) inline T&&
        make_function_scoped_for_overwrite(AllocaAddress<T>&& alloca_res = AllocaAddress<T>{}
    ) {
        T* const fresh = new (alloca_res.addr) T;
        return static_cast<T&&>(*fresh);
    }

    /// Bounded-array overload: default-initializes a T[1] at the address carried
    /// by `alloca_res` and returns its single element (the array T) as an rvalue
    /// reference. Nothing in this function destroys the elements afterwards.
    /// Requires the innermost element type to be trivially default constructible.
    template<typename T>
        requires(std::is_bounded_array_v<T> && is_complete_v<T> && std::is_trivially_default_constructible_v<std::remove_all_extents_t<T>>)
    [[nodiscard]] __attribute__((always_inline)) inline T&&
        make_function_scoped_for_overwrite(AllocaAddress<T[1]>&& alloca_res = AllocaAddress<T[1]>{}
    ) {
        T* const fresh = new (alloca_res.addr) T[1];
        return static_cast<T&&>(*fresh);
    }

    /// Helper parameter type for make_function_scoped.
    ///
    /// It captures a storage address (by default from AllocaAddress<U>) and,
    /// optionally, the first constructor argument. `ref` is a callable that
    /// takes the remaining `Args` and constructs the T at that address.
    ///
    /// @tparam T     type to construct; a bounded array is built as T[1].
    /// @tparam Args  types of the arguments supplied later when `ref` is invoked.
    template<class T, class ...Args>
    struct FirstArg {
        using U = std::conditional_t<std::is_bounded_array_v<T>, T[1], T>;
        using Ret = T&&;

        /// Stateless constructor functor: builds the object at `addr` from
        /// `args` and returns it as an rvalue reference.
        struct WithArgument {
            __attribute__((always_inline)) static Ret operator()(void* addr, auto&& ... args) {
                if constexpr (std::is_bounded_array_v<T>) {
                    // `args` become the initializers of the single T element of T[1].
                    return std::move(*new (addr) T[1]{{std::forward<decltype(args)>(args)...}, });
                } else {
                    return std::move(*new (addr) T(std::forward<decltype(args)>(args)...));
                }
            }
        };

        /// No leading argument: only the storage address is bound.
        __attribute__((always_inline)) inline FirstArg(AllocaAddress<U>&& alloca_res = AllocaAddress<U>{})
            : ref{std::bind_front(WithArgument{}, alloca_res.addr)}
            {}

        /// Binds the storage address together with the first argument.
        /// `arg` is perfectly forwarded, so an rvalue is moved into the bound
        /// state rather than copied. The constructor is non-explicit; presumably
        /// this lets the first call argument convert implicitly to FirstArg.
        template<class Arg>
        __attribute__((always_inline)) inline FirstArg(Arg&& arg, AllocaAddress<U>&& alloca_res = AllocaAddress<U>{})
                : ref{std::bind_front(WithArgument{}, alloca_res.addr, std::forward<Arg>(arg))}
            {}

        /// Type-erased constructor call awaiting the remaining arguments.
        std::move_only_function<Ret(Args&&...)> ref;
    };

    /// Constructs a T through the callable stored in `arg1` and wraps the
    /// result in a function_scoped_ptr<T>.
    ///
    /// @param arg1  FirstArg holding the storage address (and possibly the
    ///              first constructor argument); defaults to an empty one.
    /// @param args  remaining constructor arguments, forwarded to `arg1.ref`.
    /// @return handle referring to the newly constructed object.
    template<typename T, typename ...Args>
        requires(!std::is_unbounded_array_v<T> && is_complete_v<T>)
    [[nodiscard]] __attribute__((always_inline)) inline function_scoped_ptr<T>
        make_function_scoped(
            FirstArg<T, std::type_identity_t<Args>...>&& arg1 = {},
            Args&&...args
    ) {
        // Call the stored callable directly; it returns T&&.
        return function_scoped_ptr<T>{arg1.ref(std::forward<Args>(args)...)};
    }
} // detail

    // template<typename T>
    // struct function_scoped_ptr;

    // template<typename T, std::size_t N>
    // struct function_scoped_ptr<T[N]>;

    // template<typename T>
    //     requires(!std::is_array_v<T>)
    // T&& make_function_scoped_for_overwrite();

    // template<typename T>
    //     requires(std::is_bounded_array_v<T>)
    // T&& make_function_scoped_for_overwrite();

    // template<typename T, typename ...Args>
    //     requires(!std::is_array_v<T>)
    // function_scoped_ptr<T> make_function_scoped(Args&&...);

    // template<typename T, typename ...Elements>
    //     requires(std::is_bounded_array_v<T>)
    // function_scoped_ptr<T> make_function_scoped(Elements&& ...);

    using detail::function_scoped_ptr;
    using detail::make_function_scoped;
    using detail::make_function_scoped_for_overwrite;

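    // DO NOT return the result from the function that created it: the storage
    // comes from alloca() and is released when that function returns.
    // DO NOT call the make_* helpers repeatedly inside a loop: every call
    // alloca()s a new buffer and none is released before the function returns.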
} // bxlx

void fun1();
void fun2();

using type = decltype(rand());

// Demo payload: two 1000-element arrays plus a vector whose length is
// rand() modulo 100. The destructor prints a marker when it runs.
struct BigStruct {
    std::array<std::uint64_t, 1000> arr;
    std::array<std::pair<int, int>, 1000> pos;
    // Length in [0, 99]; the remainder is the same value std::div(...).rem yields.
    std::vector<int> back = std::vector<int>(::rand() % 100);

    ~BigStruct() {
        printf("Deallocated as expected ");
    }
};

// Demo driver: runs the plain-array variant (fun1) and then the
// function_scoped_ptr variant (fun2).
int main() {

    // Why bother? For the simple cases below the helper and the plain
    // declaration do the same thing, and the plain declaration is simpler.
    // auto&& v1 = bxlx::make_function_scoped_for_overwrite<type>();
    //   == 
    // type v1;


    // auto&& v = bxlx::make_function_scoped<type>(rand());
    //   == 
    // type v{rand()};

    // auto&& arr = bxlx::make_function_scoped<type[4]>(0, 1, 2, 4);
    //   == 
    // type arr[4]{0, 1, 2, 4};
    

    // The difference: an object created in an inner scope can be handed to an
    // outer scope of the same function.
    fun1();

    fun2();

    /*
    bxlx::function_scoped_ptr<BigStruct> v;

    // conditionally storing on stack
    if (rand() % 2 == 0) {
        v = bxlx::make_function_scoped<BigStruct>();
    }
    */

    // Why not std::optional? Because an optional reserves the stack space for
    // its value up front, whether or not a value is ever stored.
}

// Baseline variant: the result buffer is an ordinary local array, so its stack
// space is reserved whether or not a result is ever produced.
// Prints the buffer contents when a result was found, and nothing otherwise.
void fun1(){
    bool found{};
    type arr[2048]; // allocating on the stack, (multiple dimension??)

    // Draw the two bounds in a fixed order (function-argument evaluation order
    // is unspecified) and clamp the end so the range never starts past its end,
    // which std::views::iota requires.
    const int first = std::div(rand(), 10).rem;
    const int last = std::max(first, std::div(rand(), 100).rem);

    for ([[maybe_unused]] auto c : std::views::iota(first, last)) {

        if (rand() % 2 == 0) {
            // heavy computation big result depends on condition
            // this goes to the (preallocated) stack

            found = true;
            // fill (initialize) arr with result
            for (auto& e : arr) {
                new (&e) type{rand()};
            }
            break;
        }
        // probably multiple cases where different set/ fill strategy
        // probability to not found
    }
    if (found) {
        for (auto& e : arr) {
            printf("%d ", e);
        }
    }
}


// function_scoped_ptr variant: the handle starts empty and the array is only
// created, via make_function_scoped_for_overwrite, on the path that produces
// a result. Prints the array contents, or nothing if the handle stayed empty.
void fun2 () {
    bxlx::function_scoped_ptr<type[2048]> ptr; // not allocating from the stack yet

    // Draw the two bounds in a fixed order (function-argument evaluation order
    // is unspecified) and clamp the end so the range never starts past its end,
    // which std::views::iota requires.
    const int first = std::div(rand(), 10).rem;
    const int last = std::max(first, std::div(rand(), 100).rem);

    for ([[maybe_unused]] auto c : std::views::iota(first, last)) {

        if (rand() % 2 == 0) {
            // heavy computation big result depends on condition
            // this goes to the stack (no heap allocation)
            type (&& arr)[2048] = bxlx::make_function_scoped_for_overwrite<type[2048]>();
            // fill (initialize) arr with result

            for (auto& e : arr) {
                new (&e) type{rand()};
            }

            // save result to outer scope; the `break` below guarantees the
            // allocation above happens at most once per call
            ptr = bxlx::function_scoped_ptr{std::move(arr)};
            break;
        }
        // probably multiple cases where different set/ fill strategy
        // probability to not found
    }
    // not found -> no stack (foreach handles it)
    for (auto& e : ptr) {
        printf("%d ", e);
    }
}
/*

There is a very important difference between fun1 and fun2, and it relates to when and how the large 8KB array is allocated on the stack. 

The core logic of the functions is identical, but fun2 is more efficient with its memory usage. 

Feature 	        fun1 	                            fun2
Array Allocation	Unconditional (at function start)	Conditional
Memory Efficiency	Less efficient (always uses 8KB)	More efficient (uses 8KB only if needed) 
Stack Management	Simple add/sub rsp	                Standard frame pointer (rbp)


In short, fun2 represents a more optimized and safer way to handle large, conditionally-used stack allocations compared to fun1.
*/

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment