Skip to content

Instantly share code, notes, and snippets.

@SingleAccretion
Last active September 7, 2025 11:21
Show Gist options
  • Save SingleAccretion/b83da9cf9b17d7968ab4e508262feca3 to your computer and use it in GitHub Desktop.
Save SingleAccretion/b83da9cf9b17d7968ab4e508262feca3 to your computer and use it in GitHub Desktop.
[NativeAOT-LLVM] WASI SDK malloc profiling with stack traces

!!!WARNING!!! FOR DEMONSTRATION PURPOSES ONLY !!!WARNING!!!

The idea is to leverage the fact that the linker will NOT report a duplicate symbol definition error as long as it doesn't need to pull in libc's own dlmalloc.o.

Csproj:

<Project Sdk="Microsoft.NET.Sdk">
  <!-- Console app published with the NativeAOT-LLVM compiler packages referenced below. -->
  <PropertyGroup>
    <OutputType>exe</OutputType>
    <TargetFramework>net10.0</TargetFramework>
    <EmitCompilerGeneratedFiles>true</EmitCompilerGeneratedFiles>

    <PublishTrimmed>true</PublishTrimmed>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
    <MSBuildEnableWorkloadResolver>false</MSBuildEnableWorkloadResolver>
  </PropertyGroup>

  <!-- C sources that the CompileNativeCode target below turns into object files. -->
  <ItemGroup>
    <NativeCodeFile Include="dlmalloc.c" />
  </ItemGroup>

  <!-- Going by its name, the second package is the Windows x64 host flavour of the compiler; -->
  <!-- other build hosts presumably need the matching runtime.* package instead (verify). -->
  <ItemGroup>
    <PackageReference Include="Microsoft.DotNet.ILCompiler.LLVM" Version="10.0.0-*" />
    <PackageReference Include="runtime.win-x64.Microsoft.DotNet.ILCompiler.LLVM" Version="10.0.0-*" />
  </ItemGroup>

  <!-- Runs before IlcCompile. Compiles each NativeCodeFile with the clang found under -->
  <!-- WASI_SDK_PATH into "<Filename>.o" and adds that object file to the NativeLibrary items. -->
  <!-- Inputs/Outputs let MSBuild skip the clang invocation when the object file is up to date. -->
  <!-- NOTE(review): NativeLibrary is presumably consumed by the native link step; confirm. -->
  <Target Name="CompileNativeCode" Inputs="@(NativeCodeFile)" Outputs="@(NativeCodeFile->'%(Filename).o')" BeforeTargets="IlcCompile">
    <Exec Command="$(WASI_SDK_PATH)/bin/clang -c @(NativeCodeFile) -o %(Filename).o" />
    <ItemGroup>
      <NativeLibrary Include="@(NativeCodeFile->'%(Filename).o')" />
    </ItemGroup>
  </Target>
</Project>

dlmalloc.c (from https://github.com/WebAssembly/wasi-libc/blob/main/dlmalloc/src/dlmalloc.c):

// This file is a wrapper around malloc.c, which is the upstream source file.
// It sets configuration flags and controls which symbols are exported.

#include <stddef.h>
#include <stdio.h>
#include <malloc.h>

// Define configuration macros for dlmalloc.
// All of these must be defined before malloc.c is included at the end of this
// section, since that is where they are consumed.

// WebAssembly doesn't have mmap-style memory allocation.
#define HAVE_MMAP 0

// WebAssembly doesn't support shrinking linear memory.
#define MORECORE_CANNOT_TRIM 1

// Disable sanity checks to reduce code size.
// NOTE: because ABORT expands to __builtin_unreachable(), reaching an ABORT
// site is undefined behavior rather than a trap or a diagnostic.
#define ABORT __builtin_unreachable()

// If threads are enabled, enable support for threads.
#ifdef _REENTRANT
#define USE_LOCKS 1
#endif

// Make malloc deterministic.
#define LACKS_TIME_H 1

// Disable malloc statistics generation to reduce code size.
#define NO_MALLINFO 1
#define NO_MALLOC_STATS 1

// Align malloc regions to 16, to avoid unaligned SIMD accesses.
#define MALLOC_ALIGNMENT 16

// Declare errno values used by dlmalloc. We define them like this to avoid
// putting specific errno values in the ABI.
extern const int __ENOMEM;
#define ENOMEM __ENOMEM
extern const int __EINVAL;
#define EINVAL __EINVAL

// Define USE_DL_PREFIX so that we leave dlmalloc's names prefixed with 'dl'.
// We define them as "static", and we wrap them with public names below. This
// serves two purposes:
//
// One is to make it easy to control which symbols are exported; dlmalloc
// defines several non-standard functions and we wish to explicitly control
// which functions are part of our public-facing interface.
//
// The other is to protect against compilers optimizing based on the assumption
// that they know what functions with names like "malloc" do. Code in the
// implementation will call functions like "dlmalloc" and assume it can use
// the resulting pointers to access the metadata outside of the nominally
// allocated objects. However, if the function were named "malloc", compilers
// might see code like that and assume it has undefined behavior and can be
// optimized away. By using "dlmalloc" in the implementation, we don't need
// -fno-builtin to avoid this problem.
#define USE_DL_PREFIX 1
// Applied to the dl* entry points so they get internal linkage in this
// translation unit.
#define DLMALLOC_EXPORT static inline

// This isn't declared with DLMALLOC_EXPORT so make it static explicitly.
static size_t dlmalloc_usable_size(void*);

// Include the upstream dlmalloc's malloc.c.
// This is a textual include of a .c file, so malloc.c has to be reachable
// through the quoted-include search path (normally the directory of this file).
#include "malloc.c"

// Provided elsewhere (not defined in this file); returns the base address of
// the current thread's runtime data as a byte pointer so that a raw offset can
// be applied to it. Declared with a real (void) prototype: in C, an empty
// parameter list "()" leaves the parameters unspecified.
unsigned char* RhpGetThread(void);

// Byte offset, from the pointer returned by RhpGetThread(), of the
// pointer-sized field inspected below.
// NOTE(review): presumably this is the thread's transition-frame pointer for
// the specific runtime build used here; the value is layout-dependent and must
// be re-verified whenever the runtime package version changes.
#define RH_THREAD_MODE_FIELD_OFFSET 0x30

// Returns non-zero when the pointer-sized field at RH_THREAD_MODE_FIELD_OFFSET
// in the current thread's data is NULL, which this file treats as "the thread
// is in cooperative mode"; returns 0 otherwise.
int is_in_cooperative_mode(void)
{
    void* const* mode_field = (void* const*)(RhpGetThread() + RH_THREAD_MODE_FIELD_OFFSET);
    return *mode_field == NULL;
}

// Trace hook implemented outside this file; alloc_trace calls it with the
// requested byte count when the thread is not in cooperative mode.
extern void alloc_trace_impl_preempt(size_t size);
// extern void free_trace(void *ptr);

// Tracing switches, both zero until explicitly turned on:
//  - g_unmanaged_tracing_enabled is set by the constructor below;
//  - g_managed_tracing_enabled is set by enable_malloc_tracing().
static int g_unmanaged_tracing_enabled;
static int g_managed_tracing_enabled;

// Runs as a static constructor and switches on the "[Runtime Init]" tracing.
__attribute__((constructor))
void enabled_unmanaged_malloc_tracing()
{
    g_unmanaged_tracing_enabled = 1;
}

// Reports one allocation request of `size` bytes.
//
// While managed tracing is off, the request is printed with the
// "[Runtime Init]" prefix if unmanaged tracing is on, and otherwise ignored.
// Once managed tracing is on, the request is either printed directly with the
// "[Runtime]" prefix (thread in cooperative mode) or handed to
// alloc_trace_impl_preempt (any other mode).
// NOTE(review): presumably the hook is skipped in cooperative mode because it
// cannot be entered safely from there; confirm against the runtime's rules.
void alloc_trace(size_t size)
{
    if (!g_managed_tracing_enabled)
    {
        if (g_unmanaged_tracing_enabled)
        {
            printf("[Runtime Init] Allocating %zu\n", size);
        }
        return;
    }

    if (!is_in_cooperative_mode())
    {
        alloc_trace_impl_preempt(size);
        return;
    }

    printf("[Runtime] Allocating %zu\n", size);
}

// Switches alloc_trace from the "[Runtime Init]" path to the managed-tracing
// path by setting g_managed_tracing_enabled. There is no way to turn it back
// off in this file.
void enable_malloc_tracing()
{
    g_managed_tracing_enabled = 1;
}

// Public malloc: reports the requested size through alloc_trace, then forwards
// to dlmalloc. The trace is emitted before the allocation is attempted, so it
// is reported whether or not the allocation succeeds.
void* malloc(size_t size) {
    alloc_trace(size);
    return dlmalloc(size);
}

// Public free: forwards to dlfree. Frees are not traced; the free_trace hook
// is left commented out.
void free(void* ptr) {
    // free_trace(ptr);
    dlfree(ptr);
}

// Public calloc: reports the total number of bytes requested
// (nmemb * size), then forwards to dlcalloc.
//
// The total is what gets traced, not the per-element size, so the figure is
// comparable with the ones reported by malloc/realloc. If the product does not
// fit in size_t, the maximum size_t value is reported instead of a wrapped
// number; the arguments are still passed to dlcalloc unchanged.
void* calloc(size_t nmemb, size_t size) {
    const size_t max_size = (size_t)-1;
    size_t total_bytes;
    if (size != 0 && nmemb > max_size / size) {
        total_bytes = max_size; // nmemb * size would overflow
    } else {
        total_bytes = nmemb * size;
    }
    alloc_trace(total_bytes);
    return dlcalloc(nmemb, size);
}

// Public realloc: reports the new requested size (not the difference from the
// old size) through alloc_trace, then forwards to dlrealloc.
void* realloc(void* ptr, size_t size) {
    alloc_trace(size);
    return dlrealloc(ptr, size);
}

// Public posix_memalign: reports the requested size through alloc_trace (the
// alignment is not reported), then forwards to dlposix_memalign and returns
// its result code.
int posix_memalign(void** memptr, size_t alignment, size_t size) {
    alloc_trace(size);
    return dlposix_memalign(memptr, alignment, size);
}

// Public aligned_alloc: reports the requested byte count through alloc_trace
// (the alignment is not reported), then forwards to dlmemalign.
void* aligned_alloc(size_t alignment, size_t bytes) {
    alloc_trace(bytes);
    return dlmemalign(alignment, bytes);
}

// Public malloc_usable_size: forwards to dlmalloc_usable_size. It does not
// allocate, so nothing is traced.
size_t malloc_usable_size(void* ptr) {
    return dlmalloc_usable_size(ptr);
}

// Define these to satisfy musl references.
// Each __libc_* name is an alias of the corresponding public wrapper defined
// in this file, so calls made through these names run the same code (including
// the tracing in malloc and calloc).
void* __libc_malloc(size_t) __attribute__((alias("malloc")));
void __libc_free(void*) __attribute__((alias("free")));
void* __libc_calloc(size_t nmemb, size_t size) __attribute__((alias("calloc")));

malloc.c copied verbatim from https://github.com/WebAssembly/wasi-libc/blob/main/dlmalloc/src/malloc.c.

Program.cs:

using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

/// <summary>
/// Demo program: performs one native allocation and prints a managed stack
/// trace for allocation requests reported through the native
/// <c>alloc_trace_impl_preempt</c> hook.
/// </summary>
internal unsafe class Program
{
    // Reentrancy guard: set while TraceMalloc is running so that trace
    // requests arriving during the tracing itself are ignored instead of
    // recursing.
    private static bool s_nowTracingMalloc;

    static void Main(string[] args)
    {
        // The returned pointer is deliberately unused; the call is only here
        // to produce a native allocation that shows up in the trace.
        NativeMemory.Alloc(55);
    }

    /// <summary>
    /// Native-callable trace hook, exported as <c>alloc_trace_impl_preempt</c>.
    /// Prints the requested size followed by the current managed stack trace.
    /// </summary>
    /// <param name="size">
    /// Requested allocation size in bytes. Declared as <c>nuint</c> because the
    /// native caller passes a <c>size_t</c>; a signed type would print large
    /// requests as negative numbers.
    /// </param>
    [UnmanagedCallersOnly(EntryPoint = "alloc_trace_impl_preempt")]
    public static void TraceMalloc(nuint size)
    {
        if (s_nowTracingMalloc)
        {
            return;
        }

        s_nowTracingMalloc = true;
        try
        {
            Console.WriteLine("[Program] Allocating " + size);
            // Skip one frame so the trace starts at the caller of this hook.
            Console.WriteLine(new StackTrace(1));
        }
        finally
        {
            // Always clear the guard so a failure while printing cannot leave
            // tracing permanently disabled.
            s_nowTracingMalloc = false;
        }
    }

    /// <summary>
    /// Module initializer that switches the native side over to managed
    /// tracing by calling <c>enable_malloc_tracing</c>.
    /// </summary>
    [ModuleInitializer]
    public static void RuntimeInitializedChecker()
    {
        enable_malloc_tracing();
    }

    // Native function exported by the custom allocator object file.
    [DllImport("*")]
    private static extern void enable_malloc_tracing();
}

Build and run:

> dotnet publish -r wasi-wasm
> wasmtime .\bin\Release\net10.0\wasi-wasm\publish\TestConsole.wasm

[Runtime Init] Allocating 17
[Runtime Init] Allocating 4
[Runtime Init] Allocating 48
[Runtime Init] Allocating 36
[Runtime Init] Allocating 32
[Runtime Init] Allocating 4
[Runtime Init] Allocating 4
[Runtime Init] Allocating 12
[Runtime Init] Allocating 50331648
[Runtime Init] Allocating 98380
[Runtime Init] Allocating 32768
[Runtime Init] Allocating 3224
[Runtime Init] Allocating 76
[Runtime Init] Allocating 76
[Runtime Init] Allocating 4
[Runtime Init] Allocating 76
[Runtime Init] Allocating 76
[Runtime Init] Allocating 9728
[Runtime Init] Allocating 44
[Runtime Init] Allocating 400
[Runtime Init] Allocating 24
[Runtime Init] Allocating 1828
[Runtime Init] Allocating 40
[Runtime Init] Allocating 12
[Runtime Init] Allocating 4
[Runtime Init] Allocating 5772
[Runtime Init] Allocating 65536
[Runtime Init] Allocating 20
[Runtime Init] Allocating 1000000
[Runtime Init] Allocating 12
[Runtime Init] Allocating 8
[Runtime Init] Allocating 24
[Runtime Init] Allocating 8
[Runtime Init] Allocating 72
[Runtime Init] Allocating 1
[Program] Allocating 55
[Runtime] Allocating 65536
[Runtime] Allocating 20
   at System.Runtime.InteropServices.NativeMemory.Alloc(UIntPtr)
   at Program.Main(String[] args)

[Program] Allocating 1
   at System.Threading.LowLevelMonitor.Initialize()
   at System.Threading.WaitSubsystem.ThreadWaitInfo..ctor(Thread)
   at System.Threading.Thread..ctor()
   at System.Threading.Thread.InitializeCurrentThread()
   at System.Threading.Thread.WaitForForegroundThreads()
   at Internal.Runtime.CompilerHelpers.StartupCodeHelpers.Shutdown()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment