Skip to content

Instantly share code, notes, and snippets.

@cloud11665
Created August 12, 2025 15:54
Show Gist options
  • Save cloud11665/46158922045f1b0d49fb16603b7ff061 to your computer and use it in GitHub Desktop.
# Minimal repro script: build a tiny C++ CUDA allocator shim with
# torch.utils.cpp_extension.load_inline, register it through
# torch.cuda.memory.CUDAPluggableAllocator, and route allocations made
# inside a torch.cuda.MemPool context through it.  As the trailing inline
# comment records, this segfaults on torch 2.7.1+cu126 when the tensor is
# printed — the script exists to reproduce that crash, so the code is left
# exactly as-is and only comments are added.
import torch
from torch.cuda.memory import CUDAPluggableAllocator
from torch.utils import cpp_extension
print(torch.__version__) # 2.7.1+cu126
# C++ source for the allocator hooks.  The extern "C" signatures
# (void*(ssize_t, int, cudaStream_t) and void(void*, ssize_t, int, cudaStream_t))
# are the ones CUDAPluggableAllocator resolves by symbol name from the dlopen'd
# shared object.  Each call logs to stderr so allocations/frees are visible.
# NOTE(review): inside this string, the cudaMalloc/cudaFree return codes are
# ignored — on allocation failure my_malloc would return an uninitialized
# pointer.  Left untouched because the string is part of the repro.
my_allocator_source = """
#include <iostream>
#include <cuda_runtime_api.h>
#include <cstdio>
extern "C" {
void* my_malloc(ssize_t size, int device, cudaStream_t stream) {
void *ptr;
cudaMalloc(&ptr, size);
fprintf(stderr, "alloc ptr=%p size=%ld device=%d\\n", ptr, size, device);
fflush(stderr);
return ptr;
}
void my_free(void* ptr, ssize_t size, int device, cudaStream_t stream) {
fprintf(stderr, "free ptr=%p size=%ld device=%d\\n", ptr, size, device);
fflush(stderr);
cudaFree(ptr);
}
}
"""
my_allocator_libname = "my_allocator"
# Compile the shim.  with_cuda=True adds the CUDA include/library paths;
# is_python_module=False means we only need the .so on disk, not an importable
# extension module.  build_directory="./" emits my_allocator.so (plus build
# artifacts) into the current directory so it can be dlopen'd by path below.
my_allocator = cpp_extension.load_inline(
    name=my_allocator_libname,
    cpp_sources=my_allocator_source,
    with_cuda=True,
    extra_ldflags=[],
    verbose=True,
    is_python_module=False,
    build_directory="./",
)
# dlopen the freshly built .so and bind the exported my_malloc/my_free symbols
# as the alloc/free hooks of a pluggable CUDA allocator.
pluggable = CUDAPluggableAllocator(
    f"./{my_allocator_libname}.so", "my_malloc", "my_free"
)
# NOTE(review): _allocator is a private attribute of the Python wrapper; it is
# the underlying allocator handle that MemPool accepts.  May break across
# torch versions — confirm against the MemPool API of the torch in use.
allocator_handle = pluggable._allocator
pool = torch.cuda.MemPool(allocator_handle)
# Allocations made while this context is active go through the custom
# allocator — the fprintf lines below (recorded as comments) show both the
# arange allocation and a second allocation triggered during print(a).
with torch.cuda.use_mem_pool(pool):
    # alloc ptr=0x76055cc00000 size=16777216 device=0
    a = torch.arange(1024 * 1024 * 2, device="cuda")
    # alloc ptr=0x76055de00000 size=2097152 device=0
    print(a)
    # segfault
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment