@papamoose
Last active March 8, 2025 19:53
CUDA GPU VRAM Test

Two small VRAM exercisers: a standalone CUDA C++ program and a PyTorch script. Each repeatedly allocates 100 MB chunks of GPU memory, writes a known pattern, and verifies it until the device runs out of memory.
/* Compile in any of the following ways
nvcc vram_test.cu -o vram_test
nvcc -Wno-deprecated-gpu-targets vram_test.cu -o vram_test
nvcc -arch=sm_86 vram_test.cu -o vram_test
nvcc -arch=sm_75 -gencode=arch=compute_86,code=sm_86 vram_test.cu -o vram_test
*/
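// Note: -arch / -gencode select the target GPU architecture. sm_86 corresponds to
// compute capability 8.6 (Ampere, e.g. RTX 30xx) and sm_75 to 7.5 (Turing); use the
// value that matches the GPU under test, or omit the flag to use nvcc's defaults.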
#include <cuda_runtime.h>
#include <iostream>
#include <cstdlib>
int main(int argc, char *argv[]) {
    int gpu_id = 0; // Default GPU

    // Use command-line argument if provided
    if (argc > 1) {
        gpu_id = std::atoi(argv[1]);
    }

    int device_count = 0;
    cudaGetDeviceCount(&device_count);
    std::cout << "CUDA sees " << device_count << " GPU(s)" << std::endl;

    if (gpu_id < 0 || gpu_id >= device_count) {
        std::cerr << "Error: Invalid GPU ID " << gpu_id << ". Available GPUs: 0 to " << device_count - 1 << std::endl;
        return -1;
    }

    cudaError_t err = cudaSetDevice(gpu_id);
    if (err != cudaSuccess) {
        std::cerr << "Failed to set GPU " << gpu_id << ": " << cudaGetErrorString(err) << std::endl;
        return -1;
    }
    std::cout << "Using GPU " << gpu_id << std::endl;

    const size_t alloc_size = 100 * 1024 * 1024; // 100 MB per allocation
    size_t total_allocated = 0;
    void *d_mem;

    // Keep allocating 100 MB chunks until the device runs out of memory.
    // Allocations are intentionally never freed so the test walks the whole VRAM.
    while (true) {
        err = cudaMalloc(&d_mem, alloc_size);
        if (err != cudaSuccess) {
            std::cerr << "Memory allocation failed after " << total_allocated / (1024.0 * 1024.0)
                      << " MB: " << cudaGetErrorString(err) << std::endl;
            break;
        }

        // Write data to GPU memory
        err = cudaMemset(d_mem, 0xAA, alloc_size);
        if (err != cudaSuccess) {
            std::cerr << "Memory memset failed at " << total_allocated / (1024.0 * 1024.0)
                      << " MB: " << cudaGetErrorString(err) << std::endl;
            break;
        }

        // Verify by copying the data back to the host
        char *host_mem = new char[alloc_size];
        err = cudaMemcpy(host_mem, d_mem, alloc_size, cudaMemcpyDeviceToHost);
        if (err != cudaSuccess) {
            std::cerr << "Memory copy back failed at " << total_allocated / (1024.0 * 1024.0)
                      << " MB: " << cudaGetErrorString(err) << std::endl;
            delete[] host_mem;
            break;
        }

        // Check if memory contains the expected data (just the first byte)
        if (host_mem[0] != (char)0xAA) {
            std::cerr << "Memory corruption detected at " << total_allocated / (1024.0 * 1024.0) << " MB!" << std::endl;
            delete[] host_mem;
            break;
        }
        delete[] host_mem;

        total_allocated += alloc_size;
        std::cout << "Successfully tested " << total_allocated / (1024.0 * 1024.0) << " MB" << std::endl;
    }

    return 0;
}
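The loop above only inspects the first byte of each 100 MB chunk after copying it back. If a fuller check is wanted, a small helper along these lines could scan the whole buffer; this is a sketch, not part of the original program, and it assumes the same host_mem / alloc_size / 0xAA pattern used above:

// Sketch of a whole-buffer check for the 0xAA fill pattern (hypothetical helper,
// not part of the original program).
#include <cstddef>
static bool verify_pattern(const char *buf, std::size_t len, char expected) {
    for (std::size_t i = 0; i < len; ++i) {
        if (buf[i] != expected) {
            return false; // stop at the first mismatching byte
        }
    }
    return true;
}
// Inside the loop above, the single-byte test could then become:
//   if (!verify_pattern(host_mem, alloc_size, (char)0xAA)) { ... }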
#!/usr/bin/python3
# python3 -m pip install torch torchvision torchaudio \
# --index-url https://download.pytorch.org/whl/cu126 --break-system-packages --ignore-installed
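# The --break-system-packages and --ignore-installed flags are only needed when pip
# refuses to install into a distro-managed Python (PEP 668 "externally managed
# environment", e.g. recent Debian/Ubuntu); installing into a virtualenv avoids them.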
import torch
import time
def test_vram(device_id=0, target_vram=None):
    """
    Test VRAM on a CUDA device by allocating 100 MB chunks of memory.

    Args:
        device_id (int): The ID of the CUDA device to test.
        target_vram (int, optional): Maximum amount of VRAM (in GB) to allocate.
            Currently unused; the commented-out block below sketches how it could cap the test.
    """
    torch.cuda.set_device(device_id)
    device = f"cuda:{device_id}"
    print(f"Testing VRAM on device {device}...")

    #if target_vram is None:
    #    # If no limit is specified, use the available VRAM
    #    max_vram = int(free_memory_mb)
    #else:
    #    max_vram = min(int(target_vram * 1024), int(total_memory_mb))
    #print(f"[+] Detected {total_memory_mb / 1024:.2f} GB of total VRAM and {free_memory_mb / 1024:.2f} GB of available VRAM. Testing up to {max_vram / 1024} GB.")

    chunk_size_mb = 100
    element_size = torch.finfo(torch.float32).bits // 8  # Size of a float32 in bytes
    allocated_chunks = []
    allocated_vram_mb = 0

    try:
        while True:
            # Get free and total memory using mem_get_info
            # https://pytorch.org/docs/stable/generated/torch.cuda.mem_get_info.html
            free_memory, total_memory = torch.cuda.mem_get_info(device_id)
            total_memory_mb = total_memory / (1024 ** 2)
            free_memory_mb = free_memory / (1024 ** 2)

            if int(free_memory_mb) < int(chunk_size_mb):
                # Less than one chunk of free VRAM left: stop allocating and verify what we have.
                print(f"[+] Less than {chunk_size_mb} MB of free VRAM left ({int(free_memory_mb)} MB). Allocated {allocated_vram_mb} MB. Verifying memory...")
                for i, tensor in enumerate(allocated_chunks):
                    tensor.fill_(i % 256)  # Fill with a known pattern
                    if not torch.all(tensor == (i % 256)):
                        print(f"[!] Memory corruption detected in chunk {i + 1}!")
                        return
                print(f"[+] VRAM test passed successfully! Allocated {allocated_vram_mb} MB of VRAM.")
                break
            else:
                print(f"[+] Allocating {chunk_size_mb} MB (Allocated: {allocated_vram_mb} MB, Total: {int(total_memory_mb)} MB)...")
                try:
                    tensor = torch.empty(int(chunk_size_mb * (1024 ** 2) / element_size), dtype=torch.float32, device=device)
                    allocated_chunks.append(tensor)
                    allocated_vram_mb += chunk_size_mb
                except RuntimeError as e:
                    print(f"[!] CUDA error: {e}")
                    break
            time.sleep(0.1)
    except Exception as e:
        print(f"[!] An error occurred: {e}")
    finally:
        # Drop all references to the chunks so the caching allocator can actually release them.
        allocated_chunks.clear()
        torch.cuda.empty_cache()
        print("[+] Memory cleared.")


if __name__ == "__main__":
    # Test every CUDA device PyTorch can see (the original called devices 0 and 1 explicitly).
    for dev in range(torch.cuda.device_count()):
        test_vram(device_id=dev)