Skip to content

Instantly share code, notes, and snippets.

@damico
damico / test-rocm.py
Created April 10, 2023 18:54
Script for testing PyTorch support on AMD GPUs using ROCm
import torch, grp, pwd, os, subprocess
devices = []
try:
print("\n\nChecking ROCM support...")
result = subprocess.run(['rocminfo'], stdout=subprocess.PIPE)
cmd_str = result.stdout.decode('utf-8')
cmd_split = cmd_str.split('Agent ')
for part in cmd_split:
item_single = part[0:1]
item_double = part[0:2]
@Chillee
Chillee / 1-pw_op_fusion.py
Last active April 6, 2025 19:04
PT 2.0 Benchmarks
import torch
import torch._inductor.config
import time
torch._inductor.config.triton.cudagraphs = False
torch.set_float32_matmul_precision('high')
def bench(f, name=None, iters=100, warmup=5, display=True, profile=False):
for _ in range(warmup):
f()
@ConsciousMachines
ConsciousMachines / Main.cpp
Last active September 26, 2023 18:32
basic ImGui + CUDA + OpenGL
// based on https://gist.github.com/kamino410/09df4ecdf37b03cbd05752a7b2e52d3a
// This adds ImGui to an application that uses CUDA and OpenGL. The catch is that once CUDA is in use, ImGui renders very strangely.
// After 9 hours of debugging I found that calling glBindBuffer before and after the draw call fixes this:
//glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo); // THE MAGIC LINE #1
//glDrawPixels(WIDTH, HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, 0);
//glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); // THE MAGIC LINE #2
#include "imgui/imgui.h" // version 1.78 and 1.60
#include "imgui/imgui_impl_glfw.h"
@hayunjong83
hayunjong83 / cdpSimplePrint.cu
Created March 5, 2020 07:59
CUDA dynamic parallelism example 1) cdpSimplePrint
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <helper_cuda.h>
#include <helper_string.h>
__device__ int g_uids = 0;
__device__ void print_info(int depth, int thread, int uid, int parent_uid)
{
@maronsson
maronsson / Instrumentor.h
Last active June 8, 2024 04:27 — forked from TheCherno/Instrumentor.h
Basic Instrumentation Profiler
//
// Basic instrumentation profiler by Cherno
// Usage: include this header file somewhere in your code (e.g. a precompiled header), and then use it like this:
//
// Instrumentor::Get().BeginSession("Session Name"); // Begin session
// {
// InstrumentationTimer timer("Profiled Scope Name"); // Place code like this in scopes you'd like to include in profiling
// // Code
// }
@TheCherno
TheCherno / Instrumentor.h
Last active April 2, 2025 21:28
Basic Instrumentation Profiler
//
// Basic instrumentation profiler by Cherno
// Usage: include this header file somewhere in your code (e.g. a precompiled header), and then use it like this:
//
// Instrumentor::Get().BeginSession("Session Name"); // Begin session
// {
// InstrumentationTimer timer("Profiled Scope Name"); // Place code like this in scopes you'd like to include in profiling
// // Code
// }
@goldsborough
goldsborough / conv.cu
Last active February 2, 2025 09:14
Convolution with cuDNN
#include <cudnn.h>
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <opencv2/opencv.hpp>
#define checkCUDNN(expression) \
{ \
cudnnStatus_t status = (expression); \
if (status != CUDNN_STATUS_SUCCESS) { \