
David Berard (davidberard98)

  • PyTorch
  • Menlo Park, CA
// BUILD COMMAND:
// LD_LIBRARY_PATH=/usr/local/cuda-11.6/extras/CUPTI/lib64:$LD_LIBRARY_PATH nvcc -arch=sm_80 -std=c++17 -o cudagraph cudagraph.cu -lcupti
#include <cstddef>
#include <cuda_runtime_api.h>
#include <cstdio>
#include <sys/time.h>
#include <iostream>
#include <cupti.h>
#define N 500000 // tuned such that kernel takes a few microseconds
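The same launch-overhead question can be posed from Python with torch.cuda.CUDAGraph; a minimal sketch (the kernel and size below are illustrative, not from the gist):

import torch

x = torch.ones(500000, device="cuda")  # mirrors N above; illustrative size

# Warm up on a side stream before capture, per the CUDA graphs docs.
s = torch.cuda.Stream()
s.wait_stream(torch.cuda.current_stream())
with torch.cuda.stream(s):
    y = x * 2.0
torch.cuda.current_stream().wait_stream(s)

# Capture the kernel once, then replay it without per-launch CPU overhead.
g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g):
    y = x * 2.0
g.replay()
torch.cuda.synchronize()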
import torch
import torch._dynamo
import torch._inductor.inductor_prims


def fn(values, boundaries):
    return torch.bucketize(values, boundaries)


def fn_ind(values, boundaries):
    return torch.ops.prims._inductor_bucketize(values, boundaries)
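A minimal comparison sketch (the input shapes are illustrative, not from the gist; _inductor_bucketize is registered by the torch._inductor.inductor_prims import above):

values = torch.randn(1_000_000, device="cuda")
boundaries = torch.linspace(-3, 3, 128, device="cuda")

# Both paths should place each value into the same bucket.
torch.testing.assert_close(fn(values, boundaries), fn_ind(values, boundaries))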
# $CUDA_HOME/bin/nvcc binary_search_cuda.cu -std=c++17 -o binary_search_cuda -O3 # -Wl,-rpath $CUDA_HOME/lib64
$CUDA_HOME/bin/nvcc dense_to_jagged.cu -std=c++17 -o dense_to_jagged -O3 # -Wl,-rpath $CUDA_HOME/lib64
import torch
import triton
import triton.language as tl


@triton.jit
def dense_to_jagged_triton(
    in_ptr,
    offsets_ptr,
    inverse_offsets_ptr,
    out_ptr,
import torch
import time

profiler_events = []
is_enabled = False


def _start_fn(name, args=None):
    if is_enabled:
        profiler_events.append((name, args, time.time()))


def _stop_fn():
    # Body truncated in the gist preview; assumed to mirror _start_fn.
    if is_enabled:
        profiler_events.append(("stop", None, time.time()))
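A sketch of driving these hooks (the enable/disable convention is an assumption, not from the gist):

is_enabled = True
_start_fn("forward", {"batch": 8})
torch.relu(torch.randn(8, 8))
_stop_fn()
is_enabled = False
print(profiler_events)  # [("forward", {...}, start_ts), ("stop", None, stop_ts)]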
import torch
import triton
import triton.language as tl


@triton.jit
def dense_to_jagged_triton(
    inverse_offsets_ptr, offsets_ptr, dense_ptr, out_ptr0, xnumel, XBLOCK: tl.constexpr
):
    # xnumel = 33106688
    xoffset = tl.program_id(0) * XBLOCK
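For reference, an eager-mode sketch of the dense-to-jagged gather such a kernel computes, assuming offsets is the usual jagged row-offset tensor and inverse_offsets maps each output slot back to its batch row (names follow the kernel parameters; shapes are illustrative):

import torch

dense = torch.randn(3, 4, 8)          # (batch, max_len, dim)
offsets = torch.tensor([0, 2, 3, 7])  # row i owns output slots offsets[i]:offsets[i+1]
total = int(offsets[-1])

# inverse_offsets[j] = batch row that output slot j belongs to
inverse_offsets = torch.repeat_interleave(torch.arange(3), offsets[1:] - offsets[:-1])
pos = torch.arange(total) - offsets[inverse_offsets]  # position within the row
jagged = dense[inverse_offsets, pos]                  # (total, dim) jagged values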
import torch


def fn(x, y):
    return torch.cat([x + y, y]).sin()


a = torch.ones((1024, 256), dtype=torch.float32)
b = torch.ones((1024, 256), dtype=torch.float32) * 2

with torch.profiler.profile(schedule=torch.profiler.schedule(wait=2, warmup=2, repeat=1, active=2), record_shapes=True) as prof:
    for _ in range(8):
        # Loop body truncated in the preview; assumed to run fn and step the
        # profiler schedule once per iteration.
        fn(a, b)
        prof.step()
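With wait=2, warmup=2, active=2, repeat=1, the profiler skips two steps, warms up for two, and records the next two; a typical follow-up is to print the summary:

print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))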
Traceback (most recent call last):
  File "/data/users/dberard/scripts/oncall/112489.py", line 8, in <module>
    fn_opt(*inputs)
  File "/data/users/dberard/pytorch/torch/_dynamo/eval_frame.py", line 411, in _fn
    return fn(*args, **kwargs)
  File "/data/users/dberard/pytorch/torch/_dynamo/eval_frame.py", line 559, in catch_errors
    return callback(frame, cache_entry, hooks, frame_state)
  File "/data/users/dberard/pytorch/torch/_dynamo/convert_frame.py", line 687, in _convert_frame
    result = inner_convert(frame, cache_entry, hooks, frame_state)
  File "/data/users/dberard/pytorch/torch/_dynamo/convert_frame.py", line 148, in _fn
/data/users/dberard/scripts/oncall/112494.py:6: UserWarning: An output with one or more elements was resized since it had shape [10, 9, 8], which does not match the required output shape [1, 9, 8]. This behavior is deprecated, and in a future PyTorch release outputs will not be resized unless they have zero elements. You can explicitly reuse an out tensor t by resizing it, inplace, to zero elements with t.resize_(0). (Triggered internally at ../aten/src/ATen/native/Resize.cpp:28.)
x = torch.var(correction=4, dim=0, input=x, keepdim=True, out=torch.rand_like(x))
/data/users/dberard/pytorch/torch/_prims_common/wrappers.py:159: UserWarning: An output with one or more elements was resized since it had shape torch.Size([s0, s1, s2]) which does not match the required output shape {str(shape)}. This behavior is deprecated, and in a future PyTorch release outputs will not be resized unless they have zero elements. You can explicitly reuse an out tensor t by resizing it, inplace, to zero elements with t.resize_(0).
/data/users/dberard/scripts/oncall/112502.py:7: UserWarning: An output with one or more elements was resized since it had shape [9, 10], which does not match the required output shape [9]. This behavior is deprecated, and in a future PyTorch release outputs will not be resized unless they have zero elements. You can explicitly reuse an out tensor t by resizing it, inplace, to zero elements with t.resize_(0). (Triggered internally at ../aten/src/ATen/native/Resize.cpp:28.)
x = torch.diag(input=x, diagonal=0,out=torch.rand([9, 10], dtype=torch.float32).to('cpu'))
build succeeded
/data/users/dberard/pytorch/torch/_prims_common/wrappers.py:159: UserWarning: An output with one or more elements was resized since it had shape torch.Size([9, 10]) which does not match the required output shape {str(shape)}. This behavior is deprecated, and in a future PyTorch release outputs will not be resized unless they have zero elements. You can explicitly reuse an out tensor t by resizing it, inplace, to zero elements with t.resize_(0).
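The fix these warnings point at is to hand out= a tensor resized to zero elements; a minimal sketch of both patterns, using the torch.var call from above:

import torch

x = torch.rand(10, 9, 8)

# Triggers the resize warning: out has a non-zero, mismatched shape.
bad_out = torch.rand_like(x)
torch.var(x, dim=0, keepdim=True, correction=4, out=bad_out)

# Clean: resize the reused tensor to zero elements first, as the warning suggests.
good_out = torch.rand_like(x)
good_out.resize_(0)
torch.var(x, dim=0, keepdim=True, correction=4, out=good_out)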