A tool to generate files for C extensions of CUDA-related libraries for CuPy. Currently covered are cuBLAS, cuSPARSE, and cuSOLVER, which have too many APIs for their extensions to be written by hand.
./gen.sh
$ CHAINER_DTYPE=float16 python train_ptb.py -d 0 -e 10 | |
#vocab = 10000 | |
epoch iteration perplexity val_perplexity | |
0 500 326440 | |
0 1000 301342 | |
1 1500 298940 inf | |
1 2000 334369 | |
1 2500 334369 | |
2 3000 306202 inf | |
2 3500 339762 |
import multiprocessing | |
import cupy | |
from cupy import cuda | |
from cupy.cuda import nccl | |
from cupy import testing | |
def f(n_devices, device, comm_id, rank): | |
device.use() | |
comm = nccl.NcclCommunicator(n_devices, comm_id, rank) |
{ | |
"_nodetype": "FileAST", | |
"coord": null, | |
"ext": [ | |
{ | |
"_nodetype": "Pragma", | |
"coord": "../utils/fake_libc_include/_fake_typedefs.h:56:9", | |
"string": "GCC diagnostic ignored \"-Wunused-function\"" | |
}, | |
{ |
diff --git a/cupyx/scipy/interpolate/_interpolate.py b/cupyx/scipy/interpolate/_interpolate.py | |
index bab74671e..ec3c5bcac 100644 | |
--- a/cupyx/scipy/interpolate/_interpolate.py | |
+++ b/cupyx/scipy/interpolate/_interpolate.py | |
@@ -22,7 +22,7 @@ INTERVAL_KERNEL = r''' | |
extern "C" { | |
__global__ void find_breakpoint_position( | |
const double* breakpoints, const double* x, long long* out, | |
- bool extrapolate, int total_x, int total_breakpoints, bool asc) { | |
+ bool extrapolate, int total_x, int total_breakpoints, const bool* pasc) { |
#include <cassert> | |
#include <iostream> | |
#include <thread> | |
// Kernel: adds 1.0f in place to each of the first `n` elements of `a`.
// Each thread handles at most one element, selected by its flattened
// one-dimensional index (blockIdx.x * blockDim.x + threadIdx.x).
__global__ void vecAddOne(float *a, int n) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads whose index falls past the end of the array do nothing.
    if (idx >= n) {
        return;
    }

    a[idx] += 1.0f;
}