A tool to generate files for C extensions of CUDA-related libraries for CuPy. Currently covered are cuBLAS, cuSPARSE, and cuSOLVER, which have too many APIs to write their extensions by hand.
./gen.sh
| $ CHAINER_DTYPE=float16 python train_ptb.py -d 0 -e 10 | |
| #vocab = 10000 | |
| epoch iteration perplexity val_perplexity | |
| 0 500 326440 | |
| 0 1000 301342 | |
| 1 1500 298940 inf | |
| 1 2000 334369 | |
| 1 2500 334369 | |
| 2 3000 306202 inf | |
| 2 3500 339762 |
| import multiprocessing | |
| import cupy | |
| from cupy import cuda | |
| from cupy.cuda import nccl | |
| from cupy import testing | |
| def f(n_devices, device, comm_id, rank): | |
| device.use() | |
| comm = nccl.NcclCommunicator(n_devices, comm_id, rank) |
| { | |
| "_nodetype": "FileAST", | |
| "coord": null, | |
| "ext": [ | |
| { | |
| "_nodetype": "Pragma", | |
| "coord": "../utils/fake_libc_include/_fake_typedefs.h:56:9", | |
| "string": "GCC diagnostic ignored \"-Wunused-function\"" | |
| }, | |
| { |
| diff --git a/cupyx/scipy/interpolate/_interpolate.py b/cupyx/scipy/interpolate/_interpolate.py | |
| index bab74671e..ec3c5bcac 100644 | |
| --- a/cupyx/scipy/interpolate/_interpolate.py | |
| +++ b/cupyx/scipy/interpolate/_interpolate.py | |
| @@ -22,7 +22,7 @@ INTERVAL_KERNEL = r''' | |
| extern "C" { | |
| __global__ void find_breakpoint_position( | |
| const double* breakpoints, const double* x, long long* out, | |
| - bool extrapolate, int total_x, int total_breakpoints, bool asc) { | |
| + bool extrapolate, int total_x, int total_breakpoints, const bool* pasc) { |
| #include <cassert> | |
| #include <iostream> | |
| #include <thread> | |
| __global__ void vecAddOne(float *a, int n) { | |
| int id = blockIdx.x * blockDim.x + threadIdx.x; | |
| if (id < n) | |
| a[id] += 1.0f; | |
| } |