A tool to generate files for C extensions of CUDA-related libraries for CuPy. Currently covered are cuBLAS, cuSPARSE, and cuSOLVER, which have too many APIs to write their extensions by hand.
./gen.sh
#include <cassert> | |
#include <iostream> | |
#include <thread> | |
// Kernel: adds 1.0f in place to elements of `a`.
//
// Each thread computes a flat 1D index from its block index, block size and
// thread index (x dimension only) and updates the single element at that
// index.
//
// Parameters:
//   a - array of floats updated in place
//       (NOTE(review): presumably a device pointer with at least n elements
//       - confirm at the launch site, which is not visible here).
//   n - element count used as the upper bound for the index check.
__global__ void vecAddOne(float *a, int n) { | |
// Flat global index of this thread along x.
int id = blockIdx.x * blockDim.x + threadIdx.x; | |
// Bounds guard: threads whose index is not below n do nothing.
if (id < n) | |
a[id] += 1.0f; | |
} |
diff --git a/cupyx/scipy/interpolate/_interpolate.py b/cupyx/scipy/interpolate/_interpolate.py | |
index bab74671e..ec3c5bcac 100644 | |
--- a/cupyx/scipy/interpolate/_interpolate.py | |
+++ b/cupyx/scipy/interpolate/_interpolate.py | |
@@ -22,7 +22,7 @@ INTERVAL_KERNEL = r''' | |
extern "C" { | |
__global__ void find_breakpoint_position( | |
const double* breakpoints, const double* x, long long* out, | |
- bool extrapolate, int total_x, int total_breakpoints, bool asc) { | |
+ bool extrapolate, int total_x, int total_breakpoints, const bool* pasc) { |
{ | |
"_nodetype": "FileAST", | |
"coord": null, | |
"ext": [ | |
{ | |
"_nodetype": "Pragma", | |
"coord": "../utils/fake_libc_include/_fake_typedefs.h:56:9", | |
"string": "GCC diagnostic ignored \"-Wunused-function\"" | |
}, | |
{ |
import multiprocessing | |
import cupy | |
from cupy import cuda | |
from cupy.cuda import nccl | |
from cupy import testing | |
def f(n_devices, device, comm_id, rank): | |
device.use() | |
comm = nccl.NcclCommunicator(n_devices, comm_id, rank) |
$ CHAINER_DTYPE=float16 python train_ptb.py -d 0 -e 10 | |
#vocab = 10000 | |
epoch iteration perplexity val_perplexity | |
0 500 326440 | |
0 1000 301342 | |
1 1500 298940 inf | |
1 2000 334369 | |
1 2500 334369 | |
2 3000 306202 inf | |
2 3500 339762 |
$ CHAINER_DTYPE=float16 python postagging.py -d 0 | |
[nltk_data] Downloading package brown to /home/ext- | |
[nltk_data] mtakagi/nltk_data... | |
[nltk_data] Package brown is already up-to-date! | |
# of sentences: 57340 | |
# of words: 56057 | |
# of pos: 472 | |
epoch main/loss validation/main/loss main/accuracy validation/main/accuracy elapsed_time | |
0 244.875 18.3736 | |
0 373.75 34.9924 |
$ CHAINER_DTYPE=float16 python train_memnn.py tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_train.txt tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_test.txt -d 0 | |
Training data: tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_train.txt: 2000 | |
Test data: tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_test.txt: 200 | |
epoch main/loss validation/main/loss main/accuracy validation/main/accuracy | |
1 nan nan 0.0017004 0 | |
2 nan nan 0 0 | |
3 nan nan 0 0 | |
4 nan nan 0 0 | |
5 nan nan 0 0 | |
6 nan nan 0 0 |
$ CHAINER_DTYPE=float16 python train_mnist.py -d 0 | |
Device: @cupy:0 | |
# unit: 1000 | |
# Minibatch-size: 100 | |
# epoch: 20 | |
epoch main/loss validation/main/loss main/accuracy validation/main/accuracy elapsed_time | |
1 nan nan 0.0994271 0.0980225 3.91818 | |
2 nan nan 0.0997917 0.0980225 6.22553 | |
3 nan nan 0.0995833 0.0980225 8.72424 |
$ CHAINER_DTYPE=float16 python train_dcgan.py -d 0 | |
Device: @cupy:0 | |
# Minibatch-size: 50 | |
# n_hidden: 100 | |
# epoch: 1000 | |
epoch iteration gen/loss dis/loss ................] 0.01% | |
0 100 nan nan | |
0 200 nan nan | |
0 300 nan nan |