July 1, 2019 03:15 · July 22, 2019 09:10 · November 29, 2020 04:30 · February 1, 2021 23:56 · December 21, 2022 01:11 · January 30, 2023 01:12
 $ CHAINER_DTYPE=float16 python train_ptb.py -d 0 -e 10
 #vocab = 10000
 epoch       iteration   perplexity  val_perplexity
 0           500         326440
 0           1000        301342
 1           1500        298940      inf
 1           2000        334369
 1           2500        334369
 2           3000        306202      inf
 2           3500        339762
 import multiprocessing

 import cupy
 from cupy import cuda
 from cupy.cuda import nccl
 from cupy import testing

 def f(n_devices, device, comm_id, rank):
    """Select ``device`` and create an NCCL communicator for this rank.

    NOTE(review): this excerpt appears to end right after the communicator
    is constructed; ``comm`` is not used in the visible lines, so whatever
    collective operation follows is not documented here.

    Args:
        n_devices: Passed as the first argument to ``nccl.NcclCommunicator``
            (presumably the total number of participating ranks -- confirm).
        device: Object whose ``use()`` method is called before the
            communicator is created (presumably a ``cupy.cuda.Device``).
        comm_id: Passed as the second argument to ``nccl.NcclCommunicator``
            (presumably the shared NCCL unique id -- confirm against caller).
        rank: Passed as the third argument to ``nccl.NcclCommunicator``.
    """
    # Make `device` current before constructing the communicator.
    device.use()
    comm = nccl.NcclCommunicator(n_devices, comm_id, rank)
 {
    "_nodetype": "FileAST",
    "coord": null,
    "ext": [
        {
            "_nodetype": "Pragma",
            "coord": "../utils/fake_libc_include/_fake_typedefs.h:56:9",
            "string": "GCC diagnostic ignored \"-Wunused-function\""
        },
        {
 diff --git a/cupyx/scipy/interpolate/_interpolate.py b/cupyx/scipy/interpolate/_interpolate.py
 index bab74671e..ec3c5bcac 100644
 --- a/cupyx/scipy/interpolate/_interpolate.py
 +++ b/cupyx/scipy/interpolate/_interpolate.py
 @@ -22,7 +22,7 @@ INTERVAL_KERNEL = r'''
 extern "C" {
 __global__ void find_breakpoint_position(
         const double* breakpoints, const double* x, long long* out,
 -        bool extrapolate, int total_x, int total_breakpoints, bool asc) {
 +        bool extrapolate, int total_x, int total_breakpoints, const bool* pasc) {
 #include <cassert>
 #include <iostream>
 #include <thread>

 // Add 1.0f to each of the first n elements of a, one element per thread.
 //
 // a: array updated in place (presumably a device-accessible buffer holding
 //    at least n floats -- confirm against the launch site).
 // n: number of elements to process; a thread whose flattened index is not
 //    below n returns without touching a.
 __global__ void vecAddOne(float *a, int n) {
    // Flatten the (block, thread) coordinates along x into a single index.
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) {
        return;
    }
    a[idx] = a[idx] + 1.0f;
 }
	$ CHAINER_DTYPE=float16 python train_ptb.py -d 0 -e 10
	#vocab = 10000
	epoch iteration perplexity val_perplexity
	0 500 326440
	0 1000 301342
	1 1500 298940 inf
	1 2000 334369
	1 2500 334369
	2 3000 306202 inf
	2 3500 339762
	import multiprocessing

	import cupy
	from cupy import cuda
	from cupy.cuda import nccl
	from cupy import testing

	def f(n_devices, device, comm_id, rank):
	    """Select ``device`` and create an NCCL communicator for this rank.

	    NOTE(review): this excerpt appears to end right after the communicator
	    is constructed; ``comm`` is not used in the visible lines.

	    Args:
	        n_devices: First argument to ``nccl.NcclCommunicator`` (presumably
	            the total number of participating ranks -- confirm).
	        device: Object whose ``use()`` method is called first (presumably
	            a ``cupy.cuda.Device``).
	        comm_id: Second argument to ``nccl.NcclCommunicator`` (presumably
	            the shared NCCL unique id -- confirm against caller).
	        rank: Third argument to ``nccl.NcclCommunicator``.
	    """
	    # Make `device` current before constructing the communicator.
	    device.use()
	    comm = nccl.NcclCommunicator(n_devices, comm_id, rank)
	{
	"_nodetype": "FileAST",
	"coord": null,
	"ext": [
	{
	"_nodetype": "Pragma",
	"coord": "../utils/fake_libc_include/_fake_typedefs.h:56:9",
	"string": "GCC diagnostic ignored \"-Wunused-function\""
	},
	{
	diff --git a/cupyx/scipy/interpolate/_interpolate.py b/cupyx/scipy/interpolate/_interpolate.py
	index bab74671e..ec3c5bcac 100644
	--- a/cupyx/scipy/interpolate/_interpolate.py
	+++ b/cupyx/scipy/interpolate/_interpolate.py
	@@ -22,7 +22,7 @@ INTERVAL_KERNEL = r'''
	extern "C" {
	__global__ void find_breakpoint_position(
	const double* breakpoints, const double* x, long long* out,
	- bool extrapolate, int total_x, int total_breakpoints, bool asc) {
	+ bool extrapolate, int total_x, int total_breakpoints, const bool* pasc) {
	#include <cassert>
	#include <iostream>
	#include <thread>

	// Add 1.0f to each of the first n elements of a, one element per thread.
	//
	// a: array updated in place (presumably a device-accessible buffer holding
	//    at least n floats -- confirm against the launch site).
	// n: number of elements to process; a thread whose flattened index is not
	//    below n returns without touching a.
	__global__ void vecAddOne(float *a, int n) {
	    // Flatten the (block, thread) coordinates along x into a single index.
	    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
	    if (idx >= n) {
	        return;
	    }
	    a[idx] = a[idx] + 1.0f;
	}