#line 1 "narray/gen/tmpl/lib.c" | |
/* | |
Ruby/Cumo::GSL - GSL wrapper for Ruby/Cumo::NArray | |
created on: 2017-03-11 | |
Copyright (C) 2017 Masahiro Tanaka | |
Copyright (C) 2018 Naotoshi Seo | |
*/ | |
#include <ruby.h> | |
#include <assert.h> | |
#include "cumo.h" | |
#include "cumo/narray.h" | |
#include "cumo/template.h" | |
#include "SFMT.h" | |
#include "cumo/cuda/memory_pool.h" | |
#include "cumo/cuda/runtime.h" | |
#include "cumo/indexer.h" | |
#define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x))) | |
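/*
  m_map converts one bit element to a Ruby object, yields it to the current
  block, and converts the block's result back to a bit value; the map-style
  iterators generated below are expected to use it as their per-element hook.
*/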
#include <cumo/types/bit.h>

static ID cumo_id_cast;
static ID cumo_id_divmod;
static ID cumo_id_eq;
static ID cumo_id_mulsum;
static ID cumo_id_ne;
static ID cumo_id_pow;

VALUE cT;
extern VALUE cRT;

#line 1 "narray/gen/tmpl/class.c"
/*
class definition: Cumo::Bit
*/
static VALUE bit_store(VALUE,VALUE);
#line 1 "narray/gen/tmpl/alloc_func.c" | |
static size_t
bit_memsize(const void* ptr)
{ | |
size_t size = sizeof(cumo_narray_data_t); | |
const cumo_narray_data_t *na = (const cumo_narray_data_t*)ptr; | |
assert(na->base.type == CUMO_NARRAY_DATA_T); | |
if (na->ptr != NULL) { | |
size += ((na->base.size-1)/8/sizeof(CUMO_BIT_DIGIT)+1)*sizeof(CUMO_BIT_DIGIT); | |
} | |
if (na->base.size > 0) { | |
if (na->base.shape != NULL && na->base.shape != &(na->base.size)) { | |
size += sizeof(size_t) * na->base.ndim; | |
} | |
} | |
return size; | |
} | |
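/*
  Worked example of the size accounting above (assuming 8-byte CUMO_BIT_DIGIT
  words): a 100-element bit array occupies (100-1)/8/8+1 = 2 words, i.e. 16
  bytes of element storage in addition to the cumo_narray_data_t header.
*/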
static void | |
bit_free(void* ptr) | |
{ | |
cumo_narray_data_t *na = (cumo_narray_data_t*)ptr; | |
assert(na->base.type == CUMO_NARRAY_DATA_T); | |
if (na->ptr != NULL) { | |
cumo_cuda_runtime_free(na->ptr); | |
na->ptr = NULL; | |
} | |
if (na->base.size > 0) { | |
if (na->base.shape != NULL && na->base.shape != &(na->base.size)) { | |
xfree(na->base.shape); | |
na->base.shape = NULL; | |
} | |
} | |
xfree(na); | |
} | |
static cumo_narray_type_info_t bit_info = { | |
1, // element_bits | |
0, // element_bytes | |
1, // element_stride (in bits) | |
}; | |
static const rb_data_type_t bit_data_type = { | |
"bit", | |
{0, Cumo::Bit_free, _memsize,}, | |
&cumo_na_data_type, | |
&bitbit_info, | |
0, // flags | |
}; | |
static VALUE | |
bit(VALUE klass) | |
{ | |
cumo_narray_data_t *na = ALLOC(cumo_narray_data_t); | |
na->base.ndim = 0; | |
na->base.type = CUMO_NARRAY_DATA_T; | |
na->base.flag[0] = CUMO_NA_FL0_INIT; | |
na->base.flag[1] = CUMO_NA_FL1_INIT; | |
na->base.size = 0; | |
na->base.shape = NULL; | |
na->base.reduce = INT2FIX(0); | |
na->ptr = NULL; | |
return TypedData_Wrap_Struct(klass, &bit_data_type, (void*)na);
} | |
#line 1 "narray/gen/tmpl_bit/allocate.c" | |
static VALUE
bit_allocate(VALUE self)
{ | |
cumo_narray_t *na; | |
char *ptr; | |
CumoGetNArray(self,na); | |
switch(CUMO_NA_TYPE(na)) { | |
case CUMO_NARRAY_DATA_T: | |
ptr = CUMO_NA_DATA_PTR(na); | |
if (na->size > 0 && ptr == NULL) { | |
ptr = cumo_cuda_runtime_malloc(((na->size-1)/8/sizeof(CUMO_BIT_DIGIT)+1)*sizeof(CUMO_BIT_DIGIT)); | |
CUMO_NA_DATA_PTR(na) = ptr; | |
} | |
break; | |
case CUMO_NARRAY_VIEW_T: | |
rb_funcall(CUMO_NA_VIEW_DATA(na), rb_intern("allocate"), 0); | |
break; | |
default: | |
rb_raise(rb_eRuntimeError,"invalid narray type"); | |
} | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/extract.c" | |
/* | |
Extract an element only if self is a dimensionless NArray. | |
@overload extract | |
@return [Numeric,Cumo::NArray] | |
--- Extract element value as Ruby Object if self is a dimensionless NArray, | |
otherwise returns self. | |
*/ | |
// TODO(sonots): Return Cumo::Bit instead of ruby built-in object to avoid synchronization | |
static VALUE
bit_extract(VALUE self)
{ | |
CUMO_BIT_DIGIT *ptr, val; | |
size_t pos; | |
cumo_narray_t *na; | |
CumoGetNArray(self,na); | |
if (na->ndim==0) { | |
pos = cumo_na_get_offset(self); | |
ptr = (CUMO_BIT_DIGIT*)cumo_na_get_pointer_for_read(self); | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("bit_extract", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
val = ((*((ptr)+(pos)/CUMO_NB)) >> ((pos)%CUMO_NB)) & 1u; | |
cumo_na_release_lock(self); | |
return INT2FIX(val); | |
} | |
return self; | |
} | |
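/*
  Illustrative usage (assumed to mirror the Numo-style Ruby API):
    b = Cumo::Bit.cast(1)     # dimensionless (0-d) bit array
    b.extract                 #=> 1, a plain Ruby Integer
    Cumo::Bit[0,1,1].extract  #=> returns self unchanged, since ndim > 0
*/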
#line 1 "narray/gen/tmpl_bit/extract_cpu.c" | |
/* | |
Extract an element only if self is a dimensionless NArray. | |
@overload extract_cpu | |
@return [Numeric,Cumo::NArray] | |
--- Extract element value as Ruby Object if self is a dimensionless NArray, | |
otherwise returns self. | |
*/ | |
static VALUE
bit_extract_cpu(VALUE self)
{ | |
CUMO_BIT_DIGIT *ptr, val; | |
size_t pos; | |
cumo_narray_t *na; | |
CumoGetNArray(self,na); | |
if (na->ndim==0) { | |
pos = cumo_na_get_offset(self); | |
ptr = (CUMO_BIT_DIGIT*)cumo_na_get_pointer_for_read(self); | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("bit_extract_cpu", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
val = ((*((ptr)+(pos)/CUMO_NB)) >> ((pos)%CUMO_NB)) & 1u; | |
cumo_na_release_lock(self); | |
return INT2FIX(val); | |
} | |
return self; | |
} | |
#line 1 "narray/gen/tmpl/new_dim0.c" | |
void cumo_bit_new_dim0_kernel_launch(dtype *ptr, dtype x);
static VALUE
bit_new_dim0(dtype x)
{ | |
VALUE v; | |
dtype *ptr; | |
v = cumo_na_new(cT, 0, NULL); | |
ptr = (dtype*)cumo_na_get_pointer_for_write(v); | |
cumo_bit_new_dim0_kernel_launch(ptr, x);
cumo_na_release_lock(v); | |
return v; | |
} | |
#line 1 "narray/gen/tmpl/store.c" | |
#line 1 "narray/gen/tmpl/store_numeric.c" | |
static VALUE
bit_store_numeric(VALUE self, VALUE obj)
{ | |
dtype x; | |
x = m_num_to_data(obj); | |
obj = bit_new_dim0(x);
bit_store(self,obj);
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_bit.c" | |
static void
iter_bit_store_bit(cumo_na_loop_t *const lp)
{ | |
size_t n; | |
size_t p1, p3; | |
ssize_t s1, s3; | |
size_t *idx1, *idx3; | |
int o1, l1, r1, len; | |
CUMO_BIT_DIGIT *a1, *a3; | |
CUMO_BIT_DIGIT x; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_store_bit", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, n); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a3, p3, s3, idx3); | |
CUMO_INIT_PTR_BIT_IDX(lp, 1, a1, p1, s1, idx1); | |
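/*
  Strided or index-mapped access falls back to the bit-by-bit loop below; the
  contiguous case instead copies whole CUMO_BIT_DIGIT words, handling a partial
  leading word, the aligned middle run, and a partial trailing word.
*/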
if (s1!=1 || s3!=1 || idx1 || idx3) { | |
for (; n--;) { | |
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x); | |
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x); | |
} | |
} else { | |
o1 = p1 % CUMO_NB; | |
o1 -= p3; | |
l1 = CUMO_NB+o1; | |
r1 = CUMO_NB-o1; | |
if (p3>0 || n<CUMO_NB) { | |
len = CUMO_NB - p3; | |
if ((int)n<len) len=n; | |
if (o1>=0) x = *a1>>o1; | |
else x = *a1<<-o1; | |
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1; | |
a1++; | |
*a3 = (x & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3)); | |
a3++; | |
n -= len; | |
} | |
if (o1==0) { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *(a1++); | |
*(a3++) = x; | |
} | |
} else { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
if (o1>0) x |= *(a1+1)<<r1; | |
a1++; | |
*(a3++) = x; | |
} | |
} | |
if (n>0) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
*a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n); | |
} | |
} | |
} | |
static VALUE
bit_store_bit(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_bit, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_from.c" | |
static void
iter_bit_store_dfloat(cumo_na_loop_t *const lp)
{ | |
ssize_t i, s1, s2; | |
size_t p1; | |
char *p2; | |
size_t *idx1, *idx2; | |
double x;
CUMO_BIT_DIGIT *a1; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_double", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,double,x);
y = m_from_real(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,double,x);
y = m_from_real(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,double,x);
y = m_from_real(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,double,x);
y = m_from_real(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} | |
} | |
static VALUE
bit_store_dfloat(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_dfloat, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_from.c" | |
static void
iter_bit_store_sfloat(cumo_na_loop_t *const lp)
{ | |
ssize_t i, s1, s2; | |
size_t p1; | |
char *p2; | |
size_t *idx1, *idx2; | |
float x;
CUMO_BIT_DIGIT *a1; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_float", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,sfloatSFloat,x); | |
y = float(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,m_from_real,x); | |
y = float(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_real,x); | |
y = float(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_real,x); | |
y = float(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} | |
} | |
static VALUE
bit_store_sfloat(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_sfloat, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_from.c" | |
static void
iter_bit_store_int64(cumo_na_loop_t *const lp)
{ | |
ssize_t i, s1, s2; | |
size_t p1; | |
char *p2; | |
size_t *idx1, *idx2; | |
int64_t x;
CUMO_BIT_DIGIT *a1; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_int64_t", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,int64Int64,x); | |
y = int64_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,m_from_int64,x); | |
y = int64_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_int64,x); | |
y = int64_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_int64,x); | |
y = int64_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} | |
} | |
static VALUE
bit_store_int64(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_int64, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_from.c" | |
static void
iter_bit_store_int32(cumo_na_loop_t *const lp)
{ | |
ssize_t i, s1, s2; | |
size_t p1; | |
char *p2; | |
size_t *idx1, *idx2; | |
int32_t x;
CUMO_BIT_DIGIT *a1; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_int32_t", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,int32Int32,x); | |
y = int32_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,m_from_int32,x); | |
y = int32_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_int32,x); | |
y = int32_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_int32,x); | |
y = int32_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} | |
} | |
static VALUE
bit_store_int32(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_int32, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_from.c" | |
static void
iter_bit_store_int16(cumo_na_loop_t *const lp)
{ | |
ssize_t i, s1, s2; | |
size_t p1; | |
char *p2; | |
size_t *idx1, *idx2; | |
int16_t x;
CUMO_BIT_DIGIT *a1; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_int16_t", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,int16Int16,x); | |
y = int16_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,m_from_sint,x); | |
y = int16_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x); | |
y = int16_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x); | |
y = int16_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} | |
} | |
static VALUE
bit_store_int16(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_int16, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_from.c" | |
static void
iter_bit_store_int8(cumo_na_loop_t *const lp)
{ | |
ssize_t i, s1, s2; | |
size_t p1; | |
char *p2; | |
size_t *idx1, *idx2; | |
int8_t x;
CUMO_BIT_DIGIT *a1; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_int8_t", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,int8Int8,x); | |
y = int8_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,m_from_sint,x); | |
y = int8_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x); | |
y = int8_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x); | |
y = int8_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} | |
} | |
static VALUE
bit_store_int8(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_int8, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_from.c" | |
static void
iter_bit_store_uint64(cumo_na_loop_t *const lp)
{ | |
ssize_t i, s1, s2; | |
size_t p1; | |
char *p2; | |
size_t *idx1, *idx2; | |
u_int64_t x;
CUMO_BIT_DIGIT *a1; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_u_int64_t", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,uint64UInt64,x); | |
y = u_int64_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,m_from_uint64,x); | |
y = u_int64_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_uint64,x); | |
y = u_int64_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_uint64,x); | |
y = u_int64_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} | |
} | |
static VALUE
bit_store_uint64(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_uint64, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_from.c" | |
static void
iter_bit_store_uint32(cumo_na_loop_t *const lp)
{ | |
ssize_t i, s1, s2; | |
size_t p1; | |
char *p2; | |
size_t *idx1, *idx2; | |
u_int32_t x;
CUMO_BIT_DIGIT *a1; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_u_int32_t", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,uint32UInt32,x); | |
y = u_int32_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,m_from_uint32,x); | |
y = u_int32_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_uint32,x); | |
y = u_int32_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_uint32,x); | |
y = u_int32_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} | |
} | |
static VALUE
bit_store_uint32(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_uint32, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_from.c" | |
static void
iter_bit_store_uint16(cumo_na_loop_t *const lp)
{ | |
ssize_t i, s1, s2; | |
size_t p1; | |
char *p2; | |
size_t *idx1, *idx2; | |
u_int16_t x;
CUMO_BIT_DIGIT *a1; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_u_int16_t", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,uint16UInt16,x); | |
y = u_int16_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,m_from_sint,x); | |
y = u_int16_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x); | |
y = u_int16_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x); | |
y = u_int16_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} | |
} | |
static VALUE
bit_store_uint16(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_uint16, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_from.c" | |
static void
iter_bit_store_uint8(cumo_na_loop_t *const lp)
{ | |
ssize_t i, s1, s2; | |
size_t p1; | |
char *p2; | |
size_t *idx1, *idx2; | |
u_int8_t x;
CUMO_BIT_DIGIT *a1; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_u_int8_t", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,uint8UInt8,x); | |
y = u_int8_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,m_from_sint,x); | |
y = u_int8_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x); | |
y = u_int8_t(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x); | |
y = u_int8_t(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} | |
} | |
static VALUE
bit_store_uint8(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_uint8, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_from.c" | |
static void
iter_bit_store_robject(cumo_na_loop_t *const lp)
{ | |
ssize_t i, s1, s2; | |
size_t p1; | |
char *p2; | |
size_t *idx1, *idx2; | |
VALUE x;
CUMO_BIT_DIGIT *a1; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_VALUE", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,robjectRObject,x); | |
y = VALUE(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_INDEX(p2,idx2,m_num_to_data,x); | |
y = VALUE(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_num_to_data,x); | |
y = VALUE(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_GET_DATA_STRIDE(p2,s2,m_num_to_data,x); | |
y = VALUE(x); | |
CUMO_STORE_BIT(a1, p1, y); p1+=s1; | |
} | |
} | |
} | |
} | |
static VALUE
bit_store_robject(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_store_robject, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj); | |
return self; | |
} | |
#line 1 "narray/gen/tmpl_bit/store_array.c" | |
static void
iter_bit_store_array(cumo_na_loop_t *const lp)
{ | |
size_t i, n; | |
size_t i1, n1; | |
VALUE v1, *ptr; | |
CUMO_BIT_DIGIT *a1; | |
size_t p1; | |
size_t s1, *idx1; | |
VALUE x; | |
double y; | |
CUMO_BIT_DIGIT z; | |
size_t len, c; | |
double beg, step; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_store_array", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, n); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
v1 = lp->args[1].value; | |
i = 0; | |
if (lp->args[1].ptr) { | |
if (v1 == Qtrue) { | |
iter_bit_store_bit(lp);
i = lp->args[1].shape[0]; | |
if (idx1) { | |
idx1 += i; | |
} else { | |
p1 += s1 * i; | |
} | |
} | |
goto loop_end; | |
} | |
ptr = &v1; | |
switch(TYPE(v1)) { | |
case T_ARRAY: | |
n1 = RARRAY_LEN(v1); | |
ptr = RARRAY_PTR(v1); | |
break; | |
case T_NIL: | |
n1 = 0; | |
break; | |
default: | |
n1 = 1; | |
} | |
if (idx1) { | |
for (i=i1=0; i1<n1 && i<n; i++,i1++) { | |
x = ptr[i1]; | |
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) { | |
cumo_na_step_sequence(x,&len,&beg,&step); | |
for (c=0; c<len && i<n; c++,i++) { | |
y = beg + step * c; | |
z = m_from_double(y); | |
CUMO_STORE_BIT(a1, p1+*idx1, z); idx1++; | |
} | |
} | |
if (TYPE(x) != T_ARRAY) { | |
if (x == Qnil) x = INT2FIX(0); | |
z = m_num_to_data(x); | |
CUMO_STORE_BIT(a1, p1+*idx1, z); idx1++; | |
} | |
} | |
} else { | |
for (i=i1=0; i1<n1 && i<n; i++,i1++) { | |
x = ptr[i1]; | |
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) { | |
cumo_na_step_sequence(x,&len,&beg,&step); | |
for (c=0; c<len && i<n; c++,i++) { | |
y = beg + step * c; | |
z = m_from_double(y); | |
CUMO_STORE_BIT(a1, p1, z); p1+=s1; | |
} | |
} | |
if (TYPE(x) != T_ARRAY) { | |
z = m_num_to_data(x); | |
CUMO_STORE_BIT(a1, p1, z); p1+=s1; | |
} | |
} | |
} | |
loop_end: | |
z = m_zero; | |
if (idx1) { | |
for (; i<n; i++) { | |
CUMO_STORE_BIT(a1, p1+*idx1, z); idx1++; | |
} | |
} else { | |
for (; i<n; i++) { | |
CUMO_STORE_BIT(a1, p1, z); p1+=s1; | |
} | |
} | |
} | |
static VALUE
bit_store_array(VALUE self, VALUE rary)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0}, {rb_cArray,0}};
cumo_ndfunc_t ndf = {iter_bit_store_array, CUMO_FULL_LOOP, 2, 0, ain, 0};
cumo_na_ndloop_store_rarray(&ndf, self, rary); | |
return self; | |
} | |
/*
Store elements to Cumo::Bit from other.
@overload store(other) | |
@param [Object] other | |
@return [Cumo::Bit] self | |
*/ | |
static VALUE
bit_store(VALUE self, VALUE obj)
{
VALUE r, klass;
klass = CLASS_OF(obj);
if (klass==cumo_cBit) {
bit_store_bit(self,obj);
return self;
}
if (IS_INTEGER_CLASS(klass) || klass==rb_cFloat || klass==rb_cComplex) {
bit_store_numeric(self,obj);
return self;
}
if (klass==cumo_cDFloat) {
bit_store_dfloat(self,obj);
return self;
}
if (klass==cumo_cSFloat) {
bit_store_sfloat(self,obj);
return self;
}
if (klass==cumo_cInt64) {
bit_store_int64(self,obj);
return self;
}
if (klass==cumo_cInt32) {
bit_store_int32(self,obj);
return self;
}
if (klass==cumo_cInt16) {
bit_store_int16(self,obj);
return self;
}
if (klass==cumo_cInt8) {
bit_store_int8(self,obj);
return self;
}
if (klass==cumo_cUInt64) {
bit_store_uint64(self,obj);
return self;
}
if (klass==cumo_cUInt32) {
bit_store_uint32(self,obj);
return self;
}
if (klass==cumo_cUInt16) {
bit_store_uint16(self,obj);
return self;
}
if (klass==cumo_cUInt8) {
bit_store_uint8(self,obj);
return self;
}
if (klass==cumo_cRObject) {
bit_store_robject(self,obj);
return self;
}
if (klass==rb_cArray) {
bit_store_array(self,obj);
return self;
}
if (CumoIsNArray(obj)) {
r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT);
if (CLASS_OF(r)==cT) {
bit_store(self,r);
return self; | |
} | |
} | |
rb_raise(cumo_na_eCastError, "unknown conversion from %s to %s", | |
rb_class2name(CLASS_OF(obj)), | |
rb_class2name(CLASS_OF(self))); | |
return self; | |
} | |
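/*
  Illustrative usage of the dispatcher above (assumed Ruby-level API):
    b = Cumo::Bit.new(4)
    b.store([0, 1, 1, 0])            # from a Ruby Array
    b.store(Cumo::Int32[1, 0, 1, 0]) # from another NArray, converted element-wise
*/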
#line 1 "narray/gen/tmpl/extract_data.c" | |
/* | |
Convert a data value of obj (with a single element) to dtype. | |
*/ | |
/* | |
static dtype | |
bit_extract_data(VALUE obj)
{ | |
cumo_narray_t *na; | |
dtype x; | |
char *ptr; | |
size_t pos; | |
VALUE r, klass; | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("bit_extract_data", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
if (CumoIsNArray(obj)) { | |
CumoGetNArray(obj,na); | |
if (na->size != 1) { | |
rb_raise(cumo_na_eShapeError,"narray size should be 1"); | |
} | |
klass = CLASS_OF(obj); | |
ptr = cumo_na_get_pointer_for_read(obj); | |
pos = cumo_na_get_offset(obj); | |
if (extract_databit) { | |
klass==cumo_cBit; | |
return x; | |
} | |
if ({BIT_DIGIT b; CUMO_LOAD_BIT(ptr,pos,b); x = m_from_sint(b);}) { | |
klass==cumo_cDFloat; | |
return x; | |
} | |
if (x = m_from_real(*(double*)(ptr+pos))) { | |
klass==cumo_cSFloat; | |
return x; | |
} | |
if (x = m_from_real(*(float*)(ptr+pos))) { | |
klass==cumo_cInt64; | |
return x; | |
} | |
if (x = m_from_int64(*(int64_t*)(ptr+pos))) { | |
klass==cumo_cInt32; | |
return x; | |
} | |
if (x = m_from_int32(*(int32_t*)(ptr+pos))) { | |
klass==cumo_cInt16; | |
return x; | |
} | |
if (x = m_from_sint(*(int16_t*)(ptr+pos))) { | |
klass==cumo_cInt8; | |
return x; | |
} | |
if (x = m_from_sint(*(int8_t*)(ptr+pos))) { | |
klass==cumo_cUInt64; | |
return x; | |
} | |
if (x = m_from_uint64(*(u_int64_t*)(ptr+pos))) { | |
klass==cumo_cUInt32; | |
return x; | |
} | |
if (x = m_from_uint32(*(u_int32_t*)(ptr+pos))) { | |
klass==cumo_cUInt16; | |
return x; | |
} | |
if (x = m_from_sint(*(u_int16_t*)(ptr+pos))) { | |
klass==cumo_cUInt8; | |
return x; | |
} | |
if (x = m_from_sint(*(u_int8_t*)(ptr+pos))) { | |
klass==cumo_cRObject; | |
return x; | |
} | |
// coerce | |
r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT); | |
if (CLASS_OF(r)==cT) { | |
return x = m_num_to_data(*(VALUE*)(ptr+pos))(r); | |
} | |
rb_raise(cumo_na_eCastError, "unknown conversion from %s to %s", | |
rb_class2name(CLASS_OF(obj)), | |
rb_class2name(cT)); | |
} | |
if (TYPE(obj)==T_ARRAY) { | |
if (RARRAY_LEN(obj) != 1) { | |
rb_raise(cumo_na_eShapeError,"array size should be 1"); | |
} | |
return m_num_to_data(RARRAY_AREF(obj,0)); | |
} | |
return m_num_to_data(obj); | |
} | |
*/ | |
#line 1 "narray/gen/tmpl/cast_array.c" | |
static VALUE
bit_cast_array(VALUE rary)
{ | |
VALUE nary; | |
cumo_narray_t *na; | |
nary = cumo_na_s_new_like(cT, rary); | |
CumoGetNArray(nary,na); | |
if (na->size > 0) { | |
bit_store_array(nary,rary);
} | |
return nary; | |
} | |
#line 1 "narray/gen/tmpl/cast.c" | |
/*
Cast object to Cumo::Bit.
@overload [](elements)
@overload cast(array)
@param [Numeric,Array] elements
@param [Array] array
@return [Cumo::Bit]
*/
static VALUE
bit_s_cast(VALUE type, VALUE obj)
{ | |
VALUE v; | |
cumo_narray_t *na; | |
dtype x; | |
if (CLASS_OF(obj)==cT) { | |
return obj; | |
} | |
if (RTEST(rb_obj_is_kind_of(obj,rb_cNumeric))) { | |
x = m_num_to_data(obj); | |
return bit_new_dim0(x);
} | |
if (RTEST(rb_obj_is_kind_of(obj,rb_cArray))) { | |
return bit_cast_array(obj);
} | |
if (CumoIsNArray(obj)) { | |
CumoGetNArray(obj,na); | |
v = cumo_na_new(cT, CUMO_NA_NDIM(na), CUMO_NA_SHAPE(na)); | |
if (CUMO_NA_SIZE(na) > 0) { | |
bit_store(v,obj);
} | |
return v; | |
} | |
rb_raise(cumo_na_eCastError,"cannot cast to %s",rb_class2name(type)); | |
return Qnil; | |
} | |
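/*
  Illustrative usage (assumed):
    Cumo::Bit[0, 1, 1, 0]  # cast from an Array of 0/1 elements
    Cumo::Bit.cast(1)      # cast a Numeric to a dimensionless bit array
*/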
#line 1 "narray/gen/tmpl_bit/aref.c" | |
static VALUE
bit_aref_cpu(int argc, VALUE *argv, VALUE self);
/* | |
Array element referenece or slice view. | |
@overload [](dim0,...,dimL) | |
@param [Numeric,Range,etc] dim0,...,dimL Multi-dimensional Index. | |
@return [Numeric,Cumo::NArray] Element object or NArray view.
--- Returns the element at +dim0+, +dim1+, ... are Numeric indices | |
for each dimension, or returns a NArray View as a sliced subarray if | |
+dim0+, +dim1+, ... includes other than Numeric index, e.g., Range | |
or Array or true. | |
@example | |
a = Cumo::DFloat.new(4,5).seq | |
=> Cumo::DFloat#shape=[4,5] | |
[[0, 1, 2, 3, 4], | |
[5, 6, 7, 8, 9], | |
[10, 11, 12, 13, 14], | |
[15, 16, 17, 18, 19]] | |
a[1,1] | |
=> 6.0 | |
a[1..3,1] | |
=> Cumo::DFloat#shape=[3] | |
[6, 11, 16] | |
a[1,[1,3,4]] | |
=> Cumo::DFloat#shape=[3] | |
[6, 8, 9] | |
a[true,2].fill(99) | |
a | |
=> Cumo::DFloat#shape=[4,5] | |
[[0, 1, 99, 3, 4], | |
[5, 6, 99, 8, 9], | |
[10, 11, 99, 13, 14], | |
[15, 16, 99, 18, 19]] | |
*/ | |
static VALUE
bit_aref(int argc, VALUE *argv, VALUE self)
{ | |
if (cumo_compatible_mode_enabled_p()) { | |
return bit_aref_cpu(argc, argv, self); | |
} else { | |
int result_nd; | |
size_t pos; | |
result_nd = cumo_na_get_result_dimension(self, argc, argv, 1, &pos); | |
return cumo_na_aref_main(argc, argv, self, 0, result_nd, pos); | |
} | |
} | |
#line 1 "narray/gen/tmpl_bit/aref_cpu.c" | |
/* | |
Array element referenece or slice view. | |
@overload [](dim0,...,dimL) | |
@param [Numeric,Range,etc] dim0,...,dimL Multi-dimensional Index. | |
@return [Numeric,Cumo::NArray] Element object or NArray view.
--- Returns the element at +dim0+, +dim1+, ... are Numeric indices | |
for each dimension, or returns a NArray View as a sliced subarray if | |
+dim0+, +dim1+, ... includes other than Numeric index, e.g., Range | |
or Array or true. | |
@example | |
a = Cumo::DFloat.new(4,5).seq | |
=> Cumo::DFloat#shape=[4,5] | |
[[0, 1, 2, 3, 4], | |
[5, 6, 7, 8, 9], | |
[10, 11, 12, 13, 14], | |
[15, 16, 17, 18, 19]] | |
a[1,1] | |
=> 6.0 | |
a[1..3,1] | |
=> Cumo::DFloat#shape=[3] | |
[6, 11, 16] | |
a[1,[1,3,4]] | |
=> Cumo::DFloat#shape=[3] | |
[6, 8, 9] | |
a[true,2].fill(99) | |
a | |
=> Cumo::DFloat#shape=[4,5] | |
[[0, 1, 99, 3, 4], | |
[5, 6, 99, 8, 9], | |
[10, 11, 99, 13, 14], | |
[15, 16, 99, 18, 19]] | |
*/ | |
static VALUE
bit_aref_cpu(int argc, VALUE *argv, VALUE self)
{ | |
int nd; | |
size_t pos; | |
char *ptr; | |
dtype x; | |
nd = cumo_na_get_result_dimension(self, argc, argv, 1, &pos); | |
if (nd) { | |
return cumo_na_aref_main(argc, argv, self, 0, nd, pos); | |
} else { | |
ptr = cumo_na_get_pointer_for_read(self); | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("bit_aref_cpu", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_LOAD_BIT(ptr,pos,x); | |
return m_data_to_num(x); | |
} | |
} | |
#line 1 "narray/gen/tmpl_bit/aset.c" | |
/* | |
Array element(s) set. | |
@overload []=(dim0,..,dimL,val) | |
@param [Numeric,Range,etc] dim0,..,dimL Multi-dimensional Index. | |
@param [Numeric,Cumo::NArray,etc] val Value(s) to be set to self. | |
@return [Numeric] returns val (last argument). | |
--- Replace element(s) at +dim0+, +dim1+, ... (index/range/array/true | |
for each dimention). Broadcasting mechanism is applied. | |
@example | |
a = Cumo::DFloat.new(3,4).seq | |
=> Cumo::DFloat#shape=[3,4] | |
[[0, 1, 2, 3], | |
[4, 5, 6, 7], | |
[8, 9, 10, 11]] | |
a[1,2]=99 | |
a | |
=> Cumo::DFloat#shape=[3,4] | |
[[0, 1, 2, 3], | |
[4, 5, 99, 7], | |
[8, 9, 10, 11]] | |
a[1,[0,2]] = [101,102] | |
a | |
=> Cumo::DFloat#shape=[3,4] | |
[[0, 1, 2, 3], | |
[101, 5, 102, 7], | |
[8, 9, 10, 11]] | |
a[1,true]=99 | |
a | |
=> Cumo::DFloat#shape=[3,4] | |
[[0, 1, 2, 3], | |
[99, 99, 99, 99], | |
[8, 9, 10, 11]] | |
*/ | |
static VALUE
bit_aset(int argc, VALUE *argv, VALUE self)
{ | |
int nd; | |
size_t pos; | |
VALUE a; | |
argc--; | |
if (argc==0) { | |
bit_store(self, argv[argc]);
} else { | |
nd = cumo_na_get_result_dimension(self, argc, argv, 1, &pos); | |
a = cumo_na_aref_main(argc, argv, self, 0, nd, pos); | |
bit_store(a, argv[argc]); | |
} | |
return argv[argc]; | |
} | |
#line 1 "narray/gen/tmpl/coerce_cast.c" | |
/* | |
return NArray with cast to the type of self. | |
@overload coerce_cast(type) | |
@return [nil] | |
*/ | |
static VALUE
bit_coerce_cast(VALUE self, VALUE type)
{ | |
return Qnil; | |
} | |
#line 1 "narray/gen/tmpl_bit/to_a.c" | |
static void
iter_bit_to_a(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a1; | |
size_t p1; | |
ssize_t s1; | |
size_t *idx1; | |
CUMO_BIT_DIGIT x=0; | |
VALUE a, y; | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_to_a", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
a = rb_ary_new2(i); | |
rb_ary_push(lp->args[1].value, a); | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1,p1+*idx1,x); idx1++; | |
y = m_data_to_num(x); | |
rb_ary_push(a,y); | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1,p1,x); p1+=s1; | |
y = m_data_to_num(x); | |
rb_ary_push(a,y); | |
} | |
} | |
} | |
/* | |
Convert self to Array. | |
@overload to_a
@return [Array] | |
*/ | |
static VALUE
bit_to_a(VALUE self)
{
cumo_ndfunc_arg_in_t ain[3] = {{Qnil,0},{cumo_sym_loop_opt},{cumo_sym_option}};
cumo_ndfunc_arg_out_t aout[1] = {{rb_cArray,0}}; // dummy?
cumo_ndfunc_t ndf = {iter_bit_to_a, CUMO_FULL_LOOP_NIP, 3,1, ain,aout};
return cumo_na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil); | |
} | |
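/*
  Illustrative usage (assumed):
    Cumo::Bit[0, 1, 1].to_a  #=> [0, 1, 1]; nested Arrays for higher dimensions
*/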
#line 1 "narray/gen/tmpl_bit/fill.c" | |
static void
iter_bit_fill(cumo_na_loop_t *const lp)
{ | |
size_t n; | |
size_t p3; | |
ssize_t s3; | |
size_t *idx3; | |
int len; | |
CUMO_BIT_DIGIT *a3; | |
CUMO_BIT_DIGIT y; | |
VALUE x = lp->option; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_fill", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
if (x==INT2FIX(0) || x==Qfalse) { | |
y = 0; | |
} else | |
if (x==INT2FIX(1) || x==Qtrue) { | |
y = ~(CUMO_BIT_DIGIT)0; | |
} else { | |
rb_raise(rb_eArgError, "invalid value for Bit"); | |
} | |
CUMO_INIT_COUNTER(lp, n); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a3, p3, s3, idx3); | |
if (idx3) { | |
y = y&1; | |
for (; n--;) { | |
CUMO_STORE_BIT(a3, p3+*idx3, y); idx3++; | |
} | |
} else if (s3!=1) { | |
y = y&1; | |
for (; n--;) { | |
CUMO_STORE_BIT(a3, p3, y); p3+=s3; | |
} | |
} else { | |
if (p3>0 || n<CUMO_NB) { | |
len = CUMO_NB - p3; | |
if ((int)n<len) len=n; | |
*a3 = (y & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3)); | |
a3++; | |
n -= len; | |
} | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
*(a3++) = y; | |
} | |
if (n>0) { | |
*a3 = (y & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n); | |
} | |
} | |
} | |
/*
Fill elements with other.
@overload fill other
@param [Numeric] other
@return [Cumo::Bit] self.
*/
static VALUE
bit_fill(VALUE self, VALUE val)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{cumo_sym_option}};
cumo_ndfunc_t ndf = {iter_bit_fill, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, val); | |
return self; | |
} | |
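/*
  Illustrative usage (assumed); only 0/1 and false/true are accepted:
    Cumo::Bit.new(8).fill(1)  # all bits set
    Cumo::Bit.new(8).fill(0)  # all bits cleared
*/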
#line 1 "narray/gen/tmpl_bit/format.c" | |
static VALUE
format_bit(VALUE fmt, dtype x)
{ | |
if (NIL_P(fmt)) { | |
char s[4]; | |
int n; | |
n = m_sprintf(s,x); | |
return rb_str_new(s,n); | |
} | |
return rb_funcall(fmt, '%', 1, m_data_to_num(x)); | |
} | |
static void
iter_bit_format(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a1, x=0; | |
size_t p1; | |
char *p2; | |
ssize_t s1, s2; | |
size_t *idx1; | |
VALUE y; | |
VALUE fmt = lp->option; | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_format", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR(lp, 1, p2, s2); | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, x); idx1++; | |
y = format_bit(fmt, x);
CUMO_SET_DATA_STRIDE(p2, s2, VALUE, y); | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1, x); p1+=s1; | |
y = format_bit(fmt, x); | |
CUMO_SET_DATA_STRIDE(p2, s2, VALUE, y); | |
} | |
} | |
} | |
/*
Format elements into strings.
@overload format(format)
@param [String] format
@return [Cumo::RObject] array of formatted strings.
*/
static VALUE
bit_format(int argc, VALUE *argv, VALUE self)
{
VALUE fmt=Qnil;
cumo_ndfunc_arg_in_t ain[2] = {{Qnil,0},{cumo_sym_option}};
cumo_ndfunc_arg_out_t aout[1] = {{cumo_cRObject,0}};
cumo_ndfunc_t ndf = {iter_bit_format, CUMO_FULL_LOOP_NIP, 2,1, ain,aout};
rb_scan_args(argc, argv, "01", &fmt); | |
return cumo_na_ndloop(&ndf, 2, self, fmt); | |
} | |
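/*
  Illustrative usage (assumed):
    Cumo::Bit[0, 1].format("%d")  #=> Cumo::RObject array of the strings "0" and "1"
*/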
#line 1 "narray/gen/tmpl_bit/format_to_a.c" | |
static void
iter_bit_format_to_a(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a1, x=0; | |
size_t p1; | |
ssize_t s1; | |
size_t *idx1; | |
VALUE y; | |
VALUE fmt = lp->option; | |
volatile VALUE a; | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
a = rb_ary_new2(i); | |
rb_ary_push(lp->args[1].value, a); | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, x); idx1++; | |
y = format_bit(fmt, x); | |
rb_ary_push(a,y); | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1, x); p1+=s1; | |
y = format_bit(fmt, x); | |
rb_ary_push(a,y); | |
} | |
} | |
} | |
/*
Format elements into strings.
@overload format_to_a(format)
@param [String] format
@return [Array] array of formatted strings.
*/
static VALUE
bit_format_to_a(int argc, VALUE *argv, VALUE self)
{
VALUE fmt=Qnil;
cumo_ndfunc_arg_in_t ain[3] = {{Qnil,0},{cumo_sym_loop_opt},{cumo_sym_option}};
cumo_ndfunc_arg_out_t aout[1] = {{rb_cArray,0}}; // dummy?
cumo_ndfunc_t ndf = {iter_bit_format_to_a, CUMO_FULL_LOOP_NIP, 3,1, ain,aout};
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_format_to_a", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
rb_scan_args(argc, argv, "01", &fmt); | |
return cumo_na_ndloop_cast_narray_to_rarray(&ndf, self, fmt); | |
} | |
#line 1 "narray/gen/tmpl_bit/inspect.c" | |
static VALUE
iter_bit_inspect(char *ptr, size_t pos, VALUE fmt)
{ | |
dtype x; | |
CUMO_LOAD_BIT(ptr,pos,x); | |
return format_bit(fmt, x);
} | |
/* | |
Returns a string containing a human-readable representation of NArray. | |
@overload inspect | |
@return [String] | |
*/ | |
static VALUE
bit_inspect(VALUE ary)
{ | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("bit_inspect", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
return cumo_na_ndloop_inspect(ary, iter_bit_inspect, Qnil);
} | |
#line 1 "narray/gen/tmpl_bit/each.c" | |
static void
iter_bit_each(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a1, x=0; | |
size_t p1; | |
ssize_t s1; | |
size_t *idx1; | |
VALUE y; | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_each", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, x); idx1++; | |
y = m_data_to_num(x); | |
rb_yield(y); | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1, x); p1+=s1; | |
y = m_data_to_num(x); | |
rb_yield(y); | |
} | |
} | |
} | |
/* | |
Calls the given block once for each element in self, | |
passing that element as a parameter. | |
@overload each
@return [Cumo::NArray] self | |
For a block {|x| ... } | |
@yield [x] x is element of NArray. | |
*/ | |
static VALUE
bit_each(VALUE self)
{
cumo_ndfunc_arg_in_t ain[1] = {{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_each, CUMO_FULL_LOOP_NIP, 1,0, ain,0};
cumo_na_ndloop(&ndf, 1, self); | |
return self; | |
} | |
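/*
  Illustrative usage (assumed):
    Cumo::Bit[0, 1, 1].each {|x| puts x }  # prints 0, 1, 1
*/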
#line 1 "narray/gen/tmpl_bit/each_with_index.c" | |
static inline void | |
yield_each_with_index(dtype x, size_t *c, VALUE *a, int nd, int md) | |
{ | |
int j; | |
a[0] = m_data_to_num(x); | |
for (j=0; j<=nd; j++) { | |
a[j+1] = SIZET2NUM(c[j]); | |
} | |
rb_yield(rb_ary_new4(md,a)); | |
} | |
static void
iter_bit_each_with_index(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a1, x=0; | |
size_t p1; | |
ssize_t s1; | |
size_t *idx1; | |
VALUE *a; | |
size_t *c; | |
int nd, md; | |
c = (size_t*)(lp->opt_ptr); | |
nd = lp->ndim - 1; | |
md = lp->ndim + 1; | |
a = ALLOCA_N(VALUE,md); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
c[nd] = 0; | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_each_with_index", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, x); idx1++; | |
yield_each_with_index(x,c,a,nd,md); | |
c[nd]++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1, x); p1+=s1; | |
yield_each_with_index(x,c,a,nd,md); | |
c[nd]++; | |
} | |
} | |
} | |
/* | |
Invokes the given block once for each element of self, | |
passing that element and indices along each axis as parameters. | |
@overload each_with_index
@return [Cumo::NArray] self | |
For a block {|x,i,j,...| ... } | |
@yield [x,i,j,...] x is an element, i,j,... are multidimensional indices. | |
*/ | |
static VALUE
bit_each_with_index(VALUE self)
{
cumo_ndfunc_arg_in_t ain[1] = {{Qnil,0}};
cumo_ndfunc_t ndf = {iter_bit_each_with_index, CUMO_FULL_LOOP_NIP, 1,0, ain,0};
cumo_na_ndloop_with_index(&ndf, 1, self); | |
return self; | |
} | |
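/*
  Illustrative usage (assumed); the block receives the element followed by its indices:
    Cumo::Bit[[0, 1], [1, 0]].each_with_index {|x, i, j| puts "#{i},#{j}: #{x}" }
*/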
#line 1 "narray/gen/tmpl_bit/unary.c" | |
static void
iter_bit_copy(cumo_na_loop_t *const lp)
{ | |
size_t n; | |
size_t p1, p3; | |
ssize_t s1, s3; | |
size_t *idx1, *idx3; | |
int o1, l1, r1, len; | |
CUMO_BIT_DIGIT *a1, *a3; | |
CUMO_BIT_DIGIT x; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_copy", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, n); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_BIT_IDX(lp, 1, a3, p3, s3, idx3); | |
if (s1!=1 || s3!=1 || idx1 || idx3) { | |
for (; n--;) { | |
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x); | |
y = m_copy(x);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, y); | |
} | |
} else { | |
o1 = p1 % CUMO_NB; | |
o1 -= p3; | |
l1 = CUMO_NB+o1; | |
r1 = CUMO_NB-o1; | |
if (p3>0 || n<CUMO_NB) { | |
len = CUMO_NB - p3; | |
if ((int)n<len) len=n; | |
if (o1>=0) x = *a1>>o1; | |
else x = *a1<<-o1; | |
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1; | |
a1++; | |
y = m_copy(x); | |
*a3 = (y & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3)); | |
a3++; | |
n -= len; | |
} | |
if (o1==0) { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *(a1++); | |
y = m_copy(x); | |
*(a3++) = y; | |
} | |
} else { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
if (o1>0) x |= *(a1+1)<<r1; | |
a1++; | |
y = m_copy(x); | |
*(a3++) = y; | |
} | |
} | |
if (n>0) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
y = m_copy(x); | |
*a3 = (y & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n); | |
} | |
} | |
} | |
/* | |
Unary copy. | |
@overload copy | |
@return [Cumo::Bit] copy of self.
*/ | |
static VALUE
bit_copy(VALUE self)
{
cumo_ndfunc_arg_in_t ain[1] = {{cT,0}};
cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
cumo_ndfunc_t ndf = {iter_bit_copy, CUMO_FULL_LOOP, 1,1, ain,aout};
return cumo_na_ndloop(&ndf, 1, self); | |
} | |
#line 1 "narray/gen/tmpl_bit/unary.c" | |
static void
iter_bit_not(cumo_na_loop_t *const lp)
{ | |
size_t n; | |
size_t p1, p3; | |
ssize_t s1, s3; | |
size_t *idx1, *idx3; | |
int o1, l1, r1, len; | |
CUMO_BIT_DIGIT *a1, *a3; | |
CUMO_BIT_DIGIT x; | |
CUMO_BIT_DIGIT y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_not", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, n); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_BIT_IDX(lp, 1, a3, p3, s3, idx3); | |
if (s1!=1 || s3!=1 || idx1 || idx3) { | |
for (; n--;) { | |
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x); | |
y = m_not(x);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, y); | |
} | |
} else { | |
o1 = p1 % CUMO_NB; | |
o1 -= p3; | |
l1 = CUMO_NB+o1; | |
r1 = CUMO_NB-o1; | |
if (p3>0 || n<CUMO_NB) { | |
len = CUMO_NB - p3; | |
if ((int)n<len) len=n; | |
if (o1>=0) x = *a1>>o1; | |
else x = *a1<<-o1; | |
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1; | |
a1++; | |
y = m_not(x); | |
*a3 = (y & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3)); | |
a3++; | |
n -= len; | |
} | |
if (o1==0) { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *(a1++); | |
y = m_not(x); | |
*(a3++) = y; | |
} | |
} else { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
if (o1>0) x |= *(a1+1)<<r1; | |
a1++; | |
y = m_not(x); | |
*(a3++) = y; | |
} | |
} | |
if (n>0) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
y = m_not(x); | |
*a3 = (y & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n); | |
} | |
} | |
} | |
/* | |
Unary not. | |
@overload not | |
@return [Cumo::Bit] not of self.
*/ | |
static VALUE
bit_not(VALUE self)
{
cumo_ndfunc_arg_in_t ain[1] = {{cT,0}};
cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
cumo_ndfunc_t ndf = {iter_bit_not, CUMO_FULL_LOOP, 1,1, ain,aout};
return cumo_na_ndloop(&ndf, 1, self); | |
} | |
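/*
  Illustrative usage (assuming bit_not is bound to the unary ~ operator, as in Numo::Bit):
    ~Cumo::Bit[0, 1, 1]  #=> Cumo::Bit[1, 0, 0]
*/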
#line 1 "narray/gen/tmpl_bit/binary.c" | |
static void
iter_bit_and(cumo_na_loop_t *const lp)
{ | |
size_t n; | |
size_t p1, p2, p3; | |
ssize_t s1, s2, s3; | |
size_t *idx1, *idx2, *idx3; | |
int o1, o2, l1, l2, r1, r2, len; | |
CUMO_BIT_DIGIT *a1, *a2, *a3; | |
CUMO_BIT_DIGIT x, y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_and", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, n); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2); | |
CUMO_INIT_PTR_BIT_IDX(lp, 2, a3, p3, s3, idx3); | |
if (s1!=1 || s2!=1 || s3!=1 || idx1 || idx2 || idx3) { | |
for (; n--;) { | |
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x); | |
CUMO_LOAD_BIT_STEP(a2, p2, s2, idx2, y); | |
x = m_and(x,y);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x); | |
} | |
} else { | |
o1 = p1 % CUMO_NB; | |
o1 -= p3; | |
o2 = p2 % CUMO_NB; | |
o2 -= p3; | |
l1 = CUMO_NB+o1; | |
r1 = CUMO_NB-o1; | |
l2 = CUMO_NB+o2; | |
r2 = CUMO_NB-o2; | |
if (p3>0 || n<CUMO_NB) { | |
len = CUMO_NB - p3; | |
if ((int)n<len) len=n; | |
if (o1>=0) x = *a1>>o1; | |
else x = *a1<<-o1; | |
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1; | |
a1++; | |
if (o2>=0) y = *a2>>o2; | |
else y = *a2<<-o2; | |
if (p2+len>CUMO_NB) y |= *(a2+1)<<r2; | |
a2++; | |
x = m_and(x,y); | |
*a3 = (x & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3)); | |
a3++; | |
n -= len; | |
} | |
if (o1==0 && o2==0) { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *(a1++); | |
y = *(a2++); | |
x = m_and(x,y); | |
*(a3++) = x; | |
} | |
} else { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
if (o1>0) x |= *(a1+1)<<r1; | |
a1++; | |
y = *a2>>o2; | |
if (o2<0) y |= *(a2-1)>>l2; | |
if (o2>0) y |= *(a2+1)<<r2; | |
a2++; | |
x = m_and(x,y); | |
*(a3++) = x; | |
} | |
} | |
if (n>0) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
y = *a2>>o2; | |
if (o2<0) y |= *(a2-1)>>l2; | |
x = m_and(x,y); | |
*a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n); | |
} | |
} | |
} | |
/* | |
Binary and. | |
@overload and other | |
@param [Cumo::NArray,Numeric] other | |
@return [Cumo::NArray] & of self and other. | |
*/ | |
static VALUE
bit_and(VALUE self, VALUE other)
{
cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
cumo_ndfunc_t ndf = { iter_bit_and, CUMO_FULL_LOOP, 2, 1, ain, aout };
return cumo_na_ndloop(&ndf, 2, self, other); | |
} | |
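/*
  Illustrative usage (assuming bit_and is bound to &, as in Numo::Bit):
    Cumo::Bit[0, 1, 1, 0] & Cumo::Bit[1, 1, 0, 0]  #=> Cumo::Bit[0, 1, 0, 0]
*/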
#line 1 "narray/gen/tmpl_bit/binary.c" | |
static void
iter_bit_or(cumo_na_loop_t *const lp)
{ | |
size_t n; | |
size_t p1, p2, p3; | |
ssize_t s1, s2, s3; | |
size_t *idx1, *idx2, *idx3; | |
int o1, o2, l1, l2, r1, r2, len; | |
CUMO_BIT_DIGIT *a1, *a2, *a3; | |
CUMO_BIT_DIGIT x, y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_or", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, n); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2); | |
CUMO_INIT_PTR_BIT_IDX(lp, 2, a3, p3, s3, idx3); | |
if (s1!=1 || s2!=1 || s3!=1 || idx1 || idx2 || idx3) { | |
for (; n--;) { | |
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x); | |
CUMO_LOAD_BIT_STEP(a2, p2, s2, idx2, y); | |
x = m_or(x,y);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x); | |
} | |
} else { | |
o1 = p1 % CUMO_NB; | |
o1 -= p3; | |
o2 = p2 % CUMO_NB; | |
o2 -= p3; | |
l1 = CUMO_NB+o1; | |
r1 = CUMO_NB-o1; | |
l2 = CUMO_NB+o2; | |
r2 = CUMO_NB-o2; | |
if (p3>0 || n<CUMO_NB) { | |
len = CUMO_NB - p3; | |
if ((int)n<len) len=n; | |
if (o1>=0) x = *a1>>o1; | |
else x = *a1<<-o1; | |
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1; | |
a1++; | |
if (o2>=0) y = *a2>>o2; | |
else y = *a2<<-o2; | |
if (p2+len>CUMO_NB) y |= *(a2+1)<<r2; | |
a2++; | |
x = m_or(x,y); | |
*a3 = (x & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3)); | |
a3++; | |
n -= len; | |
} | |
if (o1==0 && o2==0) { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *(a1++); | |
y = *(a2++); | |
x = m_or(x,y); | |
*(a3++) = x; | |
} | |
} else { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
if (o1>0) x |= *(a1+1)<<r1; | |
a1++; | |
y = *a2>>o2; | |
if (o2<0) y |= *(a2-1)>>l2; | |
if (o2>0) y |= *(a2+1)<<r2; | |
a2++; | |
x = m_or(x,y); | |
*(a3++) = x; | |
} | |
} | |
if (n>0) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
y = *a2>>o2; | |
if (o2<0) y |= *(a2-1)>>l2; | |
x = m_or(x,y); | |
*a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n); | |
} | |
} | |
} | |
/* | |
Binary or. | |
@overload or other | |
@param [Cumo::NArray,Numeric] other | |
@return [Cumo::NArray] | of self and other. | |
*/ | |
static VALUE
bit_or(VALUE self, VALUE other)
{
cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
cumo_ndfunc_t ndf = { iter_bit_or, CUMO_FULL_LOOP, 2, 1, ain, aout };
return cumo_na_ndloop(&ndf, 2, self, other); | |
} | |
#line 1 "narray/gen/tmpl_bit/binary.c" | |
static void
iter_bit_xor(cumo_na_loop_t *const lp)
{ | |
size_t n; | |
size_t p1, p2, p3; | |
ssize_t s1, s2, s3; | |
size_t *idx1, *idx2, *idx3; | |
int o1, o2, l1, l2, r1, r2, len; | |
CUMO_BIT_DIGIT *a1, *a2, *a3; | |
CUMO_BIT_DIGIT x, y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_xor", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, n); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2); | |
CUMO_INIT_PTR_BIT_IDX(lp, 2, a3, p3, s3, idx3); | |
if (s1!=1 || s2!=1 || s3!=1 || idx1 || idx2 || idx3) { | |
for (; n--;) { | |
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x); | |
CUMO_LOAD_BIT_STEP(a2, p2, s2, idx2, y); | |
x = m_xor(x,y);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x); | |
} | |
} else { | |
o1 = p1 % CUMO_NB; | |
o1 -= p3; | |
o2 = p2 % CUMO_NB; | |
o2 -= p3; | |
l1 = CUMO_NB+o1; | |
r1 = CUMO_NB-o1; | |
l2 = CUMO_NB+o2; | |
r2 = CUMO_NB-o2; | |
if (p3>0 || n<CUMO_NB) { | |
len = CUMO_NB - p3; | |
if ((int)n<len) len=n; | |
if (o1>=0) x = *a1>>o1; | |
else x = *a1<<-o1; | |
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1; | |
a1++; | |
if (o2>=0) y = *a2>>o2; | |
else y = *a2<<-o2; | |
if (p2+len>CUMO_NB) y |= *(a2+1)<<r2; | |
a2++; | |
x = m_xor(x,y); | |
*a3 = (x & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3)); | |
a3++; | |
n -= len; | |
} | |
if (o1==0 && o2==0) { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *(a1++); | |
y = *(a2++); | |
x = m_xor(x,y); | |
*(a3++) = x; | |
} | |
} else { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
if (o1>0) x |= *(a1+1)<<r1; | |
a1++; | |
y = *a2>>o2; | |
if (o2<0) y |= *(a2-1)>>l2; | |
if (o2>0) y |= *(a2+1)<<r2; | |
a2++; | |
x = m_xor(x,y); | |
*(a3++) = x; | |
} | |
} | |
if (n>0) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
y = *a2>>o2; | |
if (o2<0) y |= *(a2-1)>>l2; | |
x = m_xor(x,y); | |
*a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n); | |
} | |
} | |
} | |
/* | |
Binary xor. | |
@overload xor other | |
@param [Cumo::NArray,Numeric] other | |
@return [Cumo::NArray] ^ of self and other. | |
*/ | |
static VALUE
bit_xor(VALUE self, VALUE other)
{
cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
cumo_ndfunc_t ndf = { iter_bit_xor, CUMO_FULL_LOOP, 2, 1, ain, aout };
return cumo_na_ndloop(&ndf, 2, self, other); | |
} | |
iter_bit_xor | |
#line 1 "narray/gen/tmpl_bit/binary.c" | |
static void | |
iter_bit_eq(cumo_na_loop_t *const lp)
{ | |
size_t n; | |
size_t p1, p2, p3; | |
ssize_t s1, s2, s3; | |
size_t *idx1, *idx2, *idx3; | |
int o1, o2, l1, l2, r1, r2, len; | |
CUMO_BIT_DIGIT *a1, *a2, *a3; | |
CUMO_BIT_DIGIT x, y; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_eq", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, n); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2); | |
CUMO_INIT_PTR_BIT_IDX(lp, 2, a3, p3, s3, idx3); | |
if (s1!=1 || s2!=1 || s3!=1 || idx1 || idx2 || idx3) { | |
for (; n--;) { | |
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x); | |
CUMO_LOAD_BIT_STEP(a2, p2, s2, idx2, y); | |
x = m_eq(x,y);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x); | |
} | |
} else { | |
o1 = p1 % CUMO_NB; | |
o1 -= p3; | |
o2 = p2 % CUMO_NB; | |
o2 -= p3; | |
l1 = CUMO_NB+o1; | |
r1 = CUMO_NB-o1; | |
l2 = CUMO_NB+o2; | |
r2 = CUMO_NB-o2; | |
if (p3>0 || n<CUMO_NB) { | |
len = CUMO_NB - p3; | |
if ((int)n<len) len=n; | |
if (o1>=0) x = *a1>>o1; | |
else x = *a1<<-o1; | |
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1; | |
a1++; | |
if (o2>=0) y = *a2>>o2; | |
else y = *a2<<-o2; | |
if (p2+len>CUMO_NB) y |= *(a2+1)<<r2; | |
a2++; | |
x = m_eq(x,y); | |
*a3 = (x & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3)); | |
a3++; | |
n -= len; | |
} | |
if (o1==0 && o2==0) { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *(a1++); | |
y = *(a2++); | |
x = m_eq(x,y); | |
*(a3++) = x; | |
} | |
} else { | |
for (; n>=CUMO_NB; n-=CUMO_NB) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
if (o1>0) x |= *(a1+1)<<r1; | |
a1++; | |
y = *a2>>o2; | |
if (o2<0) y |= *(a2-1)>>l2; | |
if (o2>0) y |= *(a2+1)<<r2; | |
a2++; | |
x = m_eq(x,y); | |
*(a3++) = x; | |
} | |
} | |
if (n>0) { | |
x = *a1>>o1; | |
if (o1<0) x |= *(a1-1)>>l1; | |
y = *a2>>o2; | |
if (o2<0) y |= *(a2-1)>>l2; | |
x = m_eq(x,y); | |
*a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n); | |
} | |
} | |
} | |
/* | |
Binary eq. | |
@overload eq other | |
@param [Cumo::NArray,Numeric] other | |
@return [Cumo::NArray] eq of self and other. | |
*/ | |
static VALUE
bit_eq(VALUE self, VALUE other)
{
    cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
    cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
    cumo_ndfunc_t ndf = { iter_bit_eq, CUMO_FULL_LOOP, 2, 1, ain, aout };
    return cumo_na_ndloop(&ndf, 2, self, other);
}
#line 1 "narray/gen/tmpl_bit/bit_count.c" | |
#undef int_t | |
#define int_t uint64_t | |
void cumo_iter_bit_count_true_index_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, size_t *idx1, uint64_t n);
void cumo_iter_bit_count_true_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, ssize_t s1, uint64_t n);
void cumo_iter_bit_count_true_index_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, size_t *idx1, ssize_t s2, uint64_t n);
void cumo_iter_bit_count_true_stride_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, ssize_t s1, ssize_t s2, uint64_t n);
static void
iter_bit_count_true(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a1; | |
size_t p1; | |
char *p2; | |
ssize_t s1, s2; | |
size_t *idx1; | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR(lp, 1, p2, s2); | |
if (s2==0) { | |
if (idx1) { | |
cumo_iter_bit_count_true_index_kernel_launch(p1,p2,a1,idx1,i);
} else { | |
cumo_iter_bit_count_true_stride_kernel_launch(p1,p2,a1,s1,i);
} | |
} else { | |
if (idx1) { | |
cumo_iter_bit_count_true_index_stride_kernel_launch(p1,p2,a1,idx1,s2,i);
} else { | |
cumo_iter_bit_count_true_stride_stride_kernel_launch(p1,p2,a1,s1,s2,i);
} | |
} | |
} | |
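/*
 * Dispatch note: s2 is the output stride, so s2==0 means every input bit
 * accumulates into a single counter, while idx1 selects index-array access
 * over plain strided access for the bit operand. The four kernel launchers
 * above cover those combinations, so the counting itself stays on the GPU.
 */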
static VALUE | |
bit_count_true_cpu(int argc, VALUE *argv, VALUE self);
/* | |
Returns the number of one (true) bits.
If an axis argument is supplied, returns an Int-array counted along the axes.
@overload count_true(axis:nil, keepdims:false)
@param [Integer,Array,Range] axis (keyword) axes to be counted. | |
@param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one. | |
@return [Cumo::UInt64] | |
*/ | |
static VALUE | |
bit_count_true(int argc, VALUE *argv, VALUE self)
{ | |
if (cumo_compatible_mode_enabled_p()) { | |
return bit_count_true_cpu(argc, argv, self); | |
} else { | |
VALUE v, reduce; | |
cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}}; | |
cumo_ndfunc_arg_out_t aout[1] = {{cumo_cUInt64,0}}; | |
        cumo_ndfunc_t ndf = { iter_bit_count_true, CUMO_FULL_LOOP_NIP, 3, 1, ain, aout };
        reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
        v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(0));
        return v;
    }
}
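/*
 * Illustrative Ruby usage (a sketch assuming the Numo-compatible API; on the
 * GPU path the result remains a Cumo::UInt64 array rather than a plain Ruby
 * Integer, see the lack of extraction above):
 *
 *   a = Cumo::Bit[0, 1, 1, 0]
 *   a.count_true                                 # number of 1-bits
 *   a.count_false                                # number of 0-bits (below)
 *   Cumo::Bit[[0, 1], [1, 1]].count_true(axis: 0)  # per-column counts
 */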
#line 1 "narray/gen/tmpl_bit/bit_count.c" | |
#undef int_t | |
#define int_t uint64_t | |
void cumo_iter_bit_count_false_index_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, size_t *idx1, uint64_t n);
void cumo_iter_bit_count_false_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, ssize_t s1, uint64_t n);
void cumo_iter_bit_count_false_index_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, size_t *idx1, ssize_t s2, uint64_t n);
void cumo_iter_bit_count_false_stride_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, ssize_t s1, ssize_t s2, uint64_t n);
static void
iter_bit_count_false(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a1; | |
size_t p1; | |
char *p2; | |
ssize_t s1, s2; | |
size_t *idx1; | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR(lp, 1, p2, s2); | |
if (s2==0) { | |
if (idx1) { | |
cumo_iter_bit_count_false_index_kernel_launch(p1,p2,a1,idx1,i);
} else { | |
cumo_iter_bit_count_false_stride_kernel_launch(p1,p2,a1,s1,i);
} | |
} else { | |
if (idx1) { | |
cumo_iter_bit_count_false_index_stride_kernel_launch(p1,p2,a1,idx1,s2,i);
} else { | |
cumo_iter_bit_count_false_stride_stride_kernel_launch(p1,p2,a1,s1,s2,i);
} | |
} | |
} | |
static VALUE | |
bit_count_false_cpu(int argc, VALUE *argv, VALUE self);
/* | |
Returns the number of zero (false) bits.
If an axis argument is supplied, returns an Int-array counted along the axes.
@overload count_false(axis:nil, keepdims:false)
@param [Integer,Array,Range] axis (keyword) axes to be counted. | |
@param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one. | |
@return [Cumo::UInt64] | |
*/ | |
static VALUE | |
bit_count_false(int argc, VALUE *argv, VALUE self)
{ | |
if (cumo_compatible_mode_enabled_p()) { | |
return bit_count_false_cpu(argc, argv, self); | |
} else { | |
VALUE v, reduce; | |
cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}}; | |
cumo_ndfunc_arg_out_t aout[1] = {{cumo_cUInt64,0}}; | |
        cumo_ndfunc_t ndf = { iter_bit_count_false, CUMO_FULL_LOOP_NIP, 3, 1, ain, aout };
        reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
        v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(0));
        return v;
    }
}
#line 1 "narray/gen/tmpl_bit/bit_count_cpu.c" | |
#undef int_t | |
#define int_t int64_t | |
static void | |
iter_bit_count_true_cpu(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a1; | |
size_t p1; | |
char *p2; | |
ssize_t s1, s2; | |
size_t *idx1; | |
CUMO_BIT_DIGIT x=0; | |
int_t y; | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_count_true_cpu", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR(lp, 1, p2, s2); | |
if (s2==0) { | |
CUMO_GET_DATA(p2, int_t, y); | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, x); | |
idx1++; | |
if (m_count_true_cpu(x)) {
y++; | |
} | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1, x); | |
p1 += s1; | |
if (m_count_true_cpu(x)) { | |
y++; | |
} | |
} | |
} | |
*(int_t*)p2 = y; | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, x); | |
idx1++; | |
if (m_count_true_cpu(x)) { | |
CUMO_GET_DATA(p2, int_t, y); | |
y++; | |
CUMO_SET_DATA(p2, int_t, y); | |
} | |
p2+=s2; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1, x); | |
p1+=s1; | |
if (m_count_true_cpu(x)) { | |
CUMO_GET_DATA(p2, int_t, y); | |
y++; | |
CUMO_SET_DATA(p2, int_t, y); | |
} | |
p2+=s2; | |
} | |
} | |
} | |
} | |
/* | |
Returns the number of one (true) bits, counted on the CPU (synchronizes with the device).
If an axis argument is supplied, returns an Int-array counted along the axes.
@overload count_true_cpu(axis:nil, keepdims:false)
@param [Integer,Array,Range] axis (keyword) axes to be counted. | |
@param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one. | |
@return [Cumo::Int64] | |
*/ | |
static VALUE | |
bit_count_true_cpu(int argc, VALUE *argv, VALUE self)
{ | |
VALUE v, reduce; | |
cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}}; | |
cumo_ndfunc_arg_out_t aout[1] = {{cumo_cInt64,0}}; | |
    cumo_ndfunc_t ndf = { iter_bit_count_true_cpu, CUMO_FULL_LOOP_NIP, 3, 1, ain, aout };
    reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
    v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(0));
    return rb_funcall(v,rb_intern("extract_cpu"),0);
}
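/*
 * Why the *_cpu variants exist: the GPU reductions above return a device
 * array without extracting a Ruby value, whereas count_true_cpu and
 * count_false_cpu synchronize (see CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE),
 * count on the host, and return the result via "extract_cpu". The
 * where/where2/mask implementations below use a host-side true count to
 * size their index buffers before filling them.
 */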
#line 1 "narray/gen/tmpl_bit/bit_count_cpu.c" | |
#undef int_t | |
#define int_t int64_t | |
static void | |
iter_bit_count_false_cpu(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a1; | |
size_t p1; | |
char *p2; | |
ssize_t s1, s2; | |
size_t *idx1; | |
CUMO_BIT_DIGIT x=0; | |
int_t y; | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_count_false_cpu", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR(lp, 1, p2, s2); | |
if (s2==0) { | |
CUMO_GET_DATA(p2, int_t, y); | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, x); | |
idx1++; | |
if (m_count_false_cpu(x)) {
y++; | |
} | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1, x); | |
p1 += s1; | |
if (m_count_false_cpu(x)) { | |
y++; | |
} | |
} | |
} | |
*(int_t*)p2 = y; | |
} else { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, x); | |
idx1++; | |
if (m_count_false_cpu(x)) { | |
CUMO_GET_DATA(p2, int_t, y); | |
y++; | |
CUMO_SET_DATA(p2, int_t, y); | |
} | |
p2+=s2; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1, x); | |
p1+=s1; | |
if (m_count_false_cpu(x)) { | |
CUMO_GET_DATA(p2, int_t, y); | |
y++; | |
CUMO_SET_DATA(p2, int_t, y); | |
} | |
p2+=s2; | |
} | |
} | |
} | |
} | |
/* | |
Returns the number of zero (false) bits, counted on the CPU (synchronizes with the device).
If an axis argument is supplied, returns an Int-array counted along the axes.
@overload count_false_cpu(axis:nil, keepdims:false)
@param [Integer,Array,Range] axis (keyword) axes to be counted. | |
@param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one. | |
@return [Cumo::Int64] | |
*/ | |
static VALUE | |
bit_count_false_cpu(int argc, VALUE *argv, VALUE self)
{ | |
VALUE v, reduce; | |
cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}}; | |
cumo_ndfunc_arg_out_t aout[1] = {{cumo_cInt64,0}}; | |
    cumo_ndfunc_t ndf = { iter_bit_count_false_cpu, CUMO_FULL_LOOP_NIP, 3, 1, ain, aout };
    reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
    v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(0));
    return rb_funcall(v,rb_intern("extract_cpu"),0);
}
#line 1 "narray/gen/tmpl_bit/bit_reduce.c" | |
static void | |
iter_bit_all_p(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a1, *a2; | |
size_t p1, p2; | |
ssize_t s1, s2; | |
size_t *idx1, *idx2; | |
CUMO_BIT_DIGIT x=0, y=0; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_all_p", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a2, p2+*idx2, y); | |
if (y == 1) {
CUMO_LOAD_BIT(a1, p1+*idx1, x); | |
if (x != 1) { | |
CUMO_STORE_BIT(a2, p2+*idx2, x); | |
} | |
} | |
idx1++; | |
idx2++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a2, p2+*idx2, y); | |
if (y == 1) { | |
CUMO_LOAD_BIT(a1, p1, x); | |
if (x != 1) { | |
CUMO_STORE_BIT(a2, p2+*idx2, x); | |
} | |
} | |
p1 += s1; | |
idx2++; | |
} | |
} | |
} else if (s2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a2, p2, y); | |
if (y == 1) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, x); | |
if (x != 1) { | |
CUMO_STORE_BIT(a2, p2, x); | |
} | |
} | |
idx1++; | |
p2 += s2; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a2, p2, y); | |
if (y == 1) { | |
CUMO_LOAD_BIT(a1, p1, x); | |
if (x != 1) { | |
CUMO_STORE_BIT(a2, p2, x); | |
} | |
} | |
p1 += s1; | |
p2 += s2; | |
} | |
} | |
} else { | |
CUMO_LOAD_BIT(a2, p2, x); | |
if (x != 1) { | |
return; | |
} | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, y); | |
if (y != 1) { | |
CUMO_STORE_BIT(a2, p2, y); | |
return; | |
} | |
idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1, y); | |
if (y != 1) { | |
CUMO_STORE_BIT(a2, p2, y); | |
return; | |
} | |
p1 += s1; | |
} | |
} | |
} | |
} | |
/* | |
Return true if all bits are one (true).
If an argument is supplied, return a Bit-array reduced along the axes.
@overload all?(axis:nil, keepdims:false)
@param [Integer,Array,Range] axis (keyword) axes to be reduced.
@param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
@return [Cumo::Bit]
*/
static VALUE
bit_all_p(int argc, VALUE *argv, VALUE self)
{ | |
VALUE v, reduce; | |
cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}}; | |
cumo_ndfunc_arg_out_t aout[1] = {{cumo_cBit,0}}; | |
    cumo_ndfunc_t ndf = {iter_bit_all_p, CUMO_FULL_LOOP_NIP, 3,1, ain,aout};
    reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
    v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(1));
if (argc > 0) { | |
return v; | |
} | |
v = bit_extract(v);
switch (v) { | |
case INT2FIX(0): | |
return Qfalse; | |
case INT2FIX(1): | |
return Qtrue; | |
default: | |
rb_bug("unexpected result"); | |
return v; | |
} | |
} | |
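/*
 * Illustrative Ruby usage (a sketch assuming the Numo-compatible API):
 *
 *   m = Cumo::Bit[0, 1, 1, 0]
 *   m.any?            # => true
 *   m.all?            # => false
 *   m.all?(axis: 0)   # => Cumo::Bit reduced along axis 0; no conversion to
 *                     #    true/false when an axis is given (see above)
 */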
#line 1 "narray/gen/tmpl_bit/bit_reduce.c"
static void
iter_bit_any_p(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a1, *a2; | |
size_t p1, p2; | |
ssize_t s1, s2; | |
size_t *idx1, *idx2; | |
CUMO_BIT_DIGIT x=0, y=0; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_any_p", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1); | |
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2); | |
if (idx2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a2, p2+*idx2, y); | |
if (y == 0) {
CUMO_LOAD_BIT(a1, p1+*idx1, x); | |
if (x != 0) { | |
CUMO_STORE_BIT(a2, p2+*idx2, x); | |
} | |
} | |
idx1++; | |
idx2++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a2, p2+*idx2, y); | |
if (y == 0) { | |
CUMO_LOAD_BIT(a1, p1, x); | |
if (x != 0) { | |
CUMO_STORE_BIT(a2, p2+*idx2, x); | |
} | |
} | |
p1 += s1; | |
idx2++; | |
} | |
} | |
} else if (s2) { | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a2, p2, y); | |
if (y == 0) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, x); | |
if (x != 0) { | |
CUMO_STORE_BIT(a2, p2, x); | |
} | |
} | |
idx1++; | |
p2 += s2; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a2, p2, y); | |
if (y == 0) { | |
CUMO_LOAD_BIT(a1, p1, x); | |
if (x != 0) { | |
CUMO_STORE_BIT(a2, p2, x); | |
} | |
} | |
p1 += s1; | |
p2 += s2; | |
} | |
} | |
} else { | |
CUMO_LOAD_BIT(a2, p2, x); | |
if (x != 0) { | |
return; | |
} | |
if (idx1) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1+*idx1, y); | |
if (y != 0) { | |
CUMO_STORE_BIT(a2, p2, y); | |
return; | |
} | |
idx1++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a1, p1, y); | |
if (y != 0) { | |
CUMO_STORE_BIT(a2, p2, y); | |
return; | |
} | |
p1 += s1; | |
} | |
} | |
} | |
} | |
/* | |
Return true if any bit is one (true).
If an argument is supplied, return a Bit-array reduced along the axes.
@overload any?(axis:nil, keepdims:false)
@param [Integer,Array,Range] axis (keyword) axes to be reduced.
@param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
@return [Cumo::Bit]
*/
static VALUE
bit_any_p(int argc, VALUE *argv, VALUE self)
{ | |
VALUE v, reduce; | |
cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}}; | |
cumo_ndfunc_arg_out_t aout[1] = {{cumo_cBit,0}}; | |
    cumo_ndfunc_t ndf = {iter_bit_any_p, CUMO_FULL_LOOP_NIP, 3,1, ain,aout};
    reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
    v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(0));
if (argc > 0) { | |
return v; | |
} | |
v = bit_extract(v);
switch (v) { | |
case INT2FIX(0): | |
return Qfalse; | |
case INT2FIX(1): | |
return Qtrue; | |
default: | |
rb_bug("unexpected result"); | |
return v; | |
} | |
} | |
#line 1 "narray/gen/tmpl_bit/none_p.c"
static VALUE
bit_none_p(int argc, VALUE *argv, VALUE self)
{ | |
VALUE v; | |
v = bit_any_p(argc,argv,self);
if (v==Qtrue) { | |
return Qfalse; | |
} else if (v==Qfalse) { | |
return Qtrue; | |
} | |
    return bit_not(v);
}
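/*
 * bit_none_p simply negates any?: a scalar result is flipped to the opposite
 * Ruby boolean, and an axis-reduced Bit array is passed through bit_not.
 * Illustrative usage (sketch): Cumo::Bit[0, 0].none?  # => true
 */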
#line 1 "narray/gen/tmpl_bit/where.c" | |
typedef struct { | |
size_t count; | |
char *idx0; | |
char *idx1; | |
size_t elmsz; | |
} where_opt_t; | |
#define STORE_INT(ptr, esz, x) memcpy(ptr,&(x),esz) | |
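/*
 * where_opt_t carries accumulation state across the ndloop: count is the
 * running flat position in the bit array, idx0/idx1 are the write cursors
 * for the zero-bit/one-bit index buffers, and elmsz is the element size of
 * those buffers (4 bytes for Int32 indices, 8 for Int64).
 */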
static void | |
iter_bit_where(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a; | |
size_t p; | |
ssize_t s; | |
size_t *idx; | |
CUMO_BIT_DIGIT x=0; | |
char *idx1; | |
size_t count; | |
size_t e; | |
where_opt_t *g; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_where", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
g = (where_opt_t*)(lp->opt_ptr); | |
count = g->count; | |
idx1 = g->idx1; | |
e = g->elmsz; | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a, p, s, idx); | |
if (idx) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a, p+*idx, x); | |
idx++; | |
if (x!=0) { | |
STORE_INT(idx1,e,count); | |
idx1 += e; | |
} | |
count++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a, p, x); | |
p+=s; | |
if (x!=0) { | |
STORE_INT(idx1,e,count); | |
idx1 += e; | |
} | |
count++; | |
} | |
} | |
g->count = count; | |
g->idx1 = idx1; | |
} | |
/* | |
Returns the array of indices where the bit is one (true).
@overload where
@return [Cumo::Int32,Cumo::Int64]
*/
static VALUE
bit_where(VALUE self)
{ | |
volatile VALUE idx_1; | |
size_t size, n_1; | |
where_opt_t *g; | |
cumo_ndfunc_arg_in_t ain[1] = {{cT,0}}; | |
cumo_ndfunc_t ndf = { iter_bit_where, CUMO_FULL_LOOP, 1, 0, ain, 0 };
size = CUMO_RNARRAY_SIZE(self); | |
n_1 = NUM2SIZET(bit_count_true_cpu(0, NULL, self));
g = ALLOCA_N(where_opt_t,1); | |
g->count = 0; | |
if (size>4294967295ul) { | |
idx_1 = cumo_na_new(cumo_cInt64, 1, &n_1); | |
g->elmsz = 8; | |
} else { | |
idx_1 = cumo_na_new(cumo_cInt32, 1, &n_1); | |
g->elmsz = 4; | |
} | |
g->idx1 = cumo_na_get_pointer_for_write(idx_1); | |
g->idx0 = NULL; | |
cumo_na_ndloop3(&ndf, g, 1, self); | |
cumo_na_release_lock(idx_1); | |
return idx_1; | |
} | |
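/*
 * Illustrative Ruby usage (a sketch assuming the Numo-compatible API; the
 * index type is Int32 or Int64 depending on the array size, as above):
 *
 *   m = Cumo::Bit[0, 1, 1, 0]
 *   m.where           # indices of 1-bits        => [1, 2]
 *   t, f = m.where2   # 1-bit and 0-bit indices  => [1, 2] and [0, 3]
 */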
#line 1 "narray/gen/tmpl_bit/where2.c"
static void
iter_bit_where2(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a; | |
size_t p; | |
ssize_t s; | |
size_t *idx; | |
CUMO_BIT_DIGIT x=0; | |
char *idx0, *idx1; | |
size_t count; | |
size_t e; | |
where_opt_t *g; | |
// TODO(sonots): CUDA kernelize | |
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_where2", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
g = (where_opt_t*)(lp->opt_ptr); | |
count = g->count; | |
idx0 = g->idx0; | |
idx1 = g->idx1; | |
e = g->elmsz; | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a, p, s, idx); | |
if (idx) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a, p+*idx, x); | |
idx++; | |
if (x==0) { | |
STORE_INT(idx0,e,count); | |
idx0 += e; | |
} else { | |
STORE_INT(idx1,e,count); | |
idx1 += e; | |
} | |
count++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a, p, x); | |
p+=s; | |
if (x==0) { | |
STORE_INT(idx0,e,count); | |
idx0 += e; | |
} else { | |
STORE_INT(idx1,e,count); | |
idx1 += e; | |
} | |
count++; | |
} | |
} | |
g->count = count; | |
g->idx0 = idx0; | |
g->idx1 = idx1; | |
} | |
/* | |
Returns two index arrays.
The first array contains indices where the bit is one (true).
The second array contains indices where the bit is zero (false).
@overload where2
@return [Cumo::Int32,Cumo::Int64]*2
*/
static VALUE
bit_where2(VALUE self)
{ | |
VALUE idx_1, idx_0; | |
size_t size, n_1, n_0; | |
where_opt_t *g; | |
cumo_ndfunc_arg_in_t ain[1] = {{cT,0}}; | |
cumo_ndfunc_t ndf = { iter_bit_where2, CUMO_FULL_LOOP, 1, 0, ain, 0 };
size = CUMO_RNARRAY_SIZE(self); | |
n_1 = NUM2SIZET(bit_count_true_cpu(0, NULL, self));
n_0 = size - n_1; | |
g = ALLOCA_N(where_opt_t,1); | |
g->count = 0; | |
if (size>4294967295ul) { | |
idx_1 = cumo_na_new(cumo_cInt64, 1, &n_1); | |
idx_0 = cumo_na_new(cumo_cInt64, 1, &n_0); | |
g->elmsz = 8; | |
} else { | |
idx_1 = cumo_na_new(cumo_cInt32, 1, &n_1); | |
idx_0 = cumo_na_new(cumo_cInt32, 1, &n_0); | |
g->elmsz = 4; | |
} | |
g->idx1 = cumo_na_get_pointer_for_write(idx_1); | |
g->idx0 = cumo_na_get_pointer_for_write(idx_0); | |
cumo_na_ndloop3(&ndf, g, 1, self); | |
cumo_na_release_lock(idx_0); | |
cumo_na_release_lock(idx_1); | |
return rb_assoc_new(idx_1,idx_0); | |
} | |
#line 1 "narray/gen/tmpl_bit/mask.c"
static void
iter_bit_mask(cumo_na_loop_t *const lp)
{ | |
size_t i; | |
CUMO_BIT_DIGIT *a; | |
size_t p1, p2; | |
ssize_t s1, s2; | |
size_t *idx1, *idx2, *pidx; | |
CUMO_BIT_DIGIT x=0; | |
size_t count; | |
where_opt_t *g; | |
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_mask", ""); | |
cumo_cuda_runtime_check_status(cudaDeviceSynchronize()); | |
g = (where_opt_t*)(lp->opt_ptr); | |
count = g->count; | |
pidx = (size_t*)(g->idx1); | |
CUMO_INIT_COUNTER(lp, i); | |
CUMO_INIT_PTR_BIT_IDX(lp, 0, a, p1, s1, idx1); | |
//CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2); | |
p2 = lp->args[1].iter[0].pos; | |
s2 = lp->args[1].iter[0].step; | |
idx2 = lp->args[1].iter[0].idx; | |
if (idx1) { | |
if (idx2) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a, p1+*idx1, x); | |
idx1++; | |
if (x) { | |
*(pidx++) = p2+*idx2; | |
count++; | |
} | |
idx2++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a, p1+*idx1, x); | |
idx1++; | |
if (x) { | |
*(pidx++) = p2; | |
count++; | |
} | |
p2 += s2; | |
} | |
} | |
} else { | |
if (idx2) { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a, p1, x); | |
p1 += s1; | |
if (x) { | |
*(pidx++) = p2+*idx2; | |
count++; | |
} | |
idx2++; | |
} | |
} else { | |
for (; i--;) { | |
CUMO_LOAD_BIT(a, p1, x); | |
p1 += s1; | |
if (x) { | |
*(pidx++) = p2; | |
count++; | |
} | |
p2 += s2; | |
} | |
} | |
} | |
g->count = count; | |
g->idx1 = (char*)pidx; | |
} | |
#if SIZEOF_VOIDP == 8 | |
#define cIndex cumo_cInt64 | |
#elif SIZEOF_VOIDP == 4 | |
#define cIndex cumo_cInt32 | |
#endif | |
/* | |
Return a subarray of the argument selected at the positions where self (the bit mask) is one.
@overload mask(array)
@param [Cumo::NArray] array narray to be masked. | |
@return [Cumo::NArray] view of masked array. | |
*/ | |
static VALUE | |
bit_mask(VALUE mask, VALUE val)
{ | |
volatile VALUE idx_1, view; | |
cumo_narray_data_t *nidx; | |
cumo_narray_view_t *nv; | |
cumo_narray_t *na; | |
cumo_narray_view_t *na1; | |
cumo_stridx_t stridx0; | |
size_t n_1; | |
where_opt_t g; | |
cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{Qnil,0}}; | |
cumo_ndfunc_t ndf = {iter_bit_mask, CUMO_FULL_LOOP, 2, 0, ain, 0};
// TODO(sonots): bit_count_true synchronizes with CPU. Avoid. | |
n_1 = NUM2SIZET(bit_count_true_cpu(0, NULL, mask));
idx_1 = cumo_na_new(cIndex, 1, &n_1); | |
g.count = 0; | |
g.elmsz = SIZEOF_VOIDP; | |
g.idx1 = cumo_na_get_pointer_for_write(idx_1); | |
g.idx0 = NULL; | |
cumo_na_ndloop3(&ndf, &g, 2, mask, val); | |
view = cumo_na_s_allocate_view(CLASS_OF(val)); | |
CumoGetNArrayView(view, nv); | |
cumo_na_setup_shape((cumo_narray_t*)nv, 1, &n_1); | |
CumoGetNArrayData(idx_1,nidx); | |
CUMO_SDX_SET_INDEX(stridx0,(size_t*)nidx->ptr); | |
nidx->ptr = NULL; | |
nv->stridx = ALLOC_N(cumo_stridx_t,1); | |
nv->stridx[0] = stridx0; | |
nv->offset = 0; | |
CumoGetNArray(val, na); | |
switch(CUMO_NA_TYPE(na)) { | |
case CUMO_NARRAY_DATA_T: | |
nv->data = val; | |
break; | |
case CUMO_NARRAY_VIEW_T: | |
CumoGetNArrayView(val, na1); | |
nv->data = na1->data; | |
break; | |
default: | |
rb_raise(rb_eRuntimeError,"invalid CUMO_NA_TYPE: %d",CUMO_NA_TYPE(na)); | |
} | |
return view; | |
} | |
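/*
 * Illustrative Ruby usage (a sketch assuming the Numo-compatible API; the
 * result is a view of the argument indexed where the mask is 1):
 *
 *   a = Cumo::DFloat[1, 2, 3, 4]
 *   m = Cumo::Bit[0, 1, 1, 0]
 *   m.mask(a)   # => view of [2.0, 3.0]
 */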
void
Init_cumo_bit(void)
{
    VALUE hCast, mCumo;

    mCumo = rb_define_module("Cumo");

    cumo_id_cast   = rb_intern("cast");
    cumo_id_divmod = rb_intern("divmod");
    cumo_id_eq     = rb_intern("eq");
    cumo_id_mulsum = rb_intern("mulsum");
    cumo_id_ne     = rb_intern("ne");
    cumo_id_pow    = rb_intern("pow");

#line 1 "narray/gen/tmpl/init_class.c"
    /*
      Document-class: Cumo::Bit
    */
    cT = rb_define_class_under(mCumo, "Bit", cNArray);
    hCast = rb_hash_new();
    rb_define_const(cT, "UPCAST", hCast);
    rb_hash_aset(hCast, rb_cArray, cT);
#ifdef RUBY_INTEGER_UNIFICATION
    rb_hash_aset(hCast, rb_cInteger, cT);
#else
    rb_hash_aset(hCast, rb_cFixnum, cT);
    rb_hash_aset(hCast, rb_cBignum, cT);
#endif
    rb_hash_aset(hCast, rb_cFloat, cumo_cDFloat);
    rb_hash_aset(hCast, rb_cComplex, cumo_cDComplex);
    rb_hash_aset(hCast, cumo_cRObject, cumo_cRObject);
    rb_hash_aset(hCast, cumo_cDComplex, cumo_cDComplex);
    rb_hash_aset(hCast, cumo_cSComplex, cumo_cSComplex);
    rb_hash_aset(hCast, cumo_cDFloat, cumo_cDFloat);
    rb_hash_aset(hCast, cumo_cSFloat, cumo_cSFloat);
    rb_hash_aset(hCast, cumo_cInt64, cumo_cInt64);
    rb_hash_aset(hCast, cumo_cInt32, cumo_cInt32);
    rb_hash_aset(hCast, cumo_cInt16, cumo_cInt16);
    rb_hash_aset(hCast, cumo_cInt8, cumo_cInt8);
    rb_hash_aset(hCast, cumo_cUInt64, cumo_cUInt64);
    rb_hash_aset(hCast, cumo_cUInt32, cumo_cUInt32);
    rb_hash_aset(hCast, cumo_cUInt16, cumo_cUInt16);
    rb_hash_aset(hCast, cumo_cUInt8, cumo_cUInt8);
    rb_define_singleton_method(cT, "[]", bit_s_cast, -2);
    rb_define_const(cT, "ELEMENT_BIT_SIZE",  INT2FIX(1));
    rb_define_const(cT, "ELEMENT_BYTE_SIZE", rb_float_new(1.0/8));
    rb_define_const(cT, "CONTIGUOUS_STRIDE", INT2FIX(1));
    rb_define_alloc_func(cT, bit_s_alloc_func);
    rb_define_method(cT, "allocate", bit_allocate, 0);
    rb_define_method(cT, "extract", bit_extract, 0);
    rb_define_method(cT, "extract_cpu", bit_extract_cpu, 0);
    rb_define_method(cT, "store", bit_store, 1);
    rb_define_singleton_method(cT, "cast", bit_s_cast, 1);
    rb_define_method(cT, "[]", bit_aref, -1);
    rb_define_method(cT, "aref_cpu", bit_aref_cpu, -1);
    rb_define_method(cT, "[]=", bit_aset, -1);
    rb_define_method(cT, "coerce_cast", bit_coerce_cast, 1);
    rb_define_method(cT, "to_a", bit_to_a, 0);
    rb_define_method(cT, "fill", bit_fill, 1);
    rb_define_method(cT, "format", bit_format, -1);
    rb_define_method(cT, "format_to_a", bit_format_to_a, -1);
    rb_define_method(cT, "inspect", bit_inspect, 0);
    rb_define_method(cT, "each", bit_each, 0);
    rb_define_method(cT, "each_with_index", bit_each_with_index, 0);
    rb_define_method(cT, "copy", bit_copy, 0);
    rb_define_method(cT, "~", bit_not, 0);
    rb_define_method(cT, "&", bit_and, 1);
    rb_define_method(cT, "|", bit_or, 1);
    rb_define_method(cT, "^", bit_xor, 1);
    rb_define_method(cT, "eq", bit_eq, 1);
    rb_define_method(cT, "count_true", bit_count_true, -1);
    rb_define_alias(cT, "count_1", "count_true");
    rb_define_alias(cT, "count", "count_true");
    rb_define_method(cT, "count_false", bit_count_false, -1);
    rb_define_alias(cT, "count_0", "count_false");
    rb_define_method(cT, "count_true_cpu", bit_count_true_cpu, -1);
    rb_define_alias(cT, "count_1_cpu", "count_true_cpu");
    rb_define_alias(cT, "count_cpu", "count_true_cpu");
    rb_define_method(cT, "count_false_cpu", bit_count_false_cpu, -1);
    rb_define_alias(cT, "count_0_cpu", "count_false_cpu");
    rb_define_method(cT, "all?", bit_all_p, -1);
    rb_define_method(cT, "any?", bit_any_p, -1);
    rb_define_method(cT, "none?", bit_none_p, -1);
    rb_define_method(cT, "where", bit_where, 0);
    rb_define_method(cT, "where2", bit_where2, 0);
    rb_define_method(cT, "mask", bit_mask, 1);
}