gpu flag set
C:\toolkits\anaconda2-4.2.0\python.exe C:/toolkits/cpu_gpu_test.py
#define _CUDA_NDARRAY_C

#include <Python.h>
#include <structmember.h>
#include "theano_mod_helper.h"

#include <numpy/arrayobject.h>
#include <iostream>

#include "cuda_ndarray.cuh"

#ifndef CNMEM_DLLEXPORT
#define CNMEM_DLLEXPORT
#endif

#include "cnmem.h"
#include "cnmem.cpp"

//If true, when there is a GPU malloc or free error, we print the amount of
//memory currently allocated on the device.
#define COMPUTE_GPU_MEM_USED 0

//If true, we fill newly allocated device memory with NaNs.
#define ALLOC_MEMSET 0

//If true, we print out when we free a device pointer, uninitialize a
//CudaNdarray, or allocate a device pointer.
#define PRINT_FREE_MALLOC 0

//If true, we do error checking at the start of functions, to make sure there
//is not a pre-existing error when the function is called.
//You probably need to set the environment variable
//CUDA_LAUNCH_BLOCKING=1, and/or modify the CNDA_THREAD_SYNC
//preprocessor macro in cuda_ndarray.cuh
//if you want this to work.
#define PRECHECK_ERROR 0

cublasHandle_t handle = NULL;
int* err_var = NULL;
/////////////////////////
// Alloc and Free
/////////////////////////

static int g_gpu_context_active = 0;


PyObject *
CudaNdarray_Dimshuffle(PyObject* _unused, PyObject* args);
static PyObject *CudaNdarray_get_shape(CudaNdarray *self, void *closure);


/**
 *
 * In the test program I'm using, the _outstanding_mallocs decreases with every call.
 * This suggests there are more free() calls being made than alloc(), but I can't figure out why.
 *
 */
int _outstanding_mallocs[] = {0,0};

#if COMPUTE_GPU_MEM_USED
size_t _allocated_size = 0;
size_t _max_allocated_size = 0;

const int TABLE_SIZE = 10000;
struct table_struct{
    void* ptr;
    size_t size;
};
table_struct _alloc_size_table[TABLE_SIZE];
#endif

void * device_malloc(size_t size)
{
    return device_malloc(size, VERBOSE_DEVICE_MALLOC);
}

///@TODO: thejaswi: link this option to a theano config variable?
static bool g_use_cnmem = false;
static const int g_max_devices = 8;
int initCnmem(int card_number_provided, int card_nb, size_t mem) {
    static bool cnmemInitialized = false;
    if(cnmemInitialized) {
        return 0;
    }
    // On stderr to be at the same place as "Using gpu device..."
    int numDevices = 0;
    cnmemDevice_t devices[g_max_devices];
    if(cudaGetDeviceCount(&numDevices) != cudaSuccess) {
        PyErr_Format(PyExc_RuntimeError,
                     "initCnmem: 'cudaGetDeviceCount' failed! Reason=%s\n",
                     cudaGetErrorString(cudaGetLastError()));
        return -1;
    }
    if(card_number_provided){
        numDevices = 1;
        int i = 0;
        devices[i].device = card_nb;
        devices[i].size = mem;
        ///@TODO: thejaswi: add support for multiple streams
        devices[i].numStreams = 0;
        devices[i].streams = NULL;
        devices[i].streamSizes = NULL;
    }else{
        for(int i=0;i<numDevices;++i) {
            devices[i].device = i;
            devices[i].size = mem;
            ///@TODO: thejaswi: add support for multiple streams
            devices[i].numStreams = 0;
            devices[i].streams = NULL;
        }
    }

    ///@TODO: thejaswi: passing custom cnmem flags?
    cnmemStatus_t status = cnmemInit(numDevices, devices, CNMEM_FLAGS_DEFAULT);
    if(status != CNMEM_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError,
                     "initCnmem: cnmemInit call failed! Reason=%s. numdev=%d\n",
                     cnmemGetErrorString(status), numDevices);
        return -1;
    }
    cnmemInitialized = true;
    return 0;
}
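
// A note on the CNMeM path (editor's sketch, not part of the original
// Theano source): when g_use_cnmem is true, device_malloc/device_free
// below route through cnmemMalloc/cnmemFree, which sub-allocate from a
// pool reserved once by initCnmem(). A minimal host-side sketch of the
// same API, assuming a single device and a 256 MB pool:
//
//   cnmemDevice_t dev;
//   memset(&dev, 0, sizeof(dev));
//   dev.device = 0;
//   dev.size = 256 * 1024 * 1024;          // pool size in bytes
//   cnmemInit(1, &dev, CNMEM_FLAGS_DEFAULT);
//   void* p = NULL;
//   cnmemMalloc(&p, 1024, NULL);           // NULL = default stream
//   cnmemFree(p, NULL);
//   cnmemFinalize();
//
// The pooling avoids a cudaMalloc/cudaFree (and the device
// synchronization it implies) per Theano allocation.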

void * device_malloc(size_t size, int verbose)
{
#if PRECHECK_ERROR
    cudaThreadSynchronize();
    cudaError_t prevError = cudaGetLastError();
    if (cudaSuccess != prevError)
    {
        fprintf(stderr,
                "Error existed before calling device_malloc. %s\n",
                cudaGetErrorString(prevError)
        );
    }
#endif
    void * rval=NULL;
    ///@TODO: thejaswi: support for multiple-streams?
    if(g_use_cnmem) {
        cnmemStatus_t status = CNMEM_STATUS_SUCCESS;
        status = cnmemMalloc(&rval, size, NULL);
        if(status != CNMEM_STATUS_SUCCESS) {
            PyErr_Format(PyExc_MemoryError,
                         "Error allocating %llu bytes of device memory (%s).",
                         (unsigned long long)size, cnmemGetErrorString(status));
            return NULL;
        }
    }
    else {
        cudaError_t err = cudaMalloc(&rval, size);
        if (cudaSuccess != err)
        {
            // Clear the error flag, cudaMalloc doesn't do it.
            // Currently this returns the same thing as err, but if in the future
            // it returns something else I still don't see why we should ignore
            // it. All we want to do here is reset the flag.
            cudaGetLastError();
            if (verbose)
            {
                size_t free = 0, total = 0;
                cudaError_t err2 = cudaMemGetInfo(&free, &total);
                if (err2 != cudaSuccess){
                    cudaGetLastError();
                    fprintf(stderr,
                            "Error when trying to find the memory information"
                            " on the GPU: %s\n", cudaGetErrorString(err2));
                }
#if COMPUTE_GPU_MEM_USED
                fprintf(stderr,
                        "Error allocating %llu bytes of device memory (%s)."
                        " new total bytes allocated: %llu."
                        " Driver reports %llu bytes free and %llu bytes total \n",
                        (unsigned long long)size, cudaGetErrorString(err), (unsigned long long)_allocated_size,
                        (unsigned long long)free, (unsigned long long)total);
#else
                fprintf(stderr,
                        "Error allocating %llu bytes of device memory (%s)."
                        " Driver reports %llu bytes free and %llu bytes total \n",
                        (unsigned long long)size, cudaGetErrorString(err), (unsigned long long)free, (unsigned long long)total);
#endif
            }
            PyErr_Format(PyExc_MemoryError,
                         "Error allocating %llu bytes of device memory (%s).",
                         (unsigned long long)size, cudaGetErrorString(err));
            return NULL;
        }
    }
    if (rval != NULL){
        // Can it happen that cudaMalloc returns cudaSuccess, but a NULL ptr?
        // Could this be what happens if size is 0?
        _outstanding_mallocs[0] += 1;

#if COMPUTE_GPU_MEM_USED
        _allocated_size += size;
        _max_allocated_size = std::max(_max_allocated_size, _allocated_size);
        int i = 0;
        for(;i<TABLE_SIZE;i++){
            if(NULL==_alloc_size_table[i].ptr){
                _alloc_size_table[i].ptr=rval;
                _alloc_size_table[i].size=size;
                break;
            }
        }
        if (i == TABLE_SIZE){
            fprintf(stderr,
                    "When tracking GPU malloc, our table size wasn't big enough,"
                    " so we lose some tracking. Raise the value of TABLE_SIZE in the file cuda_ndarray.cu");
        }
#endif
    }
    //fprintf(stderr,
    //"allocated %li bytes of device memory (%s). new total bytes allocated: %d. ptr: %p\n",
    //(long)size, cudaGetErrorString(err),_allocated_size,rval);

    if(ALLOC_MEMSET){
        //We initialize the memory to NaN to make it easier to catch reads of
        //uninitialized values while debugging.
        cudaMemset(rval, 0xFF, size);
        //printf("MEMSET\n");
    }
#if PRINT_FREE_MALLOC
    fprintf(stderr, "device malloc %p of size %d\n", rval, size);
#endif
    return rval;
}
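
// Editor's note on the COMPUTE_GPU_MEM_USED bookkeeping above (a sketch,
// not in the original source): _alloc_size_table is a fixed-size table
// scanned linearly; an allocation claims the first slot whose ptr is
// NULL, and device_free() below clears that slot again. Looking up the
// size of a live pointer is just a linear search:
//
//   size_t lookup_alloc_size(const void* p) {
//       for (int i = 0; i < TABLE_SIZE; ++i)
//           if (_alloc_size_table[i].ptr == p)
//               return _alloc_size_table[i].size;
//       return 0;  // untracked (e.g. the table overflowed)
//   }
//
// lookup_alloc_size is a hypothetical helper for illustration; the real
// code inlines the same loop wherever it needs it.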

int device_free(void *ptr)
{
#if PRECHECK_ERROR
    cudaThreadSynchronize();
    cudaError_t prevError = cudaGetLastError();
    if (cudaSuccess != prevError)
    {
        fprintf(stderr,
                "Error existed before calling device_free. %s\n",
                cudaGetErrorString(prevError)
        );
    }
#endif
#if PRINT_FREE_MALLOC
    size_t free = 0, total = 0;
    cudaError_t err2 = cudaMemGetInfo(&free, &total);
    if (err2 != cudaSuccess){
        cudaGetLastError();
        fprintf(stderr,
                "Error when trying to find the memory information"
                " on the GPU: %s\n", cudaGetErrorString(err2));
    }
#if COMPUTE_GPU_MEM_USED
    {
        int i = 0;
        for(;i<TABLE_SIZE;i++)
            if(_alloc_size_table[i].ptr==ptr){
                break;
            }
        assert(i<TABLE_SIZE);
        fprintf(stderr, "device_free %p of size %d."
                " Driver reports %d bytes free and %d bytes total \n",
                ptr, _alloc_size_table[i].size, free, total);
    }
#else
    fprintf(stderr, "device_free %p."
            " Driver reports %d bytes free and %d bytes total \n",
            ptr, free, total);
#endif
#endif

    // If there is no gpu context, the call to cudaFree will fail; skip it entirely.
    if(!g_gpu_context_active) {
        return 0;
    }

    ///@TODO: thejaswi: multi-stream support
    if(g_use_cnmem) {
        cnmemStatus_t status = cnmemFree(ptr, NULL);
        if(status != CNMEM_STATUS_SUCCESS) {
            fprintf(stderr, "device_free: cnmemFree call failed! Reason=%s\n",
                    cnmemGetErrorString(status));
        }
    }
    else {
        // We need to sync because Theano's GC could free intermediate
        // variables that are still needed while GPU kernels are running or queued.
        CNDA_BEGIN_ALLOW_THREADS
        cudaThreadSynchronize();
        CNDA_END_ALLOW_THREADS

        cudaError_t err = cudaFree(ptr);
        if (cudaSuccess != err)
        {
            // Clear the error flag, cudaFree doesn't do it.
            // Currently this returns the same thing as err, but if in the future
            // it returns something else I still don't see why we should ignore
            // it. All we want to do here is reset the flag.
            cudaGetLastError();
            size_t free = 0, total = 0;
            cudaError_t err2 = cudaMemGetInfo(&free, &total);
            if (err2 != cudaSuccess){
                cudaGetLastError();
                fprintf(stderr,
                        "Error when trying to find the memory information"
                        " on the GPU: %s\n", cudaGetErrorString(err2));
            }
#if COMPUTE_GPU_MEM_USED
            {
                int i = 0;
                for(;i<TABLE_SIZE;i++)
                    if(_alloc_size_table[i].ptr==ptr){
                        break;
                    }
                assert(i<TABLE_SIZE);
                fprintf(stderr,
                        "Error freeing device pointer %p (%s) of size %llu. %llu bytes already allocated."
                        " Driver reports %llu bytes free and %llu bytes total \n",
                        ptr, cudaGetErrorString(err),
                        (unsigned long long)_alloc_size_table[i].size, (unsigned long long)_allocated_size, (unsigned long long)free, (unsigned long long)total);
            }
#else
            fprintf(stderr,
                    "Error freeing device pointer %p (%s)."
                    " Driver reports %llu bytes free and %llu bytes total \n",
                    ptr,
                    cudaGetErrorString(err), (unsigned long long)free, (unsigned long long)total);
#endif
            if (NULL != PyErr_Occurred()){
                fprintf(stderr,
                        "device_free: cudaFree() returned an error, but there is already a"
                        " Python error set. This happens during cleanup after a first"
                        " error, when the CUDA driver is in such a bad state that it"
                        " doesn't work anymore. We keep the previous error set to help debug it.");
                return -1;
            }
            PyErr_Format(PyExc_MemoryError,
                         "error freeing device pointer %p (%s)",
                         ptr,
                         cudaGetErrorString(err));
            return -1;
        }
    }
    _outstanding_mallocs[0] -= (ptr != NULL);
#if COMPUTE_GPU_MEM_USED
    int i=0;
    size_t total_freed = 0;
    for(;i<TABLE_SIZE;i++)
        if(_alloc_size_table[i].ptr==ptr){
            _allocated_size -= _alloc_size_table[i].size;
            total_freed += _alloc_size_table[i].size;
            _alloc_size_table[i].ptr=0;
            _alloc_size_table[i].size=0;

            break;
        }
    //if(i==TABLE_SIZE)
    //    printf("Unallocated unknown size!\n");
    //fprintf(stderr, "freed %li bytes of device memory (%s). %d already allocated, ptr=%p\n", (long)total_freed, cudaGetErrorString(err),_allocated_size,ptr);
#endif
    return 0;
}
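
// Editor's sketch of the intended pairing (an assumption based on the
// code above, not in the original file): every device_malloc() must be
// matched by exactly one device_free(), and _outstanding_mallocs[0]
// counts the live allocations, e.g.:
//
//   void* buf = device_malloc(n * sizeof(float));
//   if (!buf) return -1;          // Python error already set
//   /* ... launch kernels using buf ... */
//   if (device_free(buf)) return -1;
//
// Note that device_free() synchronizes the device first (non-cnmem
// path), so freeing is safe even while kernels that read the buffer are
// still queued.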

static PyObject *
outstanding_mallocs(PyObject* self, PyObject * args)
{
    return PyInt_FromLong(_outstanding_mallocs[0]);
}


static void *work_mem = NULL;
static size_t work_size = 0;

/*
 * Returns a chunk of memory for temporary work inside of an op. You can only
 * request a single chunk of memory at a time since it is reused.
 */
void *get_work_mem(size_t sz) {
    if (sz <= work_size)
        return work_mem;
    device_free(work_mem);
    work_mem = device_malloc(sz);
    work_size = sz;
    if (work_mem == NULL)
        work_size = 0;
    return work_mem;
}
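
// Editor's note (not in the original source): get_work_mem() keeps one
// growing scratch buffer for the whole module, so two ops cannot hold
// work memory at the same time, and a pointer from an earlier call is
// invalidated by any later call that grows the buffer. A hypothetical
// caller therefore re-fetches it every time it needs it:
//
//   float* scratch = (float*)get_work_mem(numEls * sizeof(float));
//   if (scratch == NULL) return -1;  // allocation failed, error is set
//   // ... use scratch before anything else can call get_work_mem() ...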

/////////////////////////
// Static helper methods
/////////////////////////

static void
CudaNdarray_null_init(CudaNdarray*self)
{
    self->base = NULL;
    self->nd = -1;
    self->host_structure = NULL;
    self->data_allocated = 0;
    self->dev_structure_fresh = 1;
    self->dev_structure = NULL;
    self->devdata = NULL;
}

static int
CudaNdarray_uninit(CudaNdarray*self)
{
#if PRINT_FREE_MALLOC
    fprintf(stderr, "CudaNdarray_uninit %p\n", self);
#endif
    int rval = 0;
    if (self->data_allocated) {
        assert(self->devdata);
        if (device_free(self->devdata))
        {
            fprintf(stderr,
                    "CudaNdarray_uninit: error freeing self->devdata. (self=%p, self->devdata=%p)\n",
                    self, self->devdata);
            rval = -1;
        }
        self->devdata = NULL;
        self->data_allocated = 0;
    }
    if (self->dev_structure)
    {
        if (device_free(self->dev_structure))
        {
            fprintf(stderr,
                    "CudaNdarray_uninit: error freeing dev_structure memory %p (self=%p)\n",
                    self->dev_structure, self);
            rval = -1;
        }
        self->dev_structure = NULL;
    }
    if (self->host_structure)
    {
        free(self->host_structure);
        self->host_structure = NULL;
    }
    self->nd = -1;
    Py_XDECREF(self->base);
    self->base = NULL;
    return rval;
}


//make the rightmost coords change fastest
//TODO: why does a downward for-loop not work????
//TODO: use the log2_dims and driver code to remove / and %
//TODO: skip the last division (when d == 0)
#define decl_k_elemwise_unary_rowmajor(name, F) \
__global__ void name (unsigned int numEls,  \
        unsigned int nd, \
        const int * dim,  \
        const float * a_data, const int * a_str, \
        float * z_data, const int * z_str) \
{ \
    const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; \
    const unsigned int numThreads = blockDim.x * gridDim.x; \
 \
    for (unsigned int i = idx; i < numEls; i += numThreads) \
    { \
        unsigned int ii = i; \
        const float * a_i = a_data; \
        float * z_i = z_data; \
        for (unsigned int _d = 0; _d < nd; ++_d) \
        { \
            unsigned int d = nd - _d-1;  \
            int i_d = ii % dim[d]; /* i_d is our position in the d'th dimension */ \
            ii = ii / dim[d]; \
            a_i += i_d * a_str[d]; /* increment our a and z pointers by i_d elements */ \
            z_i += i_d * z_str[d]; \
        } \
        z_i[0] = F(a_i[0]); \
    } \
}

template<typename T> __device__ T unary_copy(T a) { return a; }
decl_k_elemwise_unary_rowmajor(k_elemwise_unary_rowmajor_copy, unary_copy<float>)

template<typename T> __device__ T unary_exp(T a) { return exp(a); }
decl_k_elemwise_unary_rowmajor(k_elemwise_unary_rowmajor_exp, unary_exp<float>)
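
// Editor's worked example of the index decomposition above (not part of
// the original source): for a flat row-major index i, the inner loop
// peels dimensions from the right. With i = 4, dim = {2, 3} and the
// contiguous strides a_str = {3, 1}:
//
//   d = 1: i_d = 4 % 3 = 1; ii = 4 / 3 = 1; a_i += 1 * a_str[1]  // +1
//   d = 0: i_d = 1 % 2 = 1; ii = 1 / 2 = 0; a_i += 1 * a_str[0]  // +3
//
// so i = 4 maps to coordinates (1, 1) and element offset 4; because the
// strides are applied per dimension, the same kernel also works on
// non-contiguous inputs and outputs.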

/////////////////////////////
// Satisfying reqs to be Type
/////////////////////////////

//DON'T call directly! If other CudaNdarrays point to this one, it will
//cause problems. Use Py_DECREF() instead.
static void
CudaNdarray_dealloc(CudaNdarray* self)
{
    if (0) std::cerr << "CudaNdarray dealloc " << self << " " << self->devdata << '\n';
    if(Py_REFCNT(self) > 1)
        printf("WARNING:CudaNdarray_dealloc called when there are still active references to it.\n");
    CudaNdarray_uninit(self);
    Py_TYPE(self)->tp_free((PyObject*)self);
    --_outstanding_mallocs[1];
    if (0)
    {
        fprintf(stderr, "device_malloc_counts: (device) %i (obj) %i\n",
                _outstanding_mallocs[0],
                _outstanding_mallocs[1]);
    }
}

static PyObject *
CudaNdarray_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    CudaNdarray *self;

    self = (CudaNdarray *)type->tp_alloc(type, 0);
    if (self != NULL)
    {
        CudaNdarray_null_init(self);
        ++_outstanding_mallocs[1];
    }
    return (PyObject *)self;
}
static int
CudaNdarray_init(CudaNdarray *self, PyObject *args, PyObject *kwds)
{
    PyObject *arr=NULL;

    if (! PyArg_ParseTuple(args, "O", &arr))
        return -1;
    if (! PyArray_Check(arr))
    {
        PyErr_SetString(PyExc_TypeError, "PyArray arg required");
        return -1;
    }
    int rval = CudaNdarray_CopyFromArray(self, (PyArrayObject*)arr);
    return rval;
}
static PyMemberDef CudaNdarray_members[] =
{
    /*
    {"first", T_OBJECT_EX, offsetof(CudaNdarray, first), 0,
     "first name"},
    {"last", T_OBJECT_EX, offsetof(CudaNdarray, last), 0,
     "last name"},
    {"number", T_INT, offsetof(CudaNdarray, number), 0,
     "noddy number"},
    */
    {NULL}  /* Sentinel */
};

PyObject * CudaNdarray_CreateArrayObj(CudaNdarray * self, PyObject *args)
{
    PyObject * dtype = NULL;
    if (args && !PyArg_ParseTuple(args, "|O", &dtype))
        return NULL;
    if (dtype) {
        PyArray_Descr* dtype2;
        // PyArray_DescrConverter tries to convert anything to a PyArray_Descr.
        if(!PyArray_DescrConverter(dtype, &dtype2))
        {
            PyObject * str = PyObject_Repr(dtype);
            PyErr_Format(PyExc_TypeError,
                         "CudaNdarray dtype parameter not understood: %s",
                         PyString_AsString(str)
            );
            Py_CLEAR(str);
            return NULL;
        }
        int typeNum = dtype2->type_num;
        Py_DECREF(dtype2);
        if (typeNum != NPY_FLOAT32)
        {
            PyObject * str = PyObject_Repr(dtype);
            PyErr_Format(PyExc_TypeError,
                         "CudaNdarray only supports the float32 dtype, provided: %d",
                         typeNum
            );
            Py_CLEAR(str);
            return NULL;
        }
    }

    int verbose = 0;
    if(self->nd>=0 && CudaNdarray_SIZE(self)==0){
        npy_intp * npydims = (npy_intp*)malloc(self->nd * sizeof(npy_intp));
        assert (npydims);
        for (int i = 0; i < self->nd; ++i) npydims[i] = (npy_intp)(CudaNdarray_HOST_DIMS(self)[i]);
        PyObject * rval = PyArray_SimpleNew(self->nd, npydims, REAL_TYPENUM);
        free(npydims);
        if (!rval){
            return NULL;
        }
        assert (PyArray_ITEMSIZE((PyArrayObject *)rval) == sizeof(real));
        return rval;
    }
    if ((self->nd < 0) || (self->devdata == 0))
    {
        PyErr_SetString(PyExc_ValueError, "can't copy from un-initialized CudaNdarray");
        return NULL;
    }
    CudaNdarray * contiguous_self = NULL;
    if (CudaNdarray_is_c_contiguous(self))
    {
        contiguous_self = self;
        Py_INCREF(contiguous_self);
        if (verbose) std::cerr << "CreateArrayObj already contiguous" << contiguous_self << '\n';
    }
    else
    {
        contiguous_self = (CudaNdarray*)CudaNdarray_Copy(self);
        if (verbose) std::cerr << "CreateArrayObj created contiguous" << contiguous_self << '\n';
    }
    if (!contiguous_self)
    {
        return NULL;
    }

    npy_intp * npydims = (npy_intp*)malloc(self->nd * sizeof(npy_intp));
    assert (npydims);
    for (int i = 0; i < self->nd; ++i)
        npydims[i] = (npy_intp)(CudaNdarray_HOST_DIMS(self)[i]);
    PyArrayObject * rval = (PyArrayObject *) PyArray_SimpleNew(self->nd,
                                                               npydims,
                                                               REAL_TYPENUM);
    free(npydims);
    if (!rval)
    {
        Py_DECREF(contiguous_self);
        return NULL;
    }

    assert (PyArray_ITEMSIZE(rval) == sizeof(real));

    npy_intp rval_size = PyArray_SIZE(rval);
    void *rval_data = PyArray_DATA(rval);
    cudaError_t err;
    CNDA_BEGIN_ALLOW_THREADS;

    err = cudaMemcpy(rval_data, contiguous_self->devdata,
                     rval_size * sizeof(real),
                     cudaMemcpyDeviceToHost
                     );
    //CNDA_THREAD_SYNC;  // unneeded because cudaMemcpy is blocking anyway
    CNDA_END_ALLOW_THREADS;

    if (cudaSuccess != err)
    {
        PyErr_Format(PyExc_RuntimeError, "error (%s) copying data to host",
                     cudaGetErrorString(err));
        Py_DECREF(rval);
        rval = NULL;
    }

    Py_DECREF(contiguous_self);
    return (PyObject *)rval;
}

// TODO-- we have two functions here, ZEROS and Zeros.
// ZEROS is meant to be called just from C code (you don't need to pass it PyObject *s)
// but this naming is very weird, makes it look like a macro
// we should figure out the correct convention and change to that
PyObject* CudaNdarray_ZEROS(int n, int * dims)
{

    size_t total_elements = 1;

    for(size_t i=0;i<n;i++){
        // Detect overflow on unsigned integer
        if (dims[i] != 0 && total_elements > (SIZE_MAX / dims[i])) {
            PyErr_Format(PyExc_RuntimeError,
                         "Can't store in a size_t the number of bytes requested: %llu * %llu",
                         (unsigned long long)total_elements,
                         (unsigned long long)dims[i]);
            return NULL;
        }
        total_elements*=dims[i];
    }

    // total_elements now contains the size of the array, in reals
    if (total_elements > (SIZE_MAX / sizeof(real))){
        PyErr_Format(PyExc_RuntimeError,
                     "Can't store in a size_t the number of bytes requested: %llu * 4",
                     (unsigned long long)total_elements);
        return NULL;
    }
    size_t total_size = total_elements * sizeof(real);

    CudaNdarray* rval = (CudaNdarray*)CudaNdarray_New();
    if (!rval)
    {
        PyErr_SetString(PyExc_RuntimeError, "CudaNdarray_ZEROS: call to New failed");
        return NULL;
    }

    if (CudaNdarray_alloc_contiguous(rval, n, dims))
    {
        PyErr_SetString(PyExc_RuntimeError, "CudaNdarray_ZEROS: allocation failed.");
        Py_DECREF(rval);
        return NULL;
    }

    // Fill with zeros
    //fprintf(stdout, "Sizeof: %d\n", total_size);
    if (cudaSuccess != cudaMemset(rval->devdata, 0, total_size))
    {
        PyErr_Format(PyExc_MemoryError,
                     "CudaNdarray_ZEROS: Error memsetting %llu bytes of device memory.",
                     (unsigned long long)total_size);
        Py_DECREF(rval);
        return NULL;
    }

    if (cnda_copy_structure_to_device(rval))
    {
        PyErr_SetString(PyExc_RuntimeError, "CudaNdarray_ZEROS: syncing structure to device failed");
        Py_DECREF(rval);
        return NULL;
    }
    return (PyObject*) rval;
}
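
// Editor's note on the overflow guard above (a sketch, not in the
// original source): before computing total_elements * dims[i], the code
// checks total_elements > SIZE_MAX / dims[i], the standard
// division-based test for unsigned multiplication overflow. For example,
// with a 32-bit size_t (SIZE_MAX = 4294967295), dims = {100000, 100000}
// would need 10^10 elements; since 100000 > 4294967295 / 100000 = 42949,
// the check fires and a RuntimeError is raised instead of the product
// silently wrapping around and under-allocating.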

// declared as a static method (hence 1st parameter is not used)
// Based on _Copy and _dimshuffle
PyObject* CudaNdarray_Zeros(PyObject* _unused, PyObject* shape)
{
    if(!shape)
    {
        PyErr_SetString(PyExc_TypeError, "CudaNdarray_Zeros: function takes at least 1 argument (0 given)");
        return NULL;
    }
    if(!PySequence_Check(shape))
    {
        PyErr_SetString(PyExc_TypeError, "shape argument must be a sequence");
        return NULL;
    }

    int shplen = PySequence_Length(shape);

    if (shplen == 0)
    {
        return CudaNdarray_ZEROS(0, NULL);
    }

    int* newdims = (int *)malloc(sizeof(int) * shplen);

    if (!newdims)
    {
        PyErr_SetString(PyExc_MemoryError,
                        "CudaNdarray_Zeros: Failed to allocate temporary space");
        return NULL;
    }

    // walk the shape from the end, in the same order strides are computed
    for (int i = shplen-1; i >= 0; --i)
    {
        PyObject* shp_el_obj = PySequence_GetItem(shape, i);
        if(shp_el_obj == NULL)
        {
            // shouldn't happen since we checked the length before...
            PyErr_SetString(PyExc_RuntimeError, "CudaNdarray_Zeros: Index out of bound in sequence");
            free(newdims);
            return NULL;
        }

        int shp_el = PyInt_AsLong(shp_el_obj);
        Py_DECREF(shp_el_obj);

        if (shp_el < 0)
        {
            PyErr_SetString(PyExc_ValueError, "CudaNdarray_Zeros: shape must contain only non-negative values for the size of a dimension");
            free(newdims);
            return NULL;
        }

        newdims[i] = shp_el;
    }

    PyObject* rval = CudaNdarray_ZEROS(shplen,newdims);

    free(newdims);

    return (PyObject*)rval;
}


PyObject * CudaNdarray_Copy(const CudaNdarray * self)
{
    PyObject * rval = CudaNdarray_New();
    if ((!rval) || (-1 == self->nd))
    {
        return rval;
    }
    if (CudaNdarray_alloc_contiguous((CudaNdarray*)rval, self->nd, CudaNdarray_HOST_DIMS(self)))
    {
        Py_DECREF(rval);
        return NULL;
    }
    if (CudaNdarray_CopyFromCudaNdarray((CudaNdarray*)rval, self))
    {
        Py_DECREF(rval);
        return NULL;
    }
    return rval;
}
PyObject * CudaNdarray_DeepCopy(CudaNdarray * self, PyObject * memo)
{
    assert(PyDict_Check(memo));
    PyObject * selfkey = PyInt_FromLong((long)self);
    assert(selfkey);
    if (PyDict_Contains(memo, selfkey))
    {
        PyObject * rval = PyDict_GetItem(memo, selfkey);
        Py_DECREF(selfkey);
        Py_XINCREF(rval);
        return rval;
    }
    else
    {
        PyObject * rval = CudaNdarray_Copy(self);
        if (0) std::cerr << "DeepCopy created " << rval << " devdata " << ((CudaNdarray*)rval)->devdata << "\n";
        if (NULL == rval)
        {
            Py_DECREF(selfkey);
            return NULL;
        }
        if (PyDict_SetItem(memo, selfkey, rval))
        {
            Py_DECREF(rval);
            Py_DECREF(selfkey);
            return NULL;
        }
        Py_DECREF(selfkey);
        return rval;
    }
}
PyObject * CudaNdarray_ReduceSum(CudaNdarray * self, PyObject * py_reduce_mask)
{
    if (!PySequence_Check(py_reduce_mask))
    {
        PyErr_SetString(PyExc_TypeError, "reduce_mask must be sequence of ints");
        return NULL;
    }
    int len = PySequence_Length(py_reduce_mask);
    if (len != self->nd)
    {
        PyErr_SetString(PyExc_TypeError, "length of reduce_mask must match self->nd");
        return NULL;
    }
    CudaNdarray * self_sum = (CudaNdarray*)CudaNdarray_New();
    if (!self_sum)
    {
        return NULL;
    }
    //TODO: allocate a fixed size dimshuffle_pattern_cache on the stack,
    //      and use it if it is big enough.
    int * dimshuffle_pattern = (int*)malloc(len * 2 * sizeof(int));
    int * sum_dims = dimshuffle_pattern + len;
    int n_remaining_dims = 0;
    if (!dimshuffle_pattern)
    {
        Py_DECREF(self_sum);
        PyErr_SetString(PyExc_MemoryError, "failed to alloc internal storage");
        return NULL;
    }
    for (int i = 0; i < len; ++i)
    {
        PyObject *o_i = PySequence_GetItem(py_reduce_mask, i);
        int o_i_int = PyInt_AsLong(o_i);
        Py_XDECREF(o_i);
        if (PyErr_Occurred())
        {
            Py_DECREF(self_sum);
            free(dimshuffle_pattern);
            return NULL;
        }
        if (o_i_int) // this is a dimension over which we are reducing
        {
            sum_dims[i] = 1;
        }
        else
        {
            sum_dims[i] = CudaNdarray_HOST_DIMS(self)[i];
            dimshuffle_pattern[n_remaining_dims++] = i;
        }
    }
    if (0 || CudaNdarray_alloc_contiguous(self_sum, len, sum_dims)
          || CudaNdarray_reduce_sum(self_sum, self)
          || CudaNdarray_dimshuffle(self_sum, n_remaining_dims, dimshuffle_pattern))
    {
        Py_DECREF(self_sum);
        free(dimshuffle_pattern);
        return NULL;
    }
    free(dimshuffle_pattern);
    return (PyObject*)self_sum;
}

// Reshape self to the new shape given by the tuple shape.
//
// If self is c contiguous, it returns a view. Otherwise it always does a copy.
// TODO: make it return a view when the strides allow it even if it is not
//       c contiguous
PyObject * CudaNdarray_Reshape(CudaNdarray * self, PyObject * shape)
{
    if(!CudaNdarray_is_c_contiguous(self))
    {
        // allocate new space
        //TODO: test to see if we can re-use the old one and take a new param
        //      to use this
        CudaNdarray* rval = (CudaNdarray*) CudaNdarray_Copy(self);
        if (!rval)
        {
            return NULL;
        }

        CudaNdarray* ret = (CudaNdarray*) CudaNdarray_Reshape(rval, shape);
        Py_XDECREF(rval);
        return (PyObject*)ret;
    }

    // check shape tuple
    unsigned int rval_nd;
    unsigned int * rval_dims;
    size_t rval_size = 1;

    if (PyTuple_Check(shape)){
        // copy shape to integer array
        rval_nd = PyTuple_Size(shape);
    }else if (PyInt_Check(shape)){
        rval_nd = 1;
    }else{
        PyErr_SetString(PyExc_TypeError, "shape must be tuple of integers or an integer");
        return NULL;
    }
    rval_dims = (unsigned int*)malloc(rval_nd * sizeof(int));

    if(PyTuple_Check(shape)){
        for (int i = 0; i < rval_nd; ++i)
        {
            // GetItem returns a borrowed reference. Parse into a signed long
            // first, so the negative-dimension check below can actually fire
            // (rval_dims is unsigned, so testing it for < 0 would be a no-op).
            long dim_i = PyInt_AsLong(PyTuple_GetItem(shape, i));
            if (PyErr_Occurred()) //error in AsLong
            {
                free(rval_dims);
                return NULL;
            }
            if(dim_i<0){
                PyErr_Format(PyExc_ValueError, "Reshape has invalid dimension %ld (must be >=0)", dim_i);
                free(rval_dims);
                return NULL;
            }
            rval_dims[i] = dim_i;
            rval_size = rval_size * rval_dims[i];
        }
    }else{
        rval_size = PyInt_AsLong(shape);
        rval_dims[0] = rval_size;
    }
    // calculate new size, assert same as old size
    if (rval_size != CudaNdarray_SIZE(self))
    {
        PyErr_Format(PyExc_ValueError, "size must remain unchanged, changed from %lld to %lld", (long long)CudaNdarray_SIZE(self), (long long)rval_size);
        free(rval_dims);
        return NULL;
    }
    if (rval_size==0)
    {
        PyObject * rval = CudaNdarray_NewDims(rval_nd, rval_dims);
        free(rval_dims);
        return rval;
    }

    //return a view, not a copy
    //we can do this as we checked self is c_contiguous
    CudaNdarray * rval = (CudaNdarray * )CudaNdarray_New(rval_nd);

    if (!rval || 0 != rval->data_allocated
        || CudaNdarray_set_device_data(rval, CudaNdarray_DEV_DATA(self), self))
    {
        Py_XDECREF(rval);
        free(rval_dims);
        return NULL;
    }
    //set dim and stride
    int size = 1;
    for (int i = rval_nd-1; i >= 0; --i)
    {
        CudaNdarray_set_stride(rval, i, (rval_dims[i] == 1) ? 0 : size);
        CudaNdarray_set_dim(rval, i, rval_dims[i]);
        size = size * rval_dims[i];
    }
    free(rval_dims);
    return (PyObject*)rval;
}
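
// Editor's worked example of the stride loop above (not in the original
// source): for rval_dims = {2, 3, 4}, walking right to left gives the
// C-order strides {12, 4, 1} (in elements):
//
//   i = 2: stride = 1;  size becomes 4
//   i = 1: stride = 4;  size becomes 12
//   i = 0: stride = 12; size becomes 24
//
// Dimensions of extent 1 get stride 0 instead, which makes them
// broadcastable elsewhere in the module.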

PyObject * CudaNdarray_View(const CudaNdarray * self)
{
    CudaNdarray * rval = (CudaNdarray*)CudaNdarray_New(self->nd);
    if (!rval || CudaNdarray_set_device_data(rval, CudaNdarray_DEV_DATA(self), self))
    {
        Py_XDECREF(rval);
        rval = NULL;
    }
    else
    {
        for (int i = 0; i < self->nd; ++i)
        {
            CudaNdarray_set_dim(rval, i, CudaNdarray_HOST_DIMS(self)[i]);
            CudaNdarray_set_stride(rval, i, CudaNdarray_HOST_STRIDES(self)[i]);
        }
    }
    return (PyObject*)rval;
}

/*
 * d0,... are the output dims
 * indices are a list of index to operate on
 *         They are int64 viewed as float32.
 * a is the output
 * b is the input
 * dB0, the source leading dimension's size
 */
template <int operator_num>
__global__ void k_take_3(const int d0, const int d1, const int d2,
                         const npy_int64* indices,
                         float* a,
                         const int sA0, const int sA1, const int sA2,
                         const float* b, const int dB0,
                         const int sB0, const int sB1, const int sB2,
                         int* err){
    for (int i0 = blockIdx.x; i0 < d0; i0 += gridDim.x){
        npy_int64 idx = indices[i0];
        if (idx<0)
            idx += dB0; // To allow negative indexing.
        if ((idx < 0) || (idx >= dB0)){
            // Any value other than 0 would probably work. But to be safer, I
            // want to change all bits, to prevent problems with concurrent
            // writes that could cross a cache line. This should not happen
            // with the current code and driver, though.
            *err = 0xFFFF;
            continue;
        }
        for (int i1 = threadIdx.x; i1 < d1; i1 += blockDim.x){
            for (int i2 = threadIdx.y; i2 < d2; i2 += blockDim.y){
                int a_idx = i0*sA0 + i1*sA1 + i2*sA2;
                int b_idx = idx*sB0 + i1*sB1 + i2*sB2;
                a[a_idx] = b[b_idx];
            }
        }
    }
}
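
// Editor's note on the work distribution above (a sketch, not in the
// original source): the kernel uses grid-stride loops, so any launch
// configuration is correct; the blocks cover the index axis (i0) and the
// threads of each block tile the remaining two axes. E.g. with
// d0 = 100000 rows and gridDim.x = 65535, block 0 handles rows 0 and
// 65535 — correctness never depends on launching "enough" blocks, only
// performance does.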

// We try to be similar to the PyArray_TakeFrom function
// http://docs.scipy.org/doc/numpy/reference/c-api.array.html
//TODO: support other clip modes than raise (clip, wrap)
//self is the input that we copy data from.
//The indices that we receive MUST be a CudaNdarray(float32)
//    that is in fact a view to int64 indices
PyObject*
CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
    int verbose = 0;
    PyObject * indices_obj = NULL;
    //int axis; Default None, that means the flattened array.
    PyObject * axis_obj = Py_None;
    PyObject * out_obj = Py_None;
    PyObject * clipmode_obj = NULL;
    int max_threads = 1; // max threads per block

    if (! PyArg_ParseTuple(args, "O|OOOi", &indices_obj, &axis_obj,
                           &out_obj, &clipmode_obj, &max_threads))
        return NULL;

    //Check argument indices
    //TODO: if not a numpy.ndarray, convert to numpy.ndarray
    //TODO: If a CudaNdarray, accept it and suppose the data is int32? is float32 number of int?
    //TODO: Support ndarrays of other dtypes than int64
    //TODO: support lists of indices that are not c_contiguous
    CudaNdarray * indices = NULL;
    if (CudaNdarray_Check(indices_obj)) {
        if (verbose) printf("cudandarray indices\n");
        indices = (CudaNdarray*) indices_obj;
        Py_INCREF(indices);
    } else if (PyArray_Check(indices_obj)) {
        if (verbose) printf("ndarray indices\n");
        if (PyArray_TYPE((PyArrayObject *)indices_obj) != NPY_INT64) {
            PyErr_SetString(PyExc_TypeError,
                            "CudaNdarray_TakeFrom: need an ndarray for indices"
                            " with dtype int64");
            return NULL;
        }
        if (PyArray_NDIM(((PyArrayObject*)indices_obj)) != 1) {
            PyErr_SetString(PyExc_TypeError,
                            "CudaNdarray_TakeFrom: need a CudaNdarray of"
                            " indices with only 1 dimension");
            return NULL;
        }
        // We need indices_obj to be contiguous, in order to take a view
        // with a different dtype.
        if (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*) indices_obj)) {
            PyObject* indices_obj_contig = PyArray_NewCopy((PyArrayObject*) indices_obj, NPY_CORDER);
            if (!indices_obj_contig)
                return NULL;
            indices_obj = indices_obj_contig;
        } else {
            // Keep the refcount consistent
            Py_INCREF(indices_obj);
        }
        PyArray_Descr* float32_descr = PyArray_DescrFromType(NPY_FLOAT32);
        PyObject * indices_float32 = NULL;
        indices_float32 = PyArray_View((PyArrayObject*)indices_obj,
                                       float32_descr, NULL);
        if (verbose) printf("ndarray indices\n");
        if (!indices_float32) {
            Py_DECREF(indices_obj);
            return NULL;
        }

        indices = (CudaNdarray*) CudaNdarray_New();
        if (verbose) printf("\nndarray after new\n");
        if (! indices){
            Py_DECREF(indices_obj);
            Py_DECREF(indices_float32);
            return NULL;
        }
        if (CudaNdarray_CopyFromArray(indices,
                                      (PyArrayObject *)indices_float32)){
            Py_DECREF(indices_obj);
            Py_DECREF(indices_float32);
            return NULL;
        }
        Py_DECREF(indices_obj);
        Py_DECREF(indices_float32);
    } else {
        PyObject* py_s = PyObject_Str(indices_obj);
        const char* s = PyString_AsString(py_s);
        Py_DECREF(py_s);
        PyErr_Format(PyExc_TypeError,
                     "CudaNdarray_TakeFrom: need an ndarray of int64 or a"
                     " CudaNdarray(float32) that is a view from int64 data"
                     " for indices. Got %s", s);
        return NULL;
    }

    if (verbose) {
        printf("indices used on the gpu\n");
        fprint_CudaNdarray(stdout, indices);
        PyObject * used_indices = CudaNdarray_CreateArrayObj(indices);
        PyObject_Print(used_indices, stdout, 0);
        Py_DECREF(used_indices);
    }
    if (verbose) printf("after print of object\n");
    if(!CudaNdarray_is_c_contiguous(indices)) {
        PyErr_SetString(PyExc_NotImplementedError,
                        "CudaNdarray_TakeFrom: The indices must be contiguous in memory.");
        Py_DECREF(indices);
        return NULL;
    }
    int nb_indices = CudaNdarray_SIZE((CudaNdarray *)indices) / 2; // int64 are 8 bytes, float32 are 4 bytes

    //Check argument axis
    //TODO: implement the default and other axes
    long axis = PyInt_AsLong(axis_obj);

    if (axis != 0) {
        PyErr_Format(PyExc_NotImplementedError,
                     "CudaNdarray_TakeFrom: only axis=0 is currently supported."
                     " Got %ld.", axis);
        Py_DECREF(indices);
        return NULL;
    }

    //Check argument out_obj
    CudaNdarray * out = NULL;
    if (out_obj && CudaNdarray_Check(out_obj))
        out = (CudaNdarray*) out_obj;
    if (out && (out->nd != self->nd ||
                CudaNdarray_HOST_DIMS(out)[0] != nb_indices))
        out = NULL;
    int * dims = (int *)malloc(sizeof(int) * self->nd);
    dims[0] = nb_indices;

    for (int i=1 ; i<self->nd ; i++) {
        dims[i] = CudaNdarray_HOST_DIMS(self)[i];
        if (out && CudaNdarray_HOST_DIMS(out)[i] != dims[i]) {
            out = NULL;
        }
    }
    if (!out) {
        out = (CudaNdarray*)CudaNdarray_New();
        if (!out){
            Py_DECREF(indices);
            free(dims);
            return NULL;
        }
        if (CudaNdarray_alloc_contiguous(out, self->nd, dims)) {
            Py_DECREF(out);
            Py_DECREF(indices);
            free(dims);
            return NULL;
        }
    }else {
        Py_INCREF(out);
    }

    //Check argument clipmode
    if (clipmode_obj) {
        char * clipmode = PyString_AsString(clipmode_obj);
        if (! clipmode){
            Py_DECREF(indices);
            Py_DECREF(out);
            free(dims);
            return NULL;
        }
        if (strcmp(clipmode, "raise") != 0) {
            PyErr_Format(PyExc_NotImplementedError,
                         "CudaNdarray_TakeFrom: only the raise mode is currently supported. Got '%s'",
                         clipmode);
            Py_DECREF(indices);
            Py_DECREF(out);
            free(dims);
            return NULL;
        }
    }
    void (*k3)(const int, const int, const int,
               const npy_int64*,
               float*, const int, const int, const int,
               const float*, const int,
               const int, const int, const int,
               int*);
    k3 = k_take_3<CPY>;

    // Create the memory place that will store the error information.
    if(init_err_var() != 0) return NULL;

    dim3 n_blocks(std::min(CudaNdarray_HOST_DIMS(out)[0],65535),1,1);
    if(CudaNdarray_HOST_DIMS(out)[0] == 0){
        // We take 0 elements, so no need for the rest of the code.
        // This speeds up that case AND avoids a crash otherwise.
        free(dims);
        Py_DECREF(indices);
        return (PyObject *)out;
    }
    switch (self->nd) {
        case 1:
            {
                dim3 n_threads(1, 1, 1);
                if (verbose)
                    printf("cudaGetLastError=%d, nd=%d"
                           " kernel config: (n_blocks.x=%d, n_blocks.y=%d,"
                           " n_threads.x=%i, n_threads.y=%i)\n",
                           cudaGetLastError(), self->nd,
                           n_blocks.x, n_blocks.y, n_threads.x, n_threads.y);
                k3<<<n_blocks, n_threads>>>(
                        dims[0],
                        1,
                        1,
                        (npy_int64*) CudaNdarray_DEV_DATA(indices),
                        CudaNdarray_DEV_DATA(out),
                        CudaNdarray_HOST_STRIDES(out)[0], //strides
                        1,
                        1,
                        CudaNdarray_DEV_DATA(self),
                        CudaNdarray_HOST_DIMS(self)[0], //For indices check
                        CudaNdarray_HOST_STRIDES(self)[0], //strides
                        1,
                        1,
                        err_var);
            }
            break;
        case 2:
            {
                dim3 n_threads(std::min(CudaNdarray_HOST_DIMS(out)[1], max_threads), 1, 1);

                if (verbose)
                    printf("cudaGetLastError=%d, nd=%d"
                           " kernel config: (n_blocks.x=%d, n_blocks.y=%d,"
                           " n_threads.x=%i, n_threads.y=%i)\n",
                           cudaGetLastError(), self->nd,
                           n_blocks.x, n_blocks.y, n_threads.x, n_threads.y);

                k3<<<n_blocks, n_threads>>>(
                        dims[0], //dimensions
                        dims[1],
                        1,
                        (npy_int64*) CudaNdarray_DEV_DATA(indices),
                        CudaNdarray_DEV_DATA(out),
                        CudaNdarray_HOST_STRIDES(out)[0], //strides
                        CudaNdarray_HOST_STRIDES(out)[1],
                        1,
                        CudaNdarray_DEV_DATA(self),
                        CudaNdarray_HOST_DIMS(self)[0], //For indices check
                        CudaNdarray_HOST_STRIDES(self)[0], //strides
                        CudaNdarray_HOST_STRIDES(self)[1],
                        1,
                        err_var);
            }
            break;
        case 3:
            {
                int ty = std::min(CudaNdarray_HOST_DIMS(out)[2], max_threads);
                int tx = std::min(CudaNdarray_HOST_DIMS(out)[1], max_threads / ty);
                dim3 n_threads(tx, ty, 1);
                if (verbose)
                    printf("cudaGetLastError=%d, nd=%d"
                           " kernel config: (n_blocks.x=%d, n_blocks.y=%d,"
                           " n_threads.x=%i, n_threads.y=%i)\n",
                           cudaGetLastError(), self->nd,
                           n_blocks.x, n_blocks.y, n_threads.x, n_threads.y);
                k3<<<n_blocks, n_threads>>>(
                        dims[0], //dimensions
                        dims[1],
                        dims[2],
                        (npy_int64*) CudaNdarray_DEV_DATA(indices),
                        CudaNdarray_DEV_DATA(out),
                        CudaNdarray_HOST_STRIDES(out)[0], //strides
                        CudaNdarray_HOST_STRIDES(out)[1],
                        CudaNdarray_HOST_STRIDES(out)[2],
                        CudaNdarray_DEV_DATA(self),
                        CudaNdarray_HOST_DIMS(self)[0], //For indices check
                        CudaNdarray_HOST_STRIDES(self)[0], //strides
                        CudaNdarray_HOST_STRIDES(self)[1],
                        CudaNdarray_HOST_STRIDES(self)[2],
                        err_var);
            }
            break;
        default:
            PyErr_SetString(PyExc_NotImplementedError,
                            "CudaNdarray_TakeFrom: only inputs with 1, 2 or 3"
                            " dimensions are currently supported");

    }
    free(dims);
    CNDA_THREAD_SYNC;
    cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err) {
        PyErr_Format(PyExc_RuntimeError,
                     "Cuda error: %s: %s.\n",
                     "CudaNdarray_TakeFrom",
                     cudaGetErrorString(err));
        Py_DECREF(indices);
        Py_DECREF(out);
        return NULL;
    }

    int index_err = check_err_var();
    Py_DECREF(indices);
    if (index_err != 0) {
        Py_DECREF(out);
        return NULL;
    }

    if (verbose) printf("TAKE SUCCEEDED\n");
    return (PyObject *)out;
}


PyObject * CudaNdarray_SetStride(CudaNdarray * self, PyObject *args)
{
    int pos, stride;
    if (! PyArg_ParseTuple(args, "ii", &pos, &stride))
        return NULL;
    if ((pos < 0) || (pos >= self->nd))
    {
        PyErr_Format(PyExc_ValueError, "position argument out of legal range [0, %i)", self->nd);
        return NULL;
    }
    CudaNdarray_set_stride(self, pos, stride);
    if (cnda_copy_structure_to_device(self))
    {
        return NULL;
    }
    Py_INCREF(Py_None);
    return Py_None;
}
PyObject * CudaNdarray_SetShapeI(CudaNdarray * self, PyObject *args)
{
    int pos, dim;
    if (! PyArg_ParseTuple(args, "ii", &pos, &dim))
        return NULL;
    if ((pos < 0) || (pos >= self->nd))
    {
        PyErr_Format(PyExc_ValueError, "position argument out of legal range [0, %i)", self->nd);
        return NULL;
    }
    CudaNdarray_set_dim(self, pos, dim);
    if (cnda_copy_structure_to_device(self))
    {
        return NULL;
    }
    Py_INCREF(Py_None);
    return Py_None;
}

static PyObject *
CudaNdarray_exp(CudaNdarray* self)
{
    CudaNdarray * rval = (CudaNdarray *)CudaNdarray_New();
    if ((NULL == rval) || CudaNdarray_alloc_contiguous(rval, self->nd, CudaNdarray_HOST_DIMS(self)))
    {
        Py_XDECREF(rval);
        return NULL;
    }
    unsigned int size = 1;
    for (int i = 0; i < self->nd; i++)
    {
        size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i];
    }
    unsigned int threads_per_block = std::min(size, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
    unsigned int n_blocks = std::min(ceil_intdiv(size,threads_per_block), (unsigned int)NUM_VECTOR_OP_BLOCKS);
    k_elemwise_unary_rowmajor_exp<<<n_blocks,threads_per_block>>>(size, self->nd, CudaNdarray_DEV_DIMS(self),
            CudaNdarray_DEV_DATA(self), CudaNdarray_DEV_STRIDES(self),
            CudaNdarray_DEV_DATA(rval), CudaNdarray_DEV_STRIDES(rval));

    //TODO: don't do this right away, do it when we need the result
    CNDA_THREAD_SYNC;
    cudaError_t err = cudaGetLastError();
    if( cudaSuccess != err)
    {
        Py_DECREF(rval);
        PyErr_Format(PyExc_RuntimeError, "Cuda error: %s: %s.\n", "kExp", cudaGetErrorString(err));
        return NULL;
    }

    return (PyObject*)rval;
}
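
// Editor's worked example of the launch configuration above (a sketch,
// not in the original source; the actual NUM_VECTOR_OP_* constants live
// in cuda_ndarray.cuh). Assuming NUM_VECTOR_OP_THREADS_PER_BLOCK = 256
// and NUM_VECTOR_OP_BLOCKS = 4096, a tensor of 1,000,000 elements gets:
//
//   threads_per_block = min(1000000, 256)            = 256
//   n_blocks = min(ceil_intdiv(1000000, 256), 4096)
//            = min(3907, 4096)                       = 3907
//
// and the grid-stride loop inside the kernel handles any remainder, so
// the clamping to NUM_VECTOR_OP_BLOCKS stays safe for larger tensors.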

static PyMethodDef CudaNdarray_methods[] =
{
    {"__array__",
        (PyCFunction)CudaNdarray_CreateArrayObj, METH_VARARGS,
        "Copy from the device to a numpy ndarray"},
    {"__copy__",
        (PyCFunction)CudaNdarray_View, METH_NOARGS,
        "Create a shallow copy of this object. Used by the copy module"},
    {"__deepcopy__",
        (PyCFunction)CudaNdarray_DeepCopy, METH_O,
        "Create a copy of this object"},
    {"zeros",
        (PyCFunction)CudaNdarray_Zeros, METH_STATIC | METH_O,
        "Create a new CudaNdarray with specified shape, filled with zeros."},
    {"copy",
        (PyCFunction)CudaNdarray_Copy, METH_NOARGS,
        "Create a copy of this object"},
    {"is_c_contiguous",
        (PyCFunction)CudaNdarray_IS_C_Contiguous, METH_NOARGS,
        "Return True if the object is c contiguous. False otherwise."},
    {"reduce_sum",
        (PyCFunction)CudaNdarray_ReduceSum, METH_O,
        "Reduce over the given dimensions by summation"},
    {"exp",
        (PyCFunction)CudaNdarray_exp, METH_NOARGS,
        "Return the exponential of all elements"},
    {"reshape",
        (PyCFunction)CudaNdarray_Reshape, METH_O,
        "Return a reshaped view (or copy) of this ndarray\n\
            The required argument is a tuple of integers specifying the shape of the new ndarray."},
    {"view",
        (PyCFunction)CudaNdarray_View, METH_NOARGS,
        "Return an alias of this ndarray"},
    {"_set_stride",
        (PyCFunction)CudaNdarray_SetStride, METH_VARARGS,
        "For integer arguments (i, s), set the 'i'th stride to 's'"},
    {"take",
        (PyCFunction)CudaNdarray_TakeFrom, METH_VARARGS,
        "Equivalent of numpy.take"},
    {"_set_shape_i",
        (PyCFunction)CudaNdarray_SetShapeI, METH_VARARGS,
        "For integer arguments (i, s), set the 'i'th shape to 's'"},
    {NULL, NULL, 0, NULL}  /* Sentinel */
};


////////////////////
// Number protocol
////////////////////

__global__ void kAdd_contiguous(float* a, float* b, float* dest, unsigned int numEls) {
    const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int numThreads = blockDim.x * gridDim.x;

    for (unsigned int i = idx; i < numEls; i += numThreads) {
        dest[i] = a[i] + b[i];
    }
}

// Will be called by __add__ in Python
static PyObject *
CudaNdarray_add(PyObject* py_self, PyObject * py_other)
{
    if (! CudaNdarray_Check(py_self)) {
        PyErr_SetString(PyExc_TypeError, "need a CudaNdarray on left");
        return NULL;
    }
    if (! CudaNdarray_Check(py_other)) {
        PyErr_SetString(PyExc_TypeError, "need a CudaNdarray on right");
        return NULL;
    }
    CudaNdarray * self = (CudaNdarray *)py_self;
    CudaNdarray * other = (CudaNdarray *)py_other;
    if(!CudaNdarray_is_c_contiguous(self) || !CudaNdarray_is_c_contiguous(other)){
        PyErr_SetString(PyExc_TypeError, "We have implemented only the c_contiguous version for now.");
        return NULL;
    }

    //standard elemwise size checks
    if (self->nd != other->nd)
    {
        PyErr_SetString(PyExc_TypeError, "CudaNdarray_add: need same number of dims");
        return NULL;
    }
    //standard elemwise dim checks
    unsigned int size = 1;
    for (int i = 0; i< self->nd; ++i)
    {
        if (CudaNdarray_HOST_DIMS(self)[i] != CudaNdarray_HOST_DIMS(other)[i])
        {
            PyErr_SetString(PyExc_TypeError, "need same dimensions");
            return NULL;
        }
        size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i];
    }
    CudaNdarray * rval = (CudaNdarray *)CudaNdarray_New();
    if (!rval || CudaNdarray_alloc_contiguous(rval, self->nd, CudaNdarray_HOST_DIMS(self)))
    {
        Py_XDECREF(rval);
        return NULL;
    }

    if(CudaNdarray_SIZE((CudaNdarray *)py_self)==0 && CudaNdarray_SIZE((CudaNdarray *)py_other)==0){
        return (PyObject *) rval;
    }

    int threads_per_block = std::min(size, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
    int n_blocks = std::min(ceil_intdiv(size,(unsigned int)threads_per_block), (unsigned int)NUM_VECTOR_OP_BLOCKS);
    kAdd_contiguous<<<n_blocks,threads_per_block>>>(
            self->devdata, other->devdata, rval->devdata, size);
    CNDA_THREAD_SYNC;
    cudaError_t err = cudaGetLastError();
    if( cudaSuccess != err)
    {
        PyErr_Format(PyExc_RuntimeError, "Cuda error: %s: %s.\n", "kAdd", cudaGetErrorString(err));
        Py_DECREF(rval);
        return NULL;
    }
    return (PyObject *) rval;
}

template <int operator_num>
__global__ void k_ielem_3(const int d0, const int d1, const int d2,
                          float* a, const int sA0, const int sA1, const int sA2,
                          const float* b, const int sB0, const int sB1, const int sB2){
    for (int i0 = blockIdx.x; i0 < d0; i0 += gridDim.x){
        for (int i1 = blockIdx.y; i1 < d1; i1 += gridDim.y){
            for (int i2 = threadIdx.x; i2 < d2; i2 += blockDim.x){
                switch (operator_num)
                {
                  case IADD:
                    a[i0*sA0 + i1*sA1 + i2*sA2] += b[i0*sB0 + i1*sB1 + i2*sB2];
                    break;
                  case IDIV:
                    a[i0*sA0 + i1*sA1 + i2*sA2] /= b[i0*sB0 + i1*sB1 + i2*sB2];
                    break;
                  case CPY:
                    a[i0*sA0 + i1*sA1 + i2*sA2] = b[i0*sB0 + i1*sB1 + i2*sB2];
                    break;
                }
            }
        }
    }
}

template <int operator_num>
__global__ void k_ielem_4(const int d0, const int d1, const int d2, const int d3,
                          float* a, const int sA0, const int sA1,
                          const int sA2, const int sA3,
                          const float* b, const int sB0, const int sB1,
                          const int sB2, const int sB3){
    for (int i0 = blockIdx.x; i0 < d0; i0 += gridDim.x){
        for (int i1 = blockIdx.y; i1 < d1; i1 += gridDim.y){
            for (int i2 = threadIdx.x; i2 < d2; i2 += blockDim.x){
                for (int i3 = threadIdx.y; i3 < d3; i3 += blockDim.y){
                    switch (operator_num) {
                      case IADD:
                        a[i0*sA0 + i1*sA1 + i2*sA2 + i3*sA3]
                            += b[i0*sB0 + i1*sB1 + i2*sB2 + i3*sB3];
                        break;
                      case IDIV:
                        a[i0*sA0 + i1*sA1 + i2*sA2 + i3*sA3]
                            /= b[i0*sB0 + i1*sB1 + i2*sB2 + i3*sB3];
                        break;
                      case CPY:
                        a[i0*sA0 + i1*sA1 + i2*sA2 + i3*sA3]
                            = b[i0*sB0 + i1*sB1 + i2*sB2 + i3*sB3];
                        break;
                    }
                }
            }
        }
    }
}

template <int operator_num>
__global__ void k_ielem_6(const int d0, const int d1,
                          const int d2, const int d3,
                          const int d4, const int d5,
                          float* a, const int sA0, const int sA1,
                          const int sA2, const int sA3,
                          const int sA4, const int sA5,
                          const float* b, const int sB0, const int sB1,
                          const int sB2, const int sB3,
                          const int sB4, const int sB5
                          ){
    for (int i0 = blockIdx.x; i0 < d0; i0 += gridDim.x){
        for (int i1 = blockIdx.y; i1 < d1; i1 += gridDim.y){
            for (int i2 = blockIdx.z; i2 < d2; i2 += gridDim.z){
                for (int i3 = threadIdx.x; i3 < d3; i3 += blockDim.x){
                    for (int i4 = threadIdx.y; i4 < d4; i4 += blockDim.y){
                        for (int i5 = threadIdx.z; i5 < d5; i5 += blockDim.z){
                            switch (operator_num) {
                              case IADD:
                                a[i0*sA0 + i1*sA1 + i2*sA2 + i3*sA3 + i4*sA4 + i5*sA5]
                                    += b[i0*sB0 + i1*sB1 + i2*sB2 + i3*sB3 + i4*sB4 + i5*sB5];
                                break;
                              case IDIV:
                                a[i0*sA0 + i1*sA1 + i2*sA2 + i3*sA3 + i4*sA4 + i5*sA5]
                                    /= b[i0*sB0 + i1*sB1 + i2*sB2 + i3*sB3 + i4*sB4 + i5*sB5];
                                break;
                              case CPY:
                                a[i0*sA0 + i1*sA1 + i2*sA2 + i3*sA3 + i4*sA4 + i5*sA5]
                                    = b[i0*sB0 + i1*sB1 + i2*sB2 + i3*sB3 + i4*sB4 + i5*sB5];
                                break;
                            }
                        }
                    }
                }
            }
        }
    }
}
1635 /* | |
1636 CudaNdarray_inplace_elemwise | |
1637 Compute an elemwise operation, working in place on A. | |
1638 Currently implemented: A /= B, A += B and A = B | |
1639 (the last is not tested and not used!) | |
1640 | |
1641 py_self - the CudaNdarray that we'll modify (A) | |
1642 py_other - the other argument (B) | |
1643 fct_nb - which operation to perform (operator_t) | |
1644 | |
1645 Returns 0 on success. | |
1646 Returns -1 on failure, and sets Python exception. | |
1647 | |
1648 */ | |
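// A minimal C-level usage sketch (assuming x and y are valid CudaNdarray*
// objects with compatible shapes; the names are hypothetical):
//
//   if (CudaNdarray_inplace_elemwise((PyObject*)x, (PyObject*)y, IADD))
//       return NULL;  // failure: a Python exception is already set
//   // x now holds x + y, computed in place on the device.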
1649 int | |
1650 CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t fct_nb) | |
1651 { | |
1652 int verbose = 0; | |
1653 void (*k3)(const int, const int, const int, | |
1654 float*, const int, const int, const int, | |
1655 const float*, const int, const int, const int); | |
1656 void (*k4)(const int, const int, const int, const int, | |
1657 float*, const int, const int, | |
1658 const int, const int, | |
1659 const float*, const int, const int, | |
1660 const int, const int); | |
1661 void (*k6)(const int, const int, | |
1662 const int, const int, | |
1663 const int, const int, | |
1664 float*, const int, const int, | |
1665 const int, const int, | |
1666 const int, const int, | |
1667 const float*, const int, const int, | |
1668 const int, const int, | |
1669 const int, const int); | |
1670 switch (fct_nb) | |
1671 { | |
1672 case IADD: | |
1673 k3 = k_ielem_3<IADD>; | |
1674 k4 = k_ielem_4<IADD>; | |
1675 k6 = k_ielem_6<IADD>; | |
1676 break; | |
1677 case IDIV: | |
1678 k3 = k_ielem_3<IDIV>; | |
1679 k4 = k_ielem_4<IDIV>; | |
1680 k6 = k_ielem_6<IDIV>; | |
1681 break; | |
1682 case CPY: | |
1683 k3 = k_ielem_3<CPY>; | |
1684 k4 = k_ielem_4<CPY>; | |
1685 k6 = k_ielem_6<CPY>; | |
1686 break; | |
1687 default: | |
1688 assert (0); | |
1689 PyErr_Format( | |
1690 PyExc_TypeError, | |
1691 "CudaNdarray_inplace_elemwise invalid fct_nb (%i).", | |
1692 (int)fct_nb); | |
1693 return -1; | |
1694 } | |
1695 if (!CudaNdarray_Check(py_self)) { | |
1696 PyErr_SetString( | |
1697 PyExc_TypeError, | |
1698 "CudaNdarray_inplace_elemwise need a CudaNdarray on left"); | |
1699 return -1; | |
1700 } | |
1701 CudaNdarray * new_other = NULL; | |
1702 if (!CudaNdarray_Check(py_other)) { | |
1703 new_other = (CudaNdarray*) CudaNdarray_New(); | |
1704 if(!new_other) | |
1705 { | |
1706 return -1; | |
1707 } | |
1708 if(CudaNdarray_CopyFromArray(new_other, (PyArrayObject *) py_other)) | |
1709 { | |
1710 Py_XDECREF(new_other); | |
1711 return -1; | |
1712 } | |
1713 py_other = (PyObject *) new_other; | |
1714 } | |
1715 | |
1716 CudaNdarray * self = (CudaNdarray *)py_self; | |
1717 CudaNdarray * other = (CudaNdarray *)py_other; | |
1718 | |
1719 if (verbose) | |
1720 { | |
1721 fprintf(stderr, | |
1722 "INPLACE ADD/DIV for self->nd=%d other->nd=%d\n", | |
1723 self->nd, other->nd); | |
1724 } | |
1725 | |
1726 //standard elemwise nb dim checks | |
1727 if (self->nd < other->nd) | |
1728 { | |
1729 PyErr_Format( | |
1730 PyExc_TypeError, | |
1731 "CudaNdarray_inplace_elemwise: The destination need more or the" | |
1732 " same number of dimensions then the source. Got %d and %d.", | |
1733 self->nd, other->nd); | |
1734 Py_XDECREF(new_other); | |
1735 return -1; | |
1736 } | |
1737 | |
1738 //broadcast to the same number of dimensions. | |
1739 int* other_dims = (int*) alloca(self->nd * sizeof(int)); | |
1740 int* other_strides = (int*) alloca(self->nd * sizeof(int)); | |
1741 int added_dims = self->nd - other->nd; | |
1742 // Add the added broadcasted dimensions | |
1743 for (int i = 0; i< added_dims; ++i) | |
1744 { | |
1745 other_dims[i] = 1; | |
1746 other_strides[i] = 0; | |
1747 } | |
1748 // Copy the existing dimensions | |
1749 for (int i = 0; i< other->nd; ++i) | |
1750 { | |
1751 other_dims[i+added_dims] = CudaNdarray_HOST_DIMS(other)[i]; | |
1752 other_strides[i+added_dims] = CudaNdarray_HOST_STRIDES(other)[i]; | |
1753 } | |
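// Worked example: with self->nd == 3, self dims (4, 5, 6), and a 1-d `other`
// of dims (6,), added_dims == 2 and we get
//   other_dims    = {1, 1, 6}
//   other_strides = {0, 0, CudaNdarray_HOST_STRIDES(other)[0]}
// so the kernels below re-read the same vector of `other` for every (i0, i1).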
1754 | |
1755 //standard elemwise dim checks | |
1756 unsigned int size = 1; | |
1757 for (int i = 0; i< self->nd; ++i) | |
1758 { | |
1759 if ((CudaNdarray_HOST_DIMS(self)[i] != other_dims[i]) | |
1760 && (other_dims[i] != 1)) | |
1761 { | |
1762 PyErr_SetString( | |
1763 PyExc_ValueError, | |
1764 "CudaNdarray_inplace_elemwise need same dimensions (or broadcastable dimension)"); | |
1765 Py_XDECREF(new_other); | |
1766 return -1; | |
1767 } | |
1768 // if we're broadcasting other, then make sure it has stride 0 | |
1769 assert ((CudaNdarray_HOST_DIMS(self)[i] == other_dims[i]) | |
1770 || (other_strides[i] == 0)); | |
1771 size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i]; | |
1772 } | |
1773 | |
1774 if (size==0) | |
1775 { | |
1776 int other_size = CudaNdarray_SIZE((CudaNdarray *)py_other); | |
1777 if (!(other_size == 0 || other_size == 1)) | |
1778 { | |
1779 PyErr_SetString( | |
1780 PyExc_ValueError, | |
1781 "CudaNdarray_inplace_elemwise cannot work inplace on" | |
1782 " un-initialized array when the new value have more than" | |
1783 " 0 or 1 broadcastable dimensions"); | |
1784 Py_XDECREF(new_other); | |
1785 return 0; | |
1786 } | |
1787 Py_XDECREF(new_other); | |
1788 return 0; | |
1789 } | |
1790 | |
1791 switch(self->nd) | |
1792 { | |
1793 case 0: | |
1794 { | |
1795 dim3 n_blocks(1, 1, 1); | |
1796 dim3 n_threads(1); | |
1797 k3<<<n_blocks, n_threads>>>( | |
1798 1, //d0 | |
1799 1, //d1 | |
1800 1, //d2 | |
1801 CudaNdarray_DEV_DATA(self), | |
1802 1, //strides | |
1803 1, | |
1804 1, | |
1805 CudaNdarray_DEV_DATA(other), | |
1806 1, //strides | |
1807 1, | |
1808 1); | |
1809 CNDA_THREAD_SYNC; | |
1810 cudaError_t err = cudaGetLastError(); | |
1811 if (cudaSuccess != err) | |
1812 { | |
1813 PyErr_Format( | |
1814 PyExc_RuntimeError, | |
1815 "CudaNdarray_inplace_elemwise case0: Cuda error: %s: %s.\n", | |
1816 "k3", | |
1817 cudaGetErrorString(err)); | |
1818 Py_XDECREF(new_other); | |
1819 return -1; | |
1820 } | |
1821 } | |
1822 break; | |
1823 case 1: | |
1824 { | |
1825 dim3 n_blocks(1, 1, 1); | |
1826 dim3 n_threads( | |
1827 std::min( | |
1828 CudaNdarray_HOST_DIMS(self)[0], | |
1829 NUM_VECTOR_OP_THREADS_PER_BLOCK)); | |
1830 k3<<<n_blocks, n_threads>>>( | |
1831 1, //dimensions | |
1832 1, | |
1833 CudaNdarray_HOST_DIMS(self)[0], | |
1834 CudaNdarray_DEV_DATA(self), | |
1835 1, //strides | |
1836 1, | |
1837 CudaNdarray_HOST_STRIDES(self)[0], | |
1838 CudaNdarray_DEV_DATA(other), | |
1839 1, //strides | |
1840 1, | |
1841 other_strides[0]); | |
1842 CNDA_THREAD_SYNC; | |
1843 cudaError_t err = cudaGetLastError(); | |
1844 if (cudaSuccess != err) | |
1845 { | |
1846 PyErr_Format( | |
1847 PyExc_RuntimeError, | |
1848 "CudaNdarray_inplace_elemwise case1: Cuda error: %s: %s.\n", | |
1849 "k3", | |
1850 cudaGetErrorString(err)); | |
1851 Py_XDECREF(new_other); | |
1852 return -1; | |
1853 } | |
1854 } | |
1855 break; | |
1856 case 2: | |
1857 { | |
1858 //TODO: if both self and other are f-contiguous | |
1859 // Then flip the block and thread dimensions | |
1860 // to make contiguous reads & writes | |
1861 dim3 n_blocks(1, | |
1862 std::min( | |
1863 CudaNdarray_HOST_DIMS(self)[0], | |
1864 NUM_VECTOR_OP_BLOCKS)); | |
1865 dim3 n_threads( | |
1866 std::min( | |
1867 CudaNdarray_HOST_DIMS(self)[1], | |
1868 NUM_VECTOR_OP_THREADS_PER_BLOCK)); | |
1869 k3<<<n_blocks, n_threads>>>(1, | |
1870 CudaNdarray_HOST_DIMS(self)[0], | |
1871 CudaNdarray_HOST_DIMS(self)[1], | |
1872 CudaNdarray_DEV_DATA(self), | |
1873 1, | |
1874 CudaNdarray_HOST_STRIDES(self)[0], | |
1875 CudaNdarray_HOST_STRIDES(self)[1], | |
1876 CudaNdarray_DEV_DATA(other), | |
1877 1, | |
1878 other_strides[0], | |
1879 other_strides[1]); | |
1880 CNDA_THREAD_SYNC; | |
1881 cudaError_t err = cudaGetLastError(); | |
1882 if (cudaSuccess != err) | |
1883 { | |
1884 PyErr_Format( | |
1885 PyExc_RuntimeError, | |
1886 "CudaNdarray_inplace_elemwise case2: Cuda error: %s: %s.\n", | |
1887 "k3", | |
1888 cudaGetErrorString(err)); | |
1889 Py_XDECREF(new_other); | |
1890 return -1; | |
1891 } | |
1892 } | |
1893 break; | |
1894 case 3: | |
1895 { | |
1896 //TODO: Dimshuffle so that at least one of the arrays | |
1897 // has a contiguous dimension on the thread idx. | |
1898 dim3 n_blocks( | |
1899 std::min( | |
1900 CudaNdarray_HOST_DIMS(self)[0], | |
1901 NUM_VECTOR_OP_BLOCKS), | |
1902 CudaNdarray_HOST_DIMS(self)[1]); | |
1903 while (n_blocks.x * n_blocks.y > NUM_VECTOR_OP_BLOCKS) | |
1904 n_blocks.y /= 2; | |
1905 dim3 n_threads( | |
1906 std::min( | |
1907 CudaNdarray_HOST_DIMS(self)[2], | |
1908 NUM_VECTOR_OP_THREADS_PER_BLOCK)); | |
1909 k3<<<n_blocks, n_threads>>>( | |
1910 CudaNdarray_HOST_DIMS(self)[0], | |
1911 CudaNdarray_HOST_DIMS(self)[1], | |
1912 CudaNdarray_HOST_DIMS(self)[2], | |
1913 CudaNdarray_DEV_DATA(self), | |
1914 CudaNdarray_HOST_STRIDES(self)[0], | |
1915 CudaNdarray_HOST_STRIDES(self)[1], | |
1916 CudaNdarray_HOST_STRIDES(self)[2], | |
1917 CudaNdarray_DEV_DATA(other), | |
1918 other_strides[0], | |
1919 other_strides[1], | |
1920 other_strides[2]); | |
1921 CNDA_THREAD_SYNC; | |
1922 cudaError_t err = cudaGetLastError(); | |
1923 if (cudaSuccess != err) | |
1924 { | |
1925 PyErr_Format( | |
1926 PyExc_RuntimeError, | |
1927 "CudaNdarray_inplace_elemwise case3: Cuda error: %s: %s.\n", | |
1928 "k3", | |
1929 cudaGetErrorString(err)); | |
1930 Py_XDECREF(new_other); | |
1931 return -1; | |
1932 } | |
1933 } | |
1934 break; | |
1935 case 4: | |
1936 { | |
1937 dim3 n_blocks( | |
1938 std::min( | |
1939 CudaNdarray_HOST_DIMS(self)[0], | |
1940 NUM_VECTOR_OP_BLOCKS), | |
1941 CudaNdarray_HOST_DIMS(self)[1] | |
1942 ); | |
1943 while (n_blocks.x * n_blocks.y > NUM_VECTOR_OP_BLOCKS) | |
1944 n_blocks.y /= 2; | |
1945 dim3 n_threads( | |
1946 std::min( | |
1947 CudaNdarray_HOST_DIMS(self)[2], | |
1948 NUM_VECTOR_OP_THREADS_PER_BLOCK) | |
1949 //TODO: DON"T YOU NEED OT PUT DIMS[3] in here??? | |
1950 ); | |
1951 k4<<<n_blocks, n_threads>>>( | |
1952 CudaNdarray_HOST_DIMS(self)[0], | |
1953 CudaNdarray_HOST_DIMS(self)[1], | |
1954 CudaNdarray_HOST_DIMS(self)[2], | |
1955 CudaNdarray_HOST_DIMS(self)[3], | |
1956 CudaNdarray_DEV_DATA(self), | |
1957 CudaNdarray_HOST_STRIDES(self)[0], | |
1958 CudaNdarray_HOST_STRIDES(self)[1], | |
1959 CudaNdarray_HOST_STRIDES(self)[2], | |
1960 CudaNdarray_HOST_STRIDES(self)[3], | |
1961 CudaNdarray_DEV_DATA(other), | |
1962 other_strides[0], | |
1963 other_strides[1], | |
1964 other_strides[2], | |
1965 other_strides[3]); | |
1966 CNDA_THREAD_SYNC; | |
1967 cudaError_t err = cudaGetLastError(); | |
1968 if (cudaSuccess != err) | |
1969 { | |
1970 PyErr_Format( | |
1971 PyExc_RuntimeError, | |
1972 "CudaNdarray_inplace_elemwise case4: Cuda error: %s: %s.\n", | |
1973 "k4", | |
1974 cudaGetErrorString(err)); | |
1975 Py_XDECREF(new_other); | |
1976 return -1; | |
1977 } | |
1978 } | |
1979 break; | |
1980 case 5: | |
1981 { | |
1982 dim3 n_blocks( | |
1983 std::min( | |
1984 CudaNdarray_HOST_DIMS(self)[1], | |
1985 NUM_VECTOR_OP_BLOCKS), | |
1986 CudaNdarray_HOST_DIMS(self)[2]); | |
1987 while (n_blocks.x * n_blocks.y > NUM_VECTOR_OP_BLOCKS) | |
1988 n_blocks.y /= 2; | |
1989 dim3 n_threads( | |
1990 std::min( | |
1991 CudaNdarray_HOST_DIMS(self)[3], | |
1992 NUM_VECTOR_OP_THREADS_PER_BLOCK) | |
1993 //TODO: DON"T YOU NEED OT PUT DIMS[3] in here??? | |
1994 ); | |
1995 for (int i = 0; i < CudaNdarray_HOST_DIMS(self)[0]; ++i) | |
1996 { | |
1997 k4<<<n_blocks, n_threads>>>( | |
1998 CudaNdarray_HOST_DIMS(self)[1], | |
1999 CudaNdarray_HOST_DIMS(self)[2], | |
2000 CudaNdarray_HOST_DIMS(self)[3], | |
2001 CudaNdarray_HOST_DIMS(self)[4], | |
2002 CudaNdarray_DEV_DATA(self) + i * CudaNdarray_HOST_STRIDES(self)[0], | |
2003 CudaNdarray_HOST_STRIDES(self)[1], | |
2004 CudaNdarray_HOST_STRIDES(self)[2], | |
2005 CudaNdarray_HOST_STRIDES(self)[3], | |
2006 CudaNdarray_HOST_STRIDES(self)[4], | |
2007 CudaNdarray_DEV_DATA(other) + i * other_strides[0], | |
2008 other_strides[1], | |
2009 other_strides[2], | |
2010 other_strides[3], | |
2011 other_strides[4]); | |
2012 CNDA_THREAD_SYNC; | |
2013 cudaError_t err = cudaGetLastError(); | |
2014 if( cudaSuccess != err) | |
2015 { | |
2016 PyErr_Format( | |
2017 PyExc_RuntimeError, | |
2018 "CudaNdarray_inplace_elemwise case5: Cuda error: %s: %s. n_block=(%ld,%ld) n_threads=%ld\n", | |
2019 "k5 with loop over k4", | |
2020 cudaGetErrorString(err), | |
2021 (long) n_blocks.x, (long) n_blocks.y, (long) n_threads.x); | |
2022 Py_XDECREF(new_other); | |
2023 return -1; | |
2024 } | |
2025 } | |
2026 } | |
2027 break; | |
2028 case 6: | |
2029 { | |
2030 dim3 n_blocks( | |
2031 std::min( | |
2032 CudaNdarray_HOST_DIMS(self)[0], | |
2033 NUM_VECTOR_OP_BLOCKS), | |
2034 CudaNdarray_HOST_DIMS(self)[1], | |
2035 CudaNdarray_HOST_DIMS(self)[2] | |
2036 ); | |
2037 while (n_blocks.x * n_blocks.y > NUM_VECTOR_OP_BLOCKS) | |
2038 n_blocks.y /= 2; | |
2039 // The GTX 285 (compute capability 1.3) doesn't support n_blocks.z > 1; | |
2040 // compute capability 2.0 supports up to 65535 for n_blocks.z. | |
2041 //while (n_blocks.x * n_blocks.y * n_blocks.z > NUM_VECTOR_OP_BLOCKS) | |
2042 // n_blocks.z /= 2; | |
2043 n_blocks.z = 1; | |
2044 dim3 n_threads( | |
2045 std::min( | |
2046 CudaNdarray_HOST_DIMS(self)[3], | |
2047 NUM_VECTOR_OP_THREADS_PER_BLOCK) | |
2048 //TODO: DON'T YOU NEED TO PUT DIMS[4] in here??? | |
2049 //TODO: DON'T YOU NEED TO PUT DIMS[5] in here??? | |
2050 ); | |
2051 k6<<<n_blocks, n_threads>>>( | |
2052 CudaNdarray_HOST_DIMS(self)[0], | |
2053 CudaNdarray_HOST_DIMS(self)[1], | |
2054 CudaNdarray_HOST_DIMS(self)[2], | |
2055 CudaNdarray_HOST_DIMS(self)[3], | |
2056 CudaNdarray_HOST_DIMS(self)[4], | |
2057 CudaNdarray_HOST_DIMS(self)[5], | |
2058 CudaNdarray_DEV_DATA(self), | |
2059 CudaNdarray_HOST_STRIDES(self)[0], | |
2060 CudaNdarray_HOST_STRIDES(self)[1], | |
2061 CudaNdarray_HOST_STRIDES(self)[2], | |
2062 CudaNdarray_HOST_STRIDES(self)[3], | |
2063 CudaNdarray_HOST_STRIDES(self)[4], | |
2064 CudaNdarray_HOST_STRIDES(self)[5], | |
2065 CudaNdarray_DEV_DATA(other), | |
2066 other_strides[0], | |
2067 other_strides[1], | |
2068 other_strides[2], | |
2069 other_strides[3], | |
2070 other_strides[4], | |
2071 other_strides[5]); | |
2072 CNDA_THREAD_SYNC; | |
2073 cudaError_t err = cudaGetLastError(); | |
2074 if (cudaSuccess != err) | |
2075 { | |
2076 PyErr_Format( | |
2077 PyExc_RuntimeError, | |
2078 "CudaNdarray_inplace_elemwise case6: Cuda error: %s: %s. n_blocks=(%ld, %ld, %ld) n_threads=(%ld)\n", | |
2079 "k6", | |
2080 cudaGetErrorString(err), | |
2081 (long) n_blocks.x, (long) n_blocks.y, (long) n_blocks.z, | |
2082 (long) n_threads.x); | |
2083 Py_XDECREF(new_other); | |
2084 return -1; | |
2085 } | |
2086 } | |
2087 break; | |
2088 default: | |
2089 { | |
2090 PyErr_Format( | |
2091 PyExc_NotImplementedError, | |
2092 "inplace_elemwise w nd=%i\n", | |
2093 self->nd); | |
2094 Py_XDECREF(new_other); | |
2095 return -1; | |
2096 } | |
2097 } | |
2098 if (verbose) | |
2099 fprintf(stderr, "INPLACE ADD/DIV end\n"); | |
2100 Py_XDECREF(new_other); | |
2101 return 0; | |
2102 } | |
2103 | |
2104 /* | |
2105 * We need this inplace Add to support IncSubTensor | |
2106 * It returns py_self on success with an additional reference. Else NULL. | |
2107 */ | |
2108 // Will be called by __iadd__ in Python | |
2109 PyObject * | |
2110 CudaNdarray_inplace_add(PyObject* py_self, PyObject * py_other) | |
2111 { | |
2112 if (CudaNdarray_inplace_elemwise(py_self, py_other, IADD)) | |
2113 { | |
2114 return NULL; | |
2115 } | |
2116 Py_INCREF(py_self); | |
2117 return py_self; | |
2118 } | |
2119 | |
2120 /* | |
2121 * We need this inplace div for cuda/tests/test_basic_ops.py:test_shared_options | |
2122 * It returns py_self on success with an additional reference. Else NULL. | |
2123 */ | |
2124 // Will be called by __idiv__ in Python | |
2125 static PyObject * | |
2126 CudaNdarray_inplace_div(PyObject* py_self, PyObject * py_other) | |
2127 { | |
2128 if (CudaNdarray_inplace_elemwise(py_self, py_other, IDIV)) | |
2129 { | |
2130 return NULL; | |
2131 } | |
2132 Py_INCREF(py_self); | |
2133 return py_self; | |
2134 } | |
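// From Python these two are reached through the in-place operators, e.g.
//   a += b   # dispatches to CudaNdarray_inplace_add
//   a /= b   # dispatches to CudaNdarray_inplace_div
// where a is a CudaNdarray and b is a CudaNdarray or an ndarray. On success
// the left operand is returned with a new reference, so the Python name
// keeps pointing at the same device buffer.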
2135 | |
2136 // The PyNumberMethods struct layout changed in a non-trivial way from Python 2 to Python 3. | |
2137 #if PY_MAJOR_VERSION == 3 | |
2138 static PyNumberMethods CudaNdarrayNumberMethods = | |
2139 { | |
2140 (binaryfunc)CudaNdarray_add, //binaryfunc nb_add; __add__ | |
2141 0, //binaryfunc nb_subtract; | |
2142 0, //binaryfunc nb_multiply; | |
2143 0, //binaryfunc nb_remainder; | |
2144 0, //binaryfunc nb_divmod; | |
2145 0, //ternaryfunc nb_power; | |
2146 0, //unaryfunc nb_negative; | |
2147 0, //unaryfunc nb_positive; | |
2148 0, //unaryfunc nb_absolute; | |
2149 0, //inquiry nb_bool; | |
2150 0, //unaryfunc nb_invert; | |
2151 0, //binaryfunc nb_lshift; | |
2152 0, //binaryfunc nb_rshift; | |
2153 0, //binaryfunc nb_and; | |
2154 0, //binaryfunc nb_xor; | |
2155 0, //binaryfunc nb_or; | |
2156 0, //unaryfunc nb_int; | |
2157 0, //void *nb_reserved; | |
2158 0, //unaryfunc nb_float; | |
2159 | |
2160 (binaryfunc)CudaNdarray_inplace_add, //binaryfunc nb_inplace_add; __iadd__ | |
2161 0, //binaryfunc nb_inplace_subtract; | |
2162 0, //binaryfunc nb_inplace_multiply; | |
2163 0, //binaryfunc nb_inplace_remainder; | |
2164 0, //ternaryfunc nb_inplace_power; | |
2165 0, //binaryfunc nb_inplace_lshift; | |
2166 0, //binaryfunc nb_inplace_rshift; | |
2167 0, //binaryfunc nb_inplace_and; | |
2168 0, //binaryfunc nb_inplace_xor; | |
2169 0, //binaryfunc nb_inplace_or; | |
2170 | |
2171 0, //binaryfunc nb_floor_divide; | |
2172 0, //binaryfunc nb_true_divide; | |
2173 0, //binaryfunc nb_inplace_floor_divide; | |
2174 (binaryfunc)CudaNdarray_inplace_div, //binaryfunc nb_inplace_true_divide; __idiv__ | |
2175 | |
2176 0, //unaryfunc nb_index | |
2177 }; | |
2178 #else | |
2179 static PyNumberMethods CudaNdarrayNumberMethods = | |
2180 { | |
2181 (binaryfunc)CudaNdarray_add, //binaryfunc nb_add; __add__ | |
2182 0, //binaryfunc nb_subtract; __sub__ | |
2183 0, //binaryfunc nb_multiply; __mul__ | |
2184 0, //binaryfunc nb_divide; __div__ | |
2185 0, //binaryfunc nb_remainder; __mod__ | |
2186 0, //binaryfunc nb_divmod; __divmod__ | |
2187 0, //ternaryfunc nb_power; __pow__ | |
2188 0, //unaryfunc nb_negative; __neg__ | |
2189 0, //unaryfunc nb_positive; __pos__ | |
2190 0, //unaryfunc nb_absolute; __abs__ | |
2191 0, //inquiry nb_nonzero; __nonzero__ /* Used by PyObject_IsTrue */ | |
2192 0, //unaryfunc nb_invert; __invert__ | |
2193 0, //binaryfunc nb_lshift; __lshift__ | |
2194 0, //binaryfunc nb_rshift; __rshift__ | |
2195 0, //binaryfunc nb_and; __and__ | |
2196 0, //binaryfunc nb_xor; __xor__ | |
2197 0, //binaryfunc nb_or; __or__ | |
2198 0, //coercion nb_coerce; __coerce__ /* Used by the coerce() function */ | |
2199 0, //unaryfunc nb_int; __int__ | |
2200 0, //unaryfunc nb_long; __long__ | |
2201 0, //unaryfunc nb_float; __float__ | |
2202 0, //unaryfunc nb_oct; __oct__ | |
2203 0, //unaryfunc nb_hex; __hex__ | |
2204 | |
2205 /* Added in release 2.0 */ | |
2206 (binaryfunc)CudaNdarray_inplace_add, //binaryfunc nb_inplace_add; __iadd__ | |
2207 0, //binaryfunc nb_inplace_subtract; __isub__ | |
2208 0, //binaryfunc nb_inplace_multiply; __imul__ | |
2209 (binaryfunc)CudaNdarray_inplace_div, //binaryfunc nb_inplace_divide; __idiv__ | |
2210 0, //binaryfunc nb_inplace_remainder; __imod__ | |
2211 0, //ternaryfunc nb_inplace_power; __ipow__ | |
2212 0, //binaryfunc nb_inplace_lshift; __ilshift__ | |
2213 0, //binaryfunc nb_inplace_rshift; __irshift__ | |
2214 0, //binaryfunc nb_inplace_and; __iand__ | |
2215 0, //binaryfunc nb_inplace_xor; __ixor__ | |
2216 0, //binaryfunc nb_inplace_or; __ior__ | |
2217 | |
2218 /* Added in release 2.2 */ | |
2219 0, //binaryfunc nb_floor_divide; __floordiv__ | |
2220 0, //binaryfunc nb_true_divide; __truediv__ | |
2221 0, //binaryfunc nb_inplace_floor_divide; __ifloordiv__ | |
2222 0, //binaryfunc nb_inplace_true_divide; __itruediv__ | |
2223 | |
2224 #if PY_MINOR_VERSION > 4 | |
2225 /* Added in release 2.5 */ | |
2226 0 //unaryfunc nb_index; __index__ | |
2227 #endif | |
2228 }; | |
2229 #endif | |
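// With one of these tables installed as tp_as_number (see CudaNdarrayType
// below), Python dispatches, for example:
//   a + b   -> CudaNdarray_add          (nb_add)
//   a += b  -> CudaNdarray_inplace_add  (nb_inplace_add)
//   a /= b  -> nb_inplace_divide on Python 2 but nb_inplace_true_divide on
//              Python 3, which is why CudaNdarray_inplace_div sits in a
//              different slot in each layout above.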
2230 | |
2231 | |
2232 ///////////////////// | |
2233 // Mapping protocol | |
2234 ///////////////////// | |
2235 | |
2236 // Will be called by __len__ in Python | |
2237 static Py_ssize_t | |
2238 CudaNdarray_len(PyObject * py_self) | |
2239 { | |
2240 CudaNdarray * self = (CudaNdarray*) py_self; | |
2241 if (self->nd <= 0) | |
2242 { | |
2243 return (Py_ssize_t) 0; | |
2244 } | |
2245 else | |
2246 { | |
2247 return (Py_ssize_t) CudaNdarray_HOST_DIMS(self)[0]; | |
2248 } | |
2249 } | |
2250 | |
2251 // Will be called by __getitem__ in Python | |
2252 PyObject * | |
2253 CudaNdarray_Subscript(PyObject * py_self, PyObject * key) | |
2254 { | |
2255 int verbose = 0; | |
2256 if (verbose) fprintf(stderr, "Subscript .... \n"); | |
2257 CudaNdarray * self = (CudaNdarray*) py_self; | |
2258 PyObject * py_rval = NULL; | |
2259 CudaNdarray * rval = NULL; | |
2260 PyObject * intobj = NULL; | |
2261 | |
2262 //PyObject_Print(key, stderr, 0); | |
2263 | |
2264 if (key == Py_Ellipsis) | |
2265 { | |
2266 Py_INCREF(py_self); | |
2267 return py_self; | |
2268 } | |
2269 if ((intobj=PyNumber_Int(key))) //INDEXING BY INTEGER | |
2270 //else if (PyInt_Check(key)) //INDEXING BY INTEGER | |
2271 { | |
2272 int d_idx = PyInt_AsLong(intobj); | |
2273 Py_DECREF(intobj); intobj=NULL; | |
2274 //int d_idx = PyInt_AsLong(key); | |
2275 if (self->nd == 0) | |
2276 { | |
2277 PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed"); | |
2278 return NULL; | |
2279 } | |
2280 int d_dim = CudaNdarray_HOST_DIMS(self)[0]; | |
2281 int offset = 0; | |
2282 | |
2283 if ((d_idx >= 0) && (d_idx < d_dim)) | |
2284 { | |
2285 //normal indexing | |
2286 offset += d_idx * CudaNdarray_HOST_STRIDES(self)[0]; | |
2287 } | |
2288 else if ((d_idx < 0) && (d_idx >= -d_dim)) | |
2289 { | |
2290 //end-based indexing | |
2291 // d_idx is negative | |
2292 offset += (d_dim + d_idx) * CudaNdarray_HOST_STRIDES(self)[0]; | |
2293 } | |
2294 else | |
2295 { | |
2296 PyErr_Format(PyExc_IndexError, | |
2297 "index out of bounds. Asked %d, but size of %d", | |
2298 d_idx, d_dim); | |
2299 return NULL; | |
2300 } | |
2301 | |
2302 //allocate our subtensor view | |
2303 py_rval = CudaNdarray_new_nd(self->nd - 1); | |
2304 rval = (CudaNdarray*) py_rval; | |
2305 if (!rval) return NULL; | |
2306 assert (0 == rval->data_allocated); | |
2307 | |
2308 //initialize the view's data pointer to our own. | |
2309 if (CudaNdarray_set_device_data(rval, CudaNdarray_DEV_DATA(self) + offset, self)) | |
2310 { | |
2311 Py_DECREF(rval); | |
2312 return NULL; | |
2313 } | |
2314 for (int d = 1; d < self->nd; ++d) | |
2315 { | |
2316 CudaNdarray_set_stride(rval, d-1, CudaNdarray_HOST_STRIDES(self)[d]); | |
2317 CudaNdarray_set_dim(rval, d-1, CudaNdarray_HOST_DIMS(self)[d]); | |
2318 } | |
2319 } | |
2320 else | |
2321 { | |
2322 PyErr_Clear(); | |
2323 } | |
2324 if (PySlice_Check(key)) //INDEXING BY SLICE | |
2325 { | |
2326 if (verbose) fprintf(stderr, "by slice\n"); | |
2327 if (self->nd == 0) | |
2328 { | |
2329 PyErr_SetString(PyExc_ValueError, "cannot slice a 0-d array"); | |
2330 return NULL; | |
2331 } | |
2332 | |
2333 int d_dim = CudaNdarray_HOST_DIMS(self)[0]; | |
2334 Py_ssize_t start, stop, step, slen; | |
2335 if (PySlice_GetIndicesEx(SLICE_CAST(key), d_dim, &start, &stop, &step, &slen)) | |
2336 { | |
2337 if (verbose) | |
2338 fprintf(stderr, "PySlice_GetIndicesEx failed\n"); | |
2339 return NULL; | |
2340 } | |
2341 if (verbose) | |
2342 { | |
2343 std::cerr << "start " << start << "\n"; | |
2344 std::cerr << "stop " << stop << "\n"; | |
2345 std::cerr << "step " << step << "\n"; | |
2346 std::cerr << "slen " << slen << "\n"; | |
2347 } | |
2348 | |
2349 //allocate our subtensor view | |
2350 py_rval = CudaNdarray_new_nd(self->nd); | |
2351 rval = (CudaNdarray*) py_rval; | |
2352 if (!rval) return NULL; | |
2353 assert (0 == rval->data_allocated); | |
2354 | |
2355 | |
2356 //initialize the view's data pointer to our own. | |
2357 if (CudaNdarray_set_device_data(rval, | |
2358 CudaNdarray_DEV_DATA(self) + start * CudaNdarray_HOST_STRIDES(self)[0], | |
2359 self)) | |
2360 { | |
2361 Py_DECREF(rval); | |
2362 return NULL; | |
2363 } | |
2364 //initialize dimension 0 of rval | |
2365 CudaNdarray_set_stride(rval, 0, | |
2366 (slen == 1) ? 0 : step * CudaNdarray_HOST_STRIDES(self)[0]); | |
2367 CudaNdarray_set_dim(rval, 0, slen); | |
2368 if (verbose) std::cerr << "rval stride " << CudaNdarray_HOST_STRIDES(rval)[0] << "\n"; | |
2369 // initialize dimensions > 0 of rval | |
2370 for (int d = 1; d < self->nd; ++d) | |
2371 { | |
2372 CudaNdarray_set_stride(rval, d, CudaNdarray_HOST_STRIDES(self)[d]); | |
2373 CudaNdarray_set_dim(rval, d, CudaNdarray_HOST_DIMS(self)[d]); | |
2374 } | |
2375 } | |
2376 if (PyTuple_Check(key)) //INDEXING BY TUPLE | |
2377 { | |
2378 if (verbose) fprintf(stderr, "by tuple\n"); | |
2379 //elements of the tuple can be either integers or slices | |
2380 //the dimensionality of the view we will return is diminished for each slice in the tuple | |
2381 | |
2382 if (PyTuple_Size(key) > self->nd) | |
2383 { | |
2384 PyErr_SetString(PyExc_IndexError, "index error"); | |
2385 return NULL; | |
2386 } | |
2387 | |
2388 //calculate the number of dimensions in the return value | |
2389 int rval_nd = self->nd; | |
2390 for (int d = 0; d < PyTuple_Size(key); ++d) | |
2391 { | |
2392 //On some platforms PyInt_Check(<type 'numpy.int64'>) returns true; on others it returns false. | |
2393 //So we use PyArray_IsAnyScalar, which should cover everything. | |
2394 rval_nd -= PyArray_IsAnyScalar(PyTuple_GetItem(key, d)); | |
2395 } | |
2396 | |
2397 //allocate our subtensor view | |
2398 py_rval = CudaNdarray_new_nd(rval_nd); | |
2399 rval = (CudaNdarray*) py_rval; | |
2400 if (!rval) return NULL; | |
2401 assert (0 == rval->data_allocated); | |
2402 | |
2403 //initialize the view's data pointer to our own. | |
2404 if (CudaNdarray_set_device_data(rval, CudaNdarray_DEV_DATA(self), self)) | |
2405 { | |
2406 Py_DECREF(rval); | |
2407 return NULL; | |
2408 } | |
2409 | |
2410 // rval_d will refer to the current dimension in the rval. | |
2411 // It will not be incremented for integer keys, but will be incremented for slice | |
2412 // keys | |
2413 int rval_d = 0; | |
2414 | |
2415 for (int d = 0; d < self->nd; ++d) | |
2416 { | |
2417 // keys can be shorter than self->nd. | |
2418 // when that happens, it means that the remaining dimensions are "full slices" | |
2419 if (d >=PyTuple_Size(key)) | |
2420 { | |
2421 CudaNdarray_set_stride(rval, rval_d, CudaNdarray_HOST_STRIDES(self)[d]); | |
2422 CudaNdarray_set_dim(rval, rval_d, CudaNdarray_HOST_DIMS(self)[d]); | |
2423 ++rval_d; | |
2424 } | |
2425 else | |
2426 { | |
2427 PyObject * key_d = PyTuple_GetItem(key, d); | |
2428 | |
2429 if (PySlice_Check(key_d)) | |
2430 { | |
2431 Py_ssize_t start, stop, step, slen; | |
2432 if (PySlice_GetIndicesEx(SLICE_CAST(key_d), CudaNdarray_HOST_DIMS(self)[d], &start, &stop, &step, &slen)) | |
2433 { | |
2434 Py_DECREF(rval); | |
2435 return NULL; | |
2436 } | |
2437 rval->devdata += start * CudaNdarray_HOST_STRIDES(self)[d]; | |
2438 CudaNdarray_set_stride(rval, rval_d, | |
2439 (slen == 1) ? 0 : step * CudaNdarray_HOST_STRIDES(self)[d]); | |
2440 CudaNdarray_set_dim(rval, rval_d, slen); | |
2441 if (0) | |
2442 { | |
2443 std::cerr << "start " << start << "\n"; | |
2444 std::cerr << "stop " << stop << "\n"; | |
2445 std::cerr << "step " << step << "\n"; | |
2446 std::cerr << "slen " << slen << "\n"; | |
2447 } | |
2448 ++rval_d; | |
2449 } | |
2450 else if ((intobj=PyNumber_Int(key_d))) | |
2451 { | |
2452 assert(PyArray_IsAnyScalar(key_d)); | |
2453 int d_idx = PyInt_AsLong(intobj); | |
2454 Py_DECREF(intobj); | |
2455 intobj = NULL; | |
2456 int d_dim = CudaNdarray_HOST_DIMS(self)[d]; | |
2457 | |
2458 if ((d_idx >= 0) && (d_idx < d_dim)) | |
2459 { | |
2460 //normal indexing | |
2461 rval->devdata += d_idx * CudaNdarray_HOST_STRIDES(self)[d]; | |
2462 } | |
2463 else if ((d_idx < 0) && (d_idx >= -d_dim)) | |
2464 { | |
2465 //end-based indexing | |
2466 rval->devdata += (d_dim + d_idx) * CudaNdarray_HOST_STRIDES(self)[d]; | |
2467 } | |
2468 else | |
2469 { | |
2470 PyErr_Format(PyExc_IndexError, | |
2471 "index out of bounds. Asked %d for dimensions %d, but size of %d", | |
2472 d_idx, d, d_dim); | |
2473 Py_DECREF(rval); | |
2474 return NULL; | |
2475 } | |
2476 } | |
2477 else | |
2478 { | |
2479 PyErr_Clear(); // clear the error set by PyNumber_Int | |
2480 PyErr_SetString(PyExc_IndexError, "index must be either int or slice"); | |
2481 Py_DECREF(rval); | |
2482 return NULL; | |
2483 } | |
2484 } | |
2485 } | |
2486 } | |
2487 if (py_rval) | |
2488 { | |
2489 if (verbose) fprint_CudaNdarray(stderr, self); | |
2490 if (verbose) fprint_CudaNdarray(stderr, rval); | |
2491 } | |
2492 else | |
2493 { | |
2494 PyErr_SetString(PyExc_NotImplementedError, "Unknown key type"); | |
2495 return NULL; | |
2496 } | |
2497 return py_rval; | |
2498 } | |
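// All three key types return *views*: the result shares the parent's device
// data (tracked through rval->base); no device memory is copied. For example,
// in Python:
//   v = x[1]        # integer: drops dim 0; devdata offset by 1 * strides[0]
//   v = x[1:7:2]    # slice: dim 0 becomes slen, stride 0 becomes 2 * strides[0]
//   v = x[1, 2:4]   # tuple: integers drop a dimension, slices keep one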
2499 | |
2500 // Will be called by __setitem__ in Python | |
2501 // See http://docs.python.org/dev/py3k/c-api/object.html#PyObject_SetItem | |
2502 // Doesn't handle broadcasting, e.g. a[:] = 5 | |
2503 // The right-hand side can only be a CudaNdarray, | |
2504 // an ndarray, | |
2505 // or a python scalar with value 0 when the left-hand side is c-contiguous. | |
2506 static int | |
2507 CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value) | |
2508 { | |
2509 int verbose = 0; | |
2510 if (verbose) fprintf(stderr, "CudaNdarray_setitem start\n"); | |
2511 // We try to copy directly into this CudaNdarray from the ndarray | |
2512 CudaNdarray* rval = (CudaNdarray*)CudaNdarray_Subscript(o, key); | |
2513 CudaNdarray* new_value = NULL; | |
2514 | |
2515 if(!rval){ | |
2516 // CudaNdarray_Subscript failed and set the error msg. | |
2517 Py_XDECREF(rval); | |
2518 return -1; | |
2519 } | |
2520 | |
2521 if(rval != (CudaNdarray*)o && | |
2522 (rval->data_allocated || | |
2523 // The new array should have a base | |
2524 !(((CudaNdarray*)rval)->base) || | |
2525 // If the original array has no base, the base of the new | |
2526 // array should be the original one | |
2527 (!((CudaNdarray*)o)->base && ((CudaNdarray*)rval)->base != o) || | |
2528 // Else, the two arrays should have the same base | |
2529 (((CudaNdarray*)o)->base && ((CudaNdarray*)rval)->base != ((CudaNdarray*)o)->base))) | |
2530 { | |
2531 // This case shouldn't happen, based on what I see in Subscript | |
2532 // but just in case it happens sometime in the future | |
2533 | |
2534 PyErr_Format(PyExc_RuntimeError, | |
2535 "__getitem__ must return a CudaNdarray that refers to" | |
2536 " the original CudaNdarray, not a copy. rval.base=%p" | |
2537 " o.base=%p o=%p", | |
2538 (((CudaNdarray*)rval)->base), ((CudaNdarray*)o)->base, o); | |
2539 Py_DECREF(rval); | |
2540 return -1; | |
2541 } | |
2542 | |
2543 PyObject * intobj = NULL; | |
2544 if (CudaNdarray_Check(o) && PyArray_Check(value)){ | |
2545 if (verbose) | |
2546 fprintf(stderr, | |
2547 "CudaNdarray_setitem dest is a CudaNdarray and" | |
2548 " value is a ndarray\n"); | |
2549 new_value = (CudaNdarray*) CudaNdarray_New(); | |
2550 if(!new_value) | |
2551 { | |
2552 return -1; | |
2553 } | |
2554 if (CudaNdarray_CopyFromArray(new_value, (PyArrayObject *) value)) | |
2555 { | |
2556 Py_XDECREF(new_value); | |
2557 Py_XDECREF(rval); | |
2558 return -1; | |
2559 } | |
2560 value = (PyObject *) new_value; | |
2561 } | |
2562 else if ((intobj=PyNumber_Int(value))) | |
2563 { | |
2564 if (verbose) | |
2565 fprintf(stderr, | |
2566 "CudaNdarray_setitem dest and value is a python number\n"); | |
2567 if(! CudaNdarray_is_c_contiguous(rval)){ | |
2568 PyErr_SetString(PyExc_NotImplementedError, | |
2569 "CudaNdarray.__setitem__: When the new value is a scalar" | |
2570 " of value 0 the part where we copy to must be c contiguous."); | |
2571 Py_XDECREF(rval); | |
2572 return -1; | |
2573 } | |
2574 | |
2575 long val = PyInt_AsLong(intobj); | |
2576 Py_DECREF(intobj); intobj=NULL; | |
2577 if (val == 0) | |
2578 { | |
2579 cudaError_t err = cudaMemset(rval->devdata, 0, | |
2580 CudaNdarray_SIZE(rval) * sizeof(real)); | |
2581 Py_XDECREF(rval); | |
2582 if (err) | |
2583 { | |
2584 // Clear the error flag, cudaMemset doesn't do it. | |
2585 // Currently cudaGetLastError() returns the same thing as err, | |
2586 // but even if a future version returned something else here, | |
2587 // all we want to do is reset the flag. | |
2588 cudaGetLastError(); | |
2589 PyErr_SetString(PyExc_RuntimeError, | |
2590 "CudaNdarray.__setitem__: cudaMemset failed"); | |
2591 return -1; | |
2592 } | |
2593 return 0; | |
2594 } else { | |
2595 Py_XDECREF(rval); | |
2596 PyErr_SetString(PyExc_NotImplementedError, | |
2597 "CudaNdarray.__setitem__: we support setting only python" | |
2598 " scalar of value 0, numpy nd array and CudaNdarray."); | |
2599 return -1; | |
2600 } | |
2601 } | |
2602 | |
2603 PyErr_Clear(); // clear PyNumber_Int error. | |
2604 | |
2605 if(!CudaNdarray_Check(o) || !CudaNdarray_Check(value)) | |
2606 { | |
2607 PyErr_SetString(PyExc_TypeError, | |
2608 "CudaNdarray.__setitem__: left must be a CudaNdarrays and right" | |
2609 " must be a CudaNdarrays, an ndarray or a python scalar of value 0."); | |
2610 Py_XDECREF(new_value); | |
2611 return -1; | |
2612 } | |
2613 | |
2614 if (verbose) | |
2615 fprintf(stderr, "CudaNdarray_setitem dest and value are CudaNdarray\n"); | |
2616 | |
2617 if (cnda_copy_structure_to_device(rval)) | |
2618 { | |
2619 PyErr_SetString(PyExc_RuntimeError, | |
2620 "CudaNdarray.__setitem__: syncing structure to device failed"); | |
2621 Py_DECREF(rval); | |
2622 Py_XDECREF(new_value); | |
2623 | |
2624 if (verbose) | |
2625 fprintf(stderr, "CudaNdarray_setitem error end\n"); | |
2626 return -1; | |
2627 } | |
2628 | |
2629 PyObject *baseSavedForComparison = rval->base; | |
2630 | |
2631 if (CudaNdarray_CopyFromCudaNdarray(rval, (CudaNdarray*)value, true)) | |
2632 { | |
2633 Py_DECREF((PyObject*)rval); | |
2634 Py_XDECREF(new_value); | |
2635 | |
2636 if (verbose) | |
2637 fprintf(stderr, "CudaNdarray_setitem error end\n"); | |
2638 return -1; | |
2639 } | |
2640 | |
2641 assert (rval->base == baseSavedForComparison); | |
2642 assert (rval->dev_structure_fresh); | |
2643 | |
2644 // Clean up locally-created references | |
2645 Py_DECREF(rval); | |
2646 Py_XDECREF(new_value); | |
2647 | |
2648 return 0; | |
2649 } | |
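// Typical __setitem__ uses, matching the cases handled above (Python):
//   x[0:2] = ndarray_val   # host ndarray is first copied into a temporary
//                          # CudaNdarray, then into the view on the device
//   x[1]   = other_cnda    # device-to-device copy into the sub-view
//   x[:]   = 0             # only the scalar 0 is supported, and only when
//                          # the target view is c-contiguous (uses cudaMemset)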
2650 | |
2651 | |
2652 PyMappingMethods CudaNdarrayMappingMethods = { | |
2653 CudaNdarray_len, //lenfunc mp_length; __len__ | |
2654 CudaNdarray_Subscript, //binaryfunc mp_subscript; __getitem__ | |
2655 CudaNdarray_setitem //objobjargproc mp_ass_subscript; __setitem__ | |
2656 }; | |
2657 | |
2658 //////////////////// | |
2659 // | |
2660 //////////////////// | |
2661 | |
2662 static PyObject * | |
2663 CudaNdarray_get_shape(CudaNdarray *self, void *closure) | |
2664 { | |
2665 if (self->nd < 0) | |
2666 { | |
2667 PyErr_SetString(PyExc_ValueError, "CudaNdarray not initialized"); | |
2668 return NULL; | |
2669 } | |
2670 PyObject * rval = PyTuple_New(self->nd); | |
2671 for (int i = 0; i < self->nd; ++i) | |
2672 { | |
2673 if (!rval || PyTuple_SetItem(rval, i, PyInt_FromLong(CudaNdarray_HOST_DIMS(self)[i]))) | |
2674 { | |
2675 Py_XDECREF(rval); | |
2676 return NULL; | |
2677 } | |
2678 | |
2679 } | |
2680 return rval; | |
2681 } | |
2682 | |
2683 static int | |
2684 CudaNdarray_set_shape(CudaNdarray *self, PyObject *value, void *closure) | |
2685 { | |
2686 PyErr_SetString(PyExc_NotImplementedError, "TODO: call reshape"); | |
2687 return -1; | |
2688 } | |
2689 | |
2690 static PyObject * | |
2691 CudaNdarray_get_strides(CudaNdarray *self, void *closure) | |
2692 { | |
2693 if (self->nd < 0) | |
2694 { | |
2695 PyErr_SetString(PyExc_ValueError, "CudaNdarray not initialized"); | |
2696 return NULL; | |
2697 } | |
2698 PyObject * rval = PyTuple_New(self->nd); | |
2699 for (int i = 0; i < self->nd; ++i) | |
2700 { | |
2701 if (!rval || PyTuple_SetItem(rval, i, PyInt_FromLong(CudaNdarray_HOST_STRIDES(self)[i]))) | |
2702 { | |
2703 Py_XDECREF(rval); | |
2704 return NULL; | |
2705 } | |
2706 | |
2707 } | |
2708 return rval; | |
2709 } | |
2710 | |
2711 static int | |
2712 CudaNdarray_set_strides(CudaNdarray *self, PyObject *value, void *closure) | |
2713 { | |
2714 //npy_intp newstrides_bytes[PyTuple_Size(value)]; | |
2715 if (PyTuple_Check(value)){ | |
2716 if (PyTuple_Size(value) != CudaNdarray_NDIM(self)){ | |
2717 PyErr_SetString(PyExc_ValueError, | |
2718 "The new strides tuple must have the same length" | |
2719 " as the number of dimensions"); | |
2720 return -1; | |
2721 } | |
2722 }else if (PyList_Check(value)){ | |
2723 if (PyList_Size(value) != CudaNdarray_NDIM(self)){ | |
2724 PyErr_SetString(PyExc_ValueError, | |
2725 "The new strides list must have the same length" | |
2726 " as the number of dimensions"); | |
2727 return -1; | |
2728 } | |
2729 }else{ | |
2730 PyErr_SetString(PyExc_ValueError, | |
2731 "The new strides need to be encoded in a tuple or list"); | |
2732 return -1; | |
2733 } | |
2734 npy_intp* newstrides = (npy_intp*) alloca(CudaNdarray_NDIM(self) * sizeof(npy_intp)); | |
2735 if (PyTuple_Check(value)){ | |
2736 for(int i=0; i < CudaNdarray_NDIM(self); i++){ | |
2737 newstrides[i] = PyInt_AsLong(PyTuple_GetItem(value, Py_ssize_t(i))); | |
2738 //newstrides_bytes[i] = newstrides[i] * 4; | |
2739 } | |
2740 }else if (PyList_Check(value)){ | |
2741 for(int i=0; i < CudaNdarray_NDIM(self); i++){ | |
2742 newstrides[i] = PyInt_AsLong(PyList_GetItem(value, Py_ssize_t(i))); | |
2743 //newstrides_bytes[i] = newstrides[i] * 4; | |
2744 } | |
2745 } | |
2746 /* | |
2747 // Do not do this check, as ExtractDiag needs that, and NumPy does not seem | |
2748 // to do it. | |
2749 npy_intp dims[PyTuple_Size(value)]; | |
2750 for(int i=0; i < CudaNdarray_NDIM(self); i++){ | |
2751 dims[i] = CudaNdarray_HOST_DIMS(self)[i]; | |
2752 } | |
2753 if (!PyArray_CheckStrides(4, | |
2754 CudaNdarray_NDIM(self), | |
2755 0, 0, | |
2756 dims, | |
2757 newstrides_bytes)){ | |
2758 PyErr_SetString(PyExc_ValueError, "bad new strides"); | |
2759 return -1; | |
2760 } | |
2761 */ | |
2762 for(int i=0; i < CudaNdarray_NDIM(self); i++){ | |
2763 CudaNdarray_set_stride(self, i, newstrides[i]); | |
2764 } | |
2765 return 0; | |
2766 } | |
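// Python-side sketch: strides here are counted in *elements*, not bytes.
// Assuming x is a 4x5 CudaNdarray (hypothetical name):
//   x._strides = (5, 1)    # plain row-major layout
// The new strides are not validated against the dims (the check is disabled
// above for ExtractDiag), so a bad value can make later kernels read out of
// bounds.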
2767 | |
2768 static PyObject * | |
2769 CudaNdarray_get_dev_data(CudaNdarray *self, void *closure) | |
2770 { | |
2771 float * p = CudaNdarray_DEV_DATA(self); | |
2772 //printf("get_dev_data %p %li \n", p, (long int)p ); | |
2773 return PyInt_FromSize_t((size_t) CudaNdarray_DEV_DATA(self)); | |
2774 } | |
2775 | |
2776 static int | |
2777 CudaNdarray_set_dev_data(CudaNdarray *self, PyObject *value, void *closure) | |
2778 { | |
2779 Py_ssize_t newdevdata = PyInt_AsSsize_t(value); | |
2780 //printf("set_dev_data %p %li \n",(float*)newdevdata ,newdevdata); | |
2781 if (PyErr_Occurred()) | |
2782 { | |
2783 return -1; | |
2784 } | |
2785 return CudaNdarray_set_device_data(self, (float*)newdevdata, (CudaNdarray*)self->base); | |
2786 } | |
2787 | |
2788 static PyObject * | |
2789 CudaNdarray_get_dtype(CudaNdarray *self, void *closure) | |
2790 { | |
2791 return PyString_FromString("float32"); | |
2792 } | |
2793 | |
2794 static PyObject * | |
2795 CudaNdarray_get_ndim(CudaNdarray *self, void *closure) | |
2796 { | |
2797 return PyInt_FromLong(self->nd); | |
2798 } | |
2799 | |
2800 static PyObject * | |
2801 CudaNdarray_get_base(CudaNdarray *self, void *closure) | |
2802 { | |
2803 PyObject * base = self->base; | |
2804 if (!base) | |
2805 { | |
2806 // We cannot return a NULL pointer, use None instead | |
2807 base = Py_None; | |
2808 } | |
2809 Py_INCREF(base); | |
2810 return base; | |
2811 } | |
2812 | |
2813 void put_in_dict(PyObject * dict, const char * key, int val) | |
2814 { | |
2815 PyObject * k = PyString_FromString(key); | |
2816 PyObject * v = PyInt_FromLong(val); | |
2817 PyDict_SetItem(dict, k, v); | |
2818 Py_DECREF(k); | |
2819 Py_DECREF(v); | |
2820 } | |
2821 | |
2822 PyObject * | |
2823 GetDeviceProperties(PyObject* _unused, PyObject* args) | |
2824 { | |
2825 int dev_id = -1; | |
2826 if (! PyArg_ParseTuple(args, "i", &dev_id)) | |
2827 return NULL; | |
2828 cudaDeviceProp deviceProp; | |
2829 cudaGetDeviceProperties(&deviceProp, dev_id); | |
2830 | |
2831 PyObject * dict = PyDict_New(); | |
2832 PyObject * str= PyString_FromString("name"); | |
2833 PyObject * i = PyString_FromString(deviceProp.name); | |
2834 PyDict_SetItem(dict, str, i); | |
2835 Py_DECREF(str); | |
2836 Py_DECREF(i); | |
2837 | |
2838 put_in_dict(dict, "major", deviceProp.major); | |
2839 put_in_dict(dict, "minor", deviceProp.minor); | |
2840 #if CUDART_VERSION >= 2020 | |
2841 int driverVersion = 0, runtimeVersion = 0; | |
2842 cudaDriverGetVersion(&driverVersion); | |
2843 cudaRuntimeGetVersion(&runtimeVersion); | |
2844 put_in_dict(dict, "driverVersion", driverVersion); | |
2845 put_in_dict(dict, "runtimeVersion", runtimeVersion); | |
2846 #endif | |
2847 #if CUDART_VERSION >= 2000 | |
2848 | |
2849 put_in_dict(dict, "multiProcessorCount", deviceProp.multiProcessorCount); | |
2850 //if ConvertSMVer2Cores is not defined in cuda_runtime_api.h, the runtime is too old. | |
2851 int sm_cores = -1; | |
2852 if(deviceProp.major==1) | |
2853 sm_cores = 32; | |
2854 else if(deviceProp.major==2 && deviceProp.minor==0) | |
2855 sm_cores = 32; | |
2856 else if(deviceProp.major==2 && deviceProp.minor==1) | |
2857 sm_cores = 48; | |
2858 put_in_dict(dict, "coresCount", sm_cores * deviceProp.multiProcessorCount); | |
2859 #endif | |
2860 put_in_dict(dict, "totalConstMem", deviceProp.totalConstMem); | |
2861 put_in_dict(dict, "sharedMemPerBlock", deviceProp.sharedMemPerBlock); | |
2862 put_in_dict(dict, "regsPerBlock", deviceProp.regsPerBlock); | |
2863 put_in_dict(dict, "warpSize", deviceProp.warpSize); | |
2864 put_in_dict(dict, "maxThreadsPerBlock", deviceProp.maxThreadsPerBlock); | |
2865 put_in_dict(dict, "maxThreadsDim0", deviceProp.maxThreadsDim[0]); | |
2866 put_in_dict(dict, "maxThreadsDim1", deviceProp.maxThreadsDim[1]); | |
2867 put_in_dict(dict, "maxThreadsDim2", deviceProp.maxThreadsDim[2]); | |
2868 put_in_dict(dict, "maxGridSize0", deviceProp.maxGridSize[0]); | |
2869 put_in_dict(dict, "maxGridSize1", deviceProp.maxGridSize[1]); | |
2870 put_in_dict(dict, "maxGridSize2", deviceProp.maxGridSize[2]); | |
2871 put_in_dict(dict, "memPitch", deviceProp.memPitch); | |
2872 put_in_dict(dict, "textureAlignment", deviceProp.textureAlignment); | |
2873 put_in_dict(dict, "clockRate", deviceProp.clockRate); | |
2874 #if CUDART_VERSION >= 2000 | |
2875 put_in_dict(dict, "deviceOverlap", deviceProp.deviceOverlap); | |
2876 #endif | |
2877 #if CUDART_VERSION >= 2020 | |
2878 put_in_dict(dict, "kernelExecTimeoutEnabled", deviceProp.kernelExecTimeoutEnabled); | |
2879 put_in_dict(dict, "integrated", deviceProp.integrated); | |
2880 put_in_dict(dict, "canMapHostMemory", deviceProp.canMapHostMemory); | |
2881 put_in_dict(dict, "computeMode", deviceProp.computeMode); | |
2882 //the doc of this field says: 0 - normal mode, 1 - only 1 context allowed, 2 - no context allowed | |
2883 #endif | |
2884 #if CUDART_VERSION >= 3000 | |
2885 put_in_dict(dict, "concurrentKernels", deviceProp.concurrentKernels); | |
2886 #endif | |
2887 #if CUDART_VERSION >= 3010 | |
2888 put_in_dict(dict, "ECCEnabled", deviceProp.ECCEnabled); | |
2889 #endif | |
2890 #if CUDART_VERSION >= 3020 | |
2891 put_in_dict(dict, "tccDriver", deviceProp.tccDriver); | |
2892 #endif | |
2893 | |
2894 return dict; | |
2895 } | |
2896 | |
2897 /* | |
2898 * Returns a (free, total) tuple: the free and total amounts of memory available for allocation on the device, in bytes. | |
2899 */ | |
2900 PyObject * | |
2901 GetDeviceMemInfo(PyObject* _unused, PyObject* dummy) | |
2902 { | |
2903 size_t free = 0, total = 0; | |
2904 if(g_gpu_context_active == 0){ | |
2905 PyErr_Format(PyExc_RuntimeError, "No gpu device selected yet. Please make sure the gpu device was initialized by Theano before."); | |
2906 return NULL; | |
2907 } | |
2908 | |
2909 cudaError_t err = cudaMemGetInfo(&free, &total); | |
2910 if (err != cudaSuccess){ | |
2911 // Clear the error flag, cudaMemGetInfo doesn't do it. | |
2912 // Currently cudaGetLastError() returns the same thing as err, | |
2913 // but even if a future version returned something else here, | |
2914 // all we want to do is reset the flag. | |
2915 cudaGetLastError(); | |
2916 PyErr_Format(PyExc_RuntimeError, | |
2917 "Error while getting memory info about the gpu: %s", | |
2918 cudaGetErrorString(err)); | |
2919 return NULL; | |
2920 } | |
2921 return PyTuple_Pack(2, PyLong_FromLong(free), PyLong_FromLong(total)); | |
2922 } | |
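// Python-side sketch, assuming this function is exposed as mem_info (the
// actual name is bound in the module's method table, defined elsewhere):
//   free_b, total_b = cuda_ndarray.mem_info()
//   # both values are in bytes; free_b <= total_b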
2923 | |
2924 /* | |
2925 * Synchronize with all the gpu device streams. | |
2926 */ | |
2927 PyObject * | |
2928 CudaNdarray_synchronize(PyObject* _unused, PyObject* dummy) | |
2929 { | |
2930 CNDA_BEGIN_ALLOW_THREADS | |
2931 cudaThreadSynchronize(); | |
2932 CNDA_END_ALLOW_THREADS | |
2933 Py_INCREF(Py_None); | |
2934 return Py_None; | |
2935 } | |
2936 | |
2937 /* | |
2938 * Exists and returns True if we are linked against cublas v2. | |
2939 */ | |
2940 PyObject * | |
2941 CudaNdarray_cublasv2(PyObject* _unused, PyObject* dummy) | |
2942 { | |
2943 Py_INCREF(Py_True); | |
2944 return Py_True; | |
2945 } | |
2946 | |
2947 PyObject * | |
2948 CudaNdarray_select_a_gpu(PyObject* _unused, PyObject* dummy) | |
2949 { | |
2950 void * rval = NULL; | |
2951 cudaError_t err; | |
2952 int num_gpus = 0; | |
2953 | |
2954 err = cudaGetDeviceCount(&num_gpus); | |
2955 if (cudaSuccess != err){ | |
2956 printf("ERR!\\n"); | |
2957 PyErr_Format(PyExc_RuntimeError, | |
2958 "Not able to get number of GPUs (%s).", | |
2959 cudaGetErrorString(err)); | |
2960 return NULL; | |
2961 } | |
2962 | |
2963 for (int device = 0; device < num_gpus; device++) { | |
2964 cudaSetDevice(device); | |
2965 err = cudaDeviceSynchronize(); // << CUDA context gets created here. | |
2966 cudaGetLastError(); // reset the error state | |
2967 if (cudaSuccess == err) | |
2968 break; | |
2969 } | |
2970 | |
2971 if (cudaSuccess != err){ | |
2972 printf("ERR!\\n"); | |
2973 PyErr_Format(PyExc_RuntimeError, | |
2974 "Not able to select available GPU from %d cards (%s).", | |
2975 num_gpus, cudaGetErrorString(err)); | |
2976 return NULL; | |
2977 } | |
2978 | |
2979 Py_INCREF(Py_None); | |
2980 return Py_None; | |
2981 } | |
2982 | |
2983 #if COMPUTE_GPU_MEM_USED | |
2984 /* | |
2985 * Return the size in bytes that Theano currently have allocated on the gpu. | |
2986 */ | |
2987 PyObject * | |
2988 GetTheanoAllocInfo(PyObject* _unused, PyObject* dummy) | |
2989 { | |
2990 PyObject* a = PyLong_FromLong(_allocated_size); | |
2991 PyObject* b = PyLong_FromLong(_max_allocated_size); | |
2992 | |
2993 PyObject* tuple = PyTuple_New(2); | |
2994 PyTuple_SetItem(tuple, 0, a); | |
2995 PyTuple_SetItem(tuple, 1, b); | |
2996 return tuple; | |
2997 } | |
2998 #endif | |
2999 | |
3000 static PyGetSetDef CudaNdarray_getset[] = { | |
3001 {"shape", | |
3002 (getter)CudaNdarray_get_shape, | |
3003 (setter)CudaNdarray_set_shape, | |
3004 "shape of this ndarray (tuple)", | |
3005 NULL}, | |
3006 {"_strides", | |
3007 (getter)CudaNdarray_get_strides, | |
3008 (setter)CudaNdarray_set_strides, | |
3009 "data pointer strides (in elements)", | |
3010 NULL}, | |
3011 {"strides", | |
3012 (getter)CudaNdarray_get_strides, | |
3013 (setter)CudaNdarray_set_strides, | |
3014 "data pointer strides (in elements)", | |
3015 NULL}, | |
3016 //gpudata is needed to allow calling pycuda fct with CudaNdarray input. | |
3017 {"gpudata", | |
3018 (getter)CudaNdarray_get_dev_data, | |
3019 NULL, | |
3020 "device data pointer", | |
3021 NULL}, | |
3022 {"_dev_data", | |
3023 (getter)CudaNdarray_get_dev_data, | |
3024 (setter)CudaNdarray_set_dev_data, | |
3025 "device data pointer", | |
3026 NULL}, | |
3027 {"dtype", | |
3028 (getter)CudaNdarray_get_dtype, | |
3029 NULL, | |
3030 "The dtype of the element. Now always float32", | |
3031 NULL}, | |
3032 {"size", | |
3033 (getter)CudaNdarray_SIZE_Object, | |
3034 NULL, | |
3035 "The number of elements in this object.", | |
3036 NULL}, | |
3037 //mem_size is needed for pycuda.elementwise.ElementwiseKernel. Why do they use size and mem_size with the same value? | |
3038 {"mem_size", | |
3039 (getter)CudaNdarray_SIZE_Object, | |
3040 NULL, | |
3041 "The number of elements in this object.", | |
3042 NULL}, | |
3043 {"ndim", | |
3044 (getter)CudaNdarray_get_ndim, | |
3045 NULL, | |
3046 "The number of dimensions in this object.", | |
3047 NULL}, | |
3048 {"base", | |
3049 (getter)CudaNdarray_get_base, | |
3050 NULL, | |
3051 "If this ndarray is a view, base is the original ndarray.", | |
3052 NULL}, | |
3053 | |
3054 {NULL, NULL, NULL, NULL} /* Sentinel */ | |
3055 }; | |
3056 | |
3057 PyObject *CudaNdarray_repr(PyObject *self) | |
3058 { | |
3059 CudaNdarray *object = (CudaNdarray *)self; | |
3060 PyObject * np_object = CudaNdarray_CreateArrayObj(object); | |
3061 PyObject * str = PyObject_Str((PyObject *) np_object); | |
3062 char * cstr = PyString_AsString(str); | |
3063 PyObject * out = PyString_FromFormat("%s%s%s", | |
3064 "CudaNdarray(", | |
3065 cstr, | |
3066 ")"); | |
3067 Py_DECREF(str); | |
3068 Py_DECREF(np_object); | |
3069 #if PY_MAJOR_VERSION >= 3 | |
3070 // In Python 3 PyString_FromFormat returns a Bytes object | |
3071 PyObject* out2 = PyObject_Str(out); | |
3072 Py_DECREF(out); | |
3073 return out2; | |
3074 #endif | |
3075 return out; | |
3076 } | |
3077 | |
3078 static PyTypeObject CudaNdarrayType = | |
3079 { | |
3080 #if PY_MAJOR_VERSION >= 3 | |
3081 PyVarObject_HEAD_INIT(NULL, 0) | |
3082 #else | |
3083 PyObject_HEAD_INIT(NULL) | |
3084 0, /*ob_size*/ | |
3085 #endif | |
3086 "CudaNdarray", /*tp_name*/ | |
3087 sizeof(CudaNdarray), /*tp_basicsize*/ | |
3088 0, /*tp_itemsize*/ | |
3089 (destructor)CudaNdarray_dealloc, /*tp_dealloc*/ | |
3090 0, /*tp_print*/ | |
3091 0, /*tp_getattr*/ | |
3092 0, /*tp_setattr*/ | |
3093 0, /*tp_compare*/ | |
3094 CudaNdarray_repr, /*tp_repr*/ | |
3095 &CudaNdarrayNumberMethods, /*tp_as_number*/ | |
3096 0, /*tp_as_sequence*/ | |
3097 &CudaNdarrayMappingMethods,/*tp_as_mapping*/ | |
3098 0, /*tp_hash */ | |
3099 0, /*tp_call*/ | |
3100 0, /*tp_str*/ | |
3101 0, /*tp_getattro*/ | |
3102 0, /*tp_setattro*/ | |
3103 0, /*tp_as_buffer*/ | |
3104 #if PY_MAJOR_VERSION >= 3 | |
3105 // Py_TPFLAGS_CHECKTYPES is always true and was removed in Python 3. | |
3106 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ | |
3107 #else | |
3108 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES, /*tp_flags*/ | |
3109 #endif | |
3110 "CudaNdarray objects", /* tp_doc */ | |
3111 0, /* tp_traverse */ | |
3112 0, /* tp_clear */ | |
3113 0, /* tp_richcompare */ | |
3114 0, /* tp_weaklistoffset */ | |
3115 0, /* tp_iter */ | |
3116 0, /* tp_iternext */ | |
3117 CudaNdarray_methods, /* tp_methods */ | |
3118 CudaNdarray_members, /* tp_members */ | |
3119 CudaNdarray_getset, /* tp_getset */ | |
3120 0, /* tp_base */ | |
3121 0, /* tp_dict */ | |
3122 0, /* tp_descr_get */ | |
3123 0, /* tp_descr_set */ | |
3124 0, /* tp_dictoffset */ | |
3125 (initproc)CudaNdarray_init,/* tp_init */ | |
3126 0, /* tp_alloc */ | |
3127 CudaNdarray_new, /* tp_new */ | |
3128 }; | |
3129 | |
3130 static __global__ void get_gpu_ptr_size(int* dst) | |
3131 { | |
3132 dst[0] = sizeof(float*); | |
3133 dst[1] = sizeof(int); | |
3134 } | |
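// Writing sizeof(float*) and sizeof(int) from *device* code and copying the
// two ints back to the host (see CudaNdarray_ptr_int_size below) lets the
// caller check that pointer and int widths agree between the host and device
// compilers; a mismatch would corrupt kernel argument passing.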
3135 | |
3136 PyObject * | |
3137 CudaNdarray_ptr_int_size(PyObject* _unused, PyObject* args) | |
3138 { | |
3139 int *gpu_data = (int*)device_malloc(sizeof(int)*2); | |
3140 if(gpu_data == NULL){ | |
3141 return NULL; | |
3142 } | |
3143 get_gpu_ptr_size<<<1,1>>>(gpu_data); | |
3144 | |
3145 cudaError_t cudaErr = cudaGetLastError(); | |
3146 if (cudaSuccess != cudaErr){ | |
3147 | |
3148 device_free(gpu_data); | |
3149 return PyErr_Format(PyExc_RuntimeError, | |
3150 "CudaNdarray_ptr_int_size: error when calling the gpu code. (%s)", | |
3151 cudaGetErrorString(cudaErr)); | |
3152 } | |
3153 | |
3154 // Transfer the result to cpu | |
3155 int gpu_sizes[] = {-1,-1}; | |
3156 cublasStatus_t err; | |
3157 err = cublasGetVector(2, sizeof(int), gpu_data, 1, gpu_sizes, 1); | |
3158 device_free(gpu_data); | |
3159 | |
3160 if (CUBLAS_STATUS_SUCCESS != err){ | |
3161 PyErr_SetString(PyExc_RuntimeError, "error copying data from device memory"); | |
3162 return NULL; | |
3163 } | |
3164 return Py_BuildValue("iiii", (int) gpu_sizes[0], (int)sizeof(float*), | |
3165 (int)sizeof(int), (int) gpu_sizes[1]); | |
3166 } | |
3167 | |
3168 static int cublas_init(); | |
3169 static void cublas_shutdown(); | |
3170 // Initialize the gpu. | |
3171 // Takes two optional parameters: the device number and whether we should use cnmem. | |
3172 // If the device number is provided, it sets that device to be the active device. | |
3173 // If not provided (usually just to test whether the gpu is available at all), | |
3174 // it does not set an active device. | |
3175 // Raises EnvironmentError or ValueError (as appropriate) if the initialization failed. | |
3176 // cnmem is treated like a bool: if it converts to 0, don't use cnmem; otherwise, use it. | |
3177 PyObject * | |
3178 CudaNdarray_gpu_init(PyObject* _unused, PyObject* args) | |
3179 { | |
3180 int card_nb = 0; | |
3181 int card_number_provided = 1; | |
3182 float cnmem = 0; // Theano flag lib.cnmem | |
3183 // if we're given something wildly invalid, this will throw a TypeError | |
3184 if(!PyArg_ParseTuple(args, "|if", &card_nb, &cnmem)) | |
3185 return NULL; | |
3186 if(cnmem) | |
3187 g_use_cnmem = true; | |
3188 | |
3189 if(PyTuple_Size(args) == 0) { | |
3190 card_number_provided = 0; | |
3191 card_nb = 0; | |
3192 } | |
3193 | |
3194 int deviceCount; | |
3195 cudaError err = cudaGetDeviceCount(&deviceCount); | |
3196 if(cudaSuccess != err) { | |
3197 return PyErr_Format(PyExc_EnvironmentError, | |
3198 "Unable to get the number of gpus available: %s", | |
3199 cudaGetErrorString(cudaGetLastError())); | |
3200 } | |
3201 | |
3202 // as soon as the first successful call to a cuda* function is made, a | |
3203 // gpu context has been created | |
3204 g_gpu_context_active = 1; | |
3205 | |
3206 if(deviceCount <= 0) { | |
3207 return PyErr_Format(PyExc_EnvironmentError, | |
3208 "Can't use the GPU, no devices support CUDA"); | |
3209 } | |
3210 if(card_number_provided && (card_nb < 0 || card_nb > (deviceCount - 1))) { | |
3211 return PyErr_Format(PyExc_ValueError, | |
3212 "Bad device number %d. Only %d devices available.", | |
3213 card_nb, | |
3214 deviceCount); | |
3215 } | |
3216 | |
3217 cudaDeviceProp deviceProp; | |
3218 err = cudaGetDeviceProperties(&deviceProp, card_nb); | |
3219 if(cudaSuccess != err) { | |
3220 return PyErr_Format(PyExc_EnvironmentError, | |
3221 "Unable to get properties of gpu %i: %s", | |
3222 card_nb, | |
3223 cudaGetErrorString(cudaGetLastError())); | |
3224 } | |
3225 | |
3226 if(deviceProp.major == 9999 && deviceProp.minor == 9999 ){ | |
3227 return PyErr_Format(PyExc_EnvironmentError, | |
3228 "There is no device that supports CUDA"); | |
3229 } | |
3230 | |
3231 if(card_number_provided) { | |
3232 err = cudaSetDevice(card_nb); | |
3233 if(cudaSuccess != err) { | |
3234 return PyErr_Format(PyExc_EnvironmentError, | |
3235 "Unable to set device %i: %s", | |
3236 card_nb, | |
3237 cudaGetErrorString(cudaGetLastError())); | |
3238 } | |
3239 if (cublas_init() == -1) | |
3240 return NULL; | |
3241 } | |
3242 if(card_number_provided && g_use_cnmem) { | |
3243 size_t mem = 0; | |
3244 if (cnmem > 1) | |
3245 mem = cnmem * 1024 * 1024; | |
3246 else{ | |
3247 // Clip to 95% to leave memory for the driver. | |
3248 // 98% didn't work in some cases. | |
3249 if (cnmem > .95){ | |
3250 cnmem = .95; | |
3251 } | |
3252 size_t free = 0, total = 0; | |
3253 cudaError_t err = cudaMemGetInfo(&free, &total); | |
3254 if (err != cudaSuccess){ | |
3255 // Clear the error flag, cudaMemGetInfo doesn't do it. | |
3256 // Currently cudaGetLastError() returns the same thing as err, | |
3257 // but even if a future version returned something else here, | |
3258 // all we want to do is reset the flag. | |
3259 cudaGetLastError(); | |
3260 PyErr_Format(PyExc_RuntimeError, | |
3261 "Error while getting memory info about the gpu: %s", | |
3262 cudaGetErrorString(err)); | |
3263 return NULL; | |
3264 } | |
3265 mem = total * cnmem; | |
3266 } | |
3267 if(initCnmem(card_number_provided, card_nb, mem) == -1){ | |
3268 return NULL; | |
3269 } | |
3270 } | |
3271 | |
3272 Py_INCREF(Py_None); | |
3273 return Py_None; | |
3274 } | |
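/*
 * Illustrative sketch (not part of the original file): the pool size passed
 * to initCnmem above follows a simple rule -- a cnmem value > 1 is taken as
 * megabytes, a value in (0, 1] as a fraction of total device memory, clipped
 * to 0.95 to leave room for the driver. Restated as a standalone helper,
 * assuming `total` is the second value reported by cudaMemGetInfo:
 */
static size_t cnmem_pool_bytes(float cnmem, size_t total)
{
    if (cnmem > 1)
        return (size_t)(cnmem * 1024 * 1024); // interpreted as megabytes
    if (cnmem > .95f)
        cnmem = .95f;                         // clip, as in gpu_init above
    return (size_t)(total * cnmem);           // fraction of total memory
}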
3275 | |
3276 PyObject * | |
3277 CudaNdarray_active_device_number(PyObject* _unused, PyObject* _unused_args) { | |
3278 // NB: No cuda error checking here; keeps things simple, and it's not | |
3279 // really necessary. | |
3280 int currentDevice; | |
3281 cudaGetDevice(¤tDevice); | |
3282 return PyInt_FromLong(currentDevice); | |
3283 } | |
3284 | |
3285 PyObject * | |
3286 CudaNdarray_active_device_name(PyObject* _unused, PyObject* _unused_args) { | |
3287 // NB: No cuda error checking here; keeps things simple, and it's not | |
3288 // really necessary. | |
3289 int currentDevice; | |
3290 cudaGetDevice(¤tDevice); | |
3291 | |
3292 cudaDeviceProp deviceProp; | |
3293 cudaGetDeviceProperties(&deviceProp, currentDevice); | |
3294 return PyString_FromString(deviceProp.name); | |
3295 } | |
3296 | |
3297 PyObject * | |
3298 CudaNdarray_gpu_shutdown(PyObject* _unused, PyObject* _unused_args) { | |
3299 // Don't handle errors here | |
3300 cublas_shutdown(); | |
3301 g_gpu_context_active = 0; // context has now been closed down | |
3302 if(g_use_cnmem) { | |
3303 cnmemStatus_t status = cnmemFinalize(); | |
3304 if(status != CNMEM_STATUS_SUCCESS) { | |
3305 fprintf(stderr, "CudaNdarray_gpu_shutdown: cnmemFinalize failed! Reason=%s\n", | |
3306 cnmemGetErrorString(status)); | |
3307 if(status == CNMEM_STATUS_CUDA_ERROR) { | |
3308 fprintf(stderr, " Cuda-Reason=%s\n", | |
3309 cudaGetErrorString(cudaGetLastError())); | |
3310 } | |
3311 } | |
3312 } | |
3313 cudaThreadExit(); | |
3314 | |
3315 Py_INCREF(Py_None); | |
3316 return Py_None; | |
3317 } | |
3318 | |
3319 /* | |
3320 * This function is tested in theano/misc/test_pycuda_theano_simple.py | |
3321 */ | |
3322 PyObject * | |
3323 CudaNdarray_from_gpu_pointer(PyObject* _unused, PyObject* args) | |
3324 { | |
3325 int verbose = 0; | |
3326 PyObject *gpu_ptr = NULL; | |
3327 PyObject *shapes = NULL; | |
3328 PyObject *strides = NULL; | |
3329 PyObject *base = NULL; | |
3330 PyObject *rval = NULL; | |
3331 | |
3332 //args should consist of 4 python objects | |
3333 //The first is the gpu ptr | |
3334 //The second is the shape | |
3335 //The third is the strides; the fourth is the base object | |
3336 if (! PyArg_ParseTuple(args, "OOOO", &gpu_ptr, &shapes, &strides, &base)) | |
3337 return NULL; | |
3338 | |
3339 if (verbose) printf("In CudaNdarray_from_gpu_pointer\n"); | |
3340 if (!PyLong_Check(gpu_ptr)) | |
3341 { | |
3342 PyErr_Format(PyExc_Exception, "CudaNdarray_from_gpu_pointer: The gpu pointer is not a long"); | |
3343 return NULL; | |
3344 } | |
3345 | |
3346 Py_ssize_t nd = PyObject_Length(shapes); | |
3347 if (nd < 0) | |
3348 { | |
3349 PyErr_SetString(PyExc_TypeError, "CudaNdarray_from_gpu_pointer: Couldn't get length of second argument"); | |
3350 return NULL; | |
3351 } | |
3352 Py_ssize_t nd_stride = PyObject_Length(strides); | |
3353 if (nd_stride < 0) | |
3354 { | |
3355 PyErr_SetString(PyExc_TypeError, "CudaNdarray_from_gpu_pointer: Couldn't get length of third argument"); | |
3356 return NULL; | |
3357 } | |
3358 | |
3359 if (nd != nd_stride) | |
3360 { | |
3361 PyErr_SetString(PyExc_TypeError, "CudaNdarray_from_gpu_pointer: We need the same number of shapes and strides"); | |
3362 return NULL; | |
3363 } | |
3364 | |
3365 rval = CudaNdarray_New(); | |
3366 | |
3367 if (CudaNdarray_set_nd((CudaNdarray *)rval, nd)) | |
3368 { | |
3369 //CudaNdarray_set_nd set the error msg | |
3370 return NULL; | |
3371 } | |
3372 // set gpu pointer | |
3373 assert(((CudaNdarray *)rval)->data_allocated == 0); | |
3374 if (CudaNdarray_set_device_data((CudaNdarray *)rval, (float *)PyInt_AsLong(gpu_ptr), base)) | |
3375 { | |
3376 PyErr_SetString(PyExc_TypeError, "CudaNdarray_from_gpu_pointer: Error while setting the gpu pointer"); | |
3377 return NULL; | |
3378 | |
3379 } | |
3380 | |
3381 // Set dims and strides | |
3382 for (int i = nd-1; i >= 0; --i) | |
3383 { | |
3384 PyObject * idx = PyLong_FromLong(i); | |
3385 if (idx == NULL) | |
3386 { | |
3387 PyErr_SetString(PyExc_Exception, "CudaNdarray_from_gpu_pointer: Couldn't make long object to loop over list/tuple"); | |
3388 return NULL; | |
3389 } | |
3390 PyObject* dim_ = PyObject_GetItem(shapes, idx); | |
3391 PyObject* strd_ = PyObject_GetItem(strides, idx); | |
3392 if (!PyInt_Check(dim_)) | |
3393 { | |
3394 PyErr_Format(PyExc_Exception, "CudaNdarray_from_gpu_pointer: shapes[%d] is not an int", i); | |
3395 return NULL; | |
3396 } | |
3397 if (!PyInt_Check(strd_)) | |
3398 { | |
3399 PyErr_Format(PyExc_Exception, "CudaNdarray_from_gpu_pointer: strides[%d] is not an int", i); | |
3400 return NULL; | |
3401 } | |
3402 int dim = PyInt_AsLong(dim_); | |
3403 int strd = PyInt_AsLong(strd_); | |
3404 CudaNdarray_set_stride((CudaNdarray *)rval, i, strd); | |
3405 CudaNdarray_set_dim((CudaNdarray *)rval, i, dim); | |
3406 Py_DECREF(idx); | |
3407 Py_DECREF(dim_); | |
3408 Py_DECREF(strd_); | |
3409 } | |
3410 if (verbose) printf("CudaNdarray_from_gpu_pointer normal return\n"); | |
3411 return rval; | |
3412 } | |
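/*
 * Illustrative sketch (not part of the original file): how a caller holding
 * a device allocation could package arguments for the function above. The
 * names `dev_ptr` and `base`, and the 2x3 shape, are hypothetical; strides
 * are in elements, matching what the loop above feeds CudaNdarray_set_stride.
 */
static PyObject *
wrap_device_buffer_example(float * dev_ptr, PyObject * base)
{
    // (pointer, shape, strides, base) for a C-contiguous 2x3 buffer.
    PyObject * args = Py_BuildValue("(N(ii)(ii)O)",
                                    PyLong_FromVoidPtr(dev_ptr),
                                    2, 3,   // shape
                                    3, 1,   // element strides
                                    base);
    if (!args)
        return NULL;
    PyObject * rval = CudaNdarray_from_gpu_pointer(NULL, args);
    Py_DECREF(args);
    return rval;
}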
3413 | |
3414 PyObject * | |
3415 CudaNdarray_Dot(PyObject* _unused, PyObject* args) | |
3416 { | |
3417 PyObject *l=NULL; | |
3418 PyObject *r=NULL; | |
3419 PyObject * rval = NULL; | |
3420 | |
3421 //args should consist of two python objects ("OO") | |
3422 if (! PyArg_ParseTuple(args, "OO", &l, &r)) | |
3423 return NULL; | |
3424 | |
3425 if (!CudaNdarray_Check(l) || !CudaNdarray_Check(r)) | |
3426 { | |
3427 PyErr_SetString(PyExc_TypeError, "CudaNdarray arguments required "); | |
3428 goto CudaNdarray_dot_fail; | |
3429 } | |
3430 if (((CudaNdarray*)l)->nd != 2) | |
3431 { | |
3432 PyErr_SetString(PyExc_TypeError, "need 2d CudaNdarray arg for now"); | |
3433 goto CudaNdarray_dot_fail; | |
3434 } | |
3435 if (((CudaNdarray*)r)->nd != 2) | |
3436 { | |
3437 PyErr_SetString(PyExc_TypeError, "need 2d CudaNdarray arg for now"); | |
3438 goto CudaNdarray_dot_fail; | |
3439 } | |
3440 rval = CudaNdarray_New(); | |
3441 if (!rval) | |
3442 { | |
3443 goto CudaNdarray_dot_fail; | |
3444 } | |
3445 int dims[2]; | |
3446 dims[0] = CudaNdarray_HOST_DIMS((CudaNdarray*)l)[0]; | |
3447 dims[1] = CudaNdarray_HOST_DIMS((CudaNdarray*)r)[1]; | |
3448 if (CudaNdarray_alloc_contiguous((CudaNdarray*)rval, 2, dims)) | |
3449 { | |
3450 goto CudaNdarray_dot_fail; | |
3451 } | |
3452 if (CudaNdarray_gemm(1.0, (CudaNdarray*)l, (CudaNdarray*)r, 0.0, (CudaNdarray*)rval)) | |
3453 { | |
3454 goto CudaNdarray_dot_fail; | |
3455 } | |
3456 | |
3457 return rval; | |
3458 | |
3459 CudaNdarray_dot_fail: | |
3460 Py_XDECREF(rval); | |
3461 return NULL; | |
3462 } | |
3463 | |
3464 static PyObject * | |
3465 filter(PyObject* __unused_self, PyObject *args) // args = (data, broadcastable, strict, storage) | |
3466 { | |
3467 /* | |
3468 * TODO: DOC what this function should do in the various cases of | |
3469 * What is 'strict' supposed to mean in the context of this function? | |
3470 * What do we do with input that could be interpreted as matching the broadcastable pattern in strict vs. non-strict cases? | |
3471 * | |
3472 */ | |
3473 PyObject *py_data=NULL; | |
3474 PyArrayObject * data = NULL; | |
3475 int strict = 0; | |
3476 PyObject * broadcastable=NULL; | |
3477 PyObject * storage=NULL; | |
3478 CudaNdarray * rval=NULL; | |
3479 | |
3480 //Python object references which are provided to the caller are borrowed references | |
3481 if (!PyArg_ParseTuple(args, "OOiO", &py_data, &broadcastable, &strict, &storage)) return NULL; | |
3482 | |
3483 if (!PyTuple_Check(broadcastable)){ | |
3484 PyErr_SetString(PyExc_TypeError, "broadcastable arg should be a tuple of int."); | |
3485 return NULL; | |
3486 } | |
3487 Py_INCREF(py_data); | |
3488 Py_INCREF(broadcastable); | |
3489 | |
3490 CudaNdarray * cnda = (CudaNdarray*)py_data; | |
3491 | |
3492 if (strict || CudaNdarray_Check(py_data)) | |
3493 { | |
3494 //TODO: support non-strict "casting" from a value to the broadcastable/type/size that we need. | |
3495 if (!CudaNdarray_Check(py_data)) | |
3496 { | |
3497 Py_DECREF(py_data); | |
3498 Py_DECREF(broadcastable); | |
3499 PyErr_SetString(PyExc_TypeError, "strict mode requires CudaNdarray"); | |
3500 return NULL; | |
3501 } | |
3502 if (cnda->nd != PyTuple_Size(broadcastable)) | |
3503 { | |
3504 Py_DECREF(py_data); | |
3505 Py_DECREF(broadcastable); | |
3506 PyErr_Format(PyExc_TypeError, "Wrong rank: %i vs %li", cnda->nd, (long)PyTuple_Size(broadcastable)); | |
3507 return NULL; | |
3508 } | |
3509 for (int i = 0; i < cnda->nd; ++i) | |
3510 { | |
3511 if ((CudaNdarray_HOST_DIMS(cnda)[i] > 1) && PyInt_AsLong(PyTuple_GetItem(broadcastable, Py_ssize_t(i)))) | |
3512 { | |
3513 PyErr_Format(PyExc_TypeError, "Non-unit size in broadcastable dimension %i", i); | |
3514 Py_DECREF(py_data); | |
3515 Py_DECREF(broadcastable); | |
3516 return NULL; | |
3517 }else if (CudaNdarray_HOST_DIMS(cnda)[i] == 1 && CudaNdarray_HOST_STRIDES(cnda)[i] != 0){ | |
3518 PyErr_Format(PyExc_TypeError, "Non-zero stride (%d) on dimension %d of size 1", | |
3519 CudaNdarray_HOST_STRIDES(cnda)[i], i); | |
3520 Py_DECREF(py_data); | |
3521 Py_DECREF(broadcastable); | |
3522 return NULL; | |
3523 } | |
3524 } | |
3525 Py_DECREF(broadcastable); | |
3526 return py_data; | |
3527 } | |
3528 else | |
3529 { | |
3530 data = (PyArrayObject*)PyArray_FromObject(py_data, REAL_TYPENUM, PyTuple_Size(broadcastable), PyTuple_Size(broadcastable)); | |
3531 if (!data) | |
3532 { | |
3533 //err message already defined | |
3534 Py_DECREF(py_data); | |
3535 Py_DECREF(broadcastable); | |
3536 return NULL; | |
3537 } | |
3538 for (int i = 0; i < PyArray_NDIM(data); ++i) | |
3539 { | |
3540 if ((PyArray_DIMS(data)[i] > 1) && PyInt_AsLong(PyTuple_GetItem(broadcastable, Py_ssize_t(i)))) | |
3541 { | |
3542 PyErr_Format(PyExc_TypeError, "Non-unit size in broadcastable dimension %i", i); | |
3543 Py_DECREF(data); | |
3544 Py_DECREF(py_data); | |
3545 Py_DECREF(broadcastable); | |
3546 return NULL; | |
3547 } | |
3548 } | |
3549 if (storage && CudaNdarray_Check(storage)) | |
3550 { | |
3551 rval = (CudaNdarray*) storage; | |
3552 Py_INCREF(rval); | |
3553 } | |
3554 else | |
3555 { | |
3556 rval = (CudaNdarray*) CudaNdarray_New(); | |
3557 } | |
3558 if (rval) | |
3559 { | |
3560 if (CudaNdarray_CopyFromArray(rval, data)) | |
3561 { | |
3562 Py_DECREF(rval); | |
3563 rval = NULL; | |
3564 } | |
3565 } | |
3566 Py_DECREF(data); | |
3567 Py_DECREF(py_data); | |
3568 Py_DECREF(broadcastable); | |
3569 return (PyObject*)rval; | |
3570 } | |
3571 } | |
3572 | |
3573 //TODO-- CudaNdarray_Dot and CudaNdarray_active_device_name follow different capitalization conventions. | |
3574 // Pick one and standardize it; this file is already annoying enough to grep through. | |
3575 static PyMethodDef module_methods[] = { | |
3576 {"dimshuffle", CudaNdarray_Dimshuffle, METH_VARARGS, "Returns the dimshuffle of a CudaNdarray."}, | |
3577 {"dot", CudaNdarray_Dot, METH_VARARGS, "Returns the matrix product of two CudaNdarray arguments."}, | |
3578 {"gpu_init", CudaNdarray_gpu_init, METH_VARARGS, "Select the gpu card to use; also usable to test whether CUDA is available."}, | |
3579 {"select_a_gpu", CudaNdarray_select_a_gpu, METH_NOARGS, "Call this method if you want to select a GPU before gpu_init call and let the driver choose the GPU."}, | |
3580 {"active_device_name", CudaNdarray_active_device_name, METH_VARARGS, "Get the name of the active device."}, | |
3581 {"active_device_number", CudaNdarray_active_device_number, METH_VARARGS, "Get the number of the active device."}, | |
3582 {"gpu_shutdown", CudaNdarray_gpu_shutdown, METH_VARARGS, "Shut down the gpu."}, | |
3583 {"device_properties", GetDeviceProperties, METH_VARARGS, "Return a dictionary with the device properties."}, | |
3584 {"mem_info", GetDeviceMemInfo, METH_NOARGS, "Return a tuple with the free and total memory on the gpu in bytes."}, | |
3585 #if COMPUTE_GPU_MEM_USED | |
3586 {"theano_allocated", GetTheanoAllocInfo, METH_NOARGS, "Return the size in bytes of memory Theano currently have allocated on the gpu."}, | |
3587 #endif | |
3588 {"ptr_int_size", CudaNdarray_ptr_int_size, METH_VARARGS, "Return a tuple with the size of gpu pointer, cpu pointer and int in bytes."}, | |
3589 {"filter", filter, METH_VARARGS, "filter(obj, broadcastable, strict, storage) returns a CudaNdarray initialized to obj if it matches the constraints of broadcastable. strict=True prevents any numeric casting. If storage is a CudaNdarray it may be overwritten and used as the return value."}, | |
3590 {"outstanding_mallocs", outstanding_mallocs, METH_VARARGS, "how many more mallocs have been called than free's"}, | |
3591 {"from_gpu_pointer", CudaNdarray_from_gpu_pointer, METH_VARARGS, "Used to create a CudaNdarray from already allocated memory on the gpu.(example by pycuda)"}, | |
3592 {"synchronize", CudaNdarray_synchronize, METH_NOARGS, "Used to synchronize the device"}, | |
3593 {"cublas_v2", CudaNdarray_cublasv2, METH_NOARGS, | |
3594 "Used to know if this version of cuda_ndarray is linked with cublas v2."}, | |
3595 {NULL, NULL, 0, NULL} /* Sentinel */ | |
3596 }; | |
3597 | |
3598 #define CNDA_MOD_NAME "cuda_ndarray" | |
3599 #define CNDA_DOCSTRING "CUDA implementation of a numpy ndarray-like object." | |
3600 | |
3601 #if PY_MAJOR_VERSION == 3 | |
3602 static struct PyModuleDef cuda_ndarray_moduledef = | |
3603 { | |
3604 PyModuleDef_HEAD_INIT, | |
3605 CNDA_MOD_NAME, | |
3606 CNDA_DOCSTRING, | |
3607 -1, /* size of per-interpreter state of the module, | |
3608 or -1 if the module keeps state in global variables. */ | |
3609 module_methods | |
3610 }; | |
3611 | |
3612 PyMODINIT_FUNC | |
3613 PyInit_cuda_ndarray(void) | |
3614 #else | |
3615 PyMODINIT_FUNC | |
3616 initcuda_ndarray(void) | |
3617 #endif | |
3618 { | |
3619 import_array(); | |
3620 | |
3621 PyObject* m; | |
3622 | |
3623 if (PyType_Ready(&CudaNdarrayType) < 0) { | |
3624 #if PY_MAJOR_VERSION == 3 | |
3625 return NULL; | |
3626 #else | |
3627 return; | |
3628 #endif | |
3629 } | |
3630 | |
3631 #if PY_MAJOR_VERSION == 3 | |
3632 m = PyModule_Create(&cuda_ndarray_moduledef); | |
3633 #else | |
3634 m = Py_InitModule3(CNDA_MOD_NAME, module_methods, CNDA_DOCSTRING); | |
3635 #endif | |
3636 | |
3637 if (m == NULL) { | |
3638 #if PY_MAJOR_VERSION == 3 | |
3639 return NULL; | |
3640 #else | |
3641 return; | |
3642 #endif | |
3643 } | |
3644 | |
3645 Py_INCREF(&CudaNdarrayType); | |
3646 PyModule_AddObject(m, "CudaNdarray", (PyObject *)&CudaNdarrayType); | |
3647 #if COMPUTE_GPU_MEM_USED | |
3648 for(int i=0;i<TABLE_SIZE;i++){ | |
3649 _alloc_size_table[i].ptr=NULL; | |
3650 _alloc_size_table[i].size=0; | |
3651 } | |
3652 #endif | |
3653 // cublasInit(); | |
3654 //if (0&&CUBLAS_STATUS_SUCCESS != cublasGetError()) | |
3655 //{ | |
3656 //std::cerr << "WARNING: initcuda_ndarray: error initializing device\n"; | |
3657 //} | |
3658 if (0) //TODO: is this necessary? | |
3659 { | |
3660 int deviceId = 0; // TODO: what number goes here? | |
3661 cudaSetDevice(deviceId); | |
3662 cudaError_t err = cudaGetLastError(); | |
3663 if( cudaSuccess != err) | |
3664 { | |
3665 std::cerr << "Error in SetDevice:" << cudaGetErrorString(err) << "\n"; | |
3666 } | |
3667 } | |
3668 | |
3669 #if PY_MAJOR_VERSION == 3 | |
3670 return m; | |
3671 #endif | |
3672 } | |
3673 | |
3674 | |
3675 ////////////////////////////////////// | |
3676 // | |
3677 // C API FOR CudaNdarray | |
3678 // | |
3679 ////////////////////////////////////// | |
3680 | |
3681 int | |
3682 CudaNdarray_Check(const PyObject * ob) | |
3683 { | |
3684 //TODO: doesn't work with inheritance | |
3685 return CudaNdarray_CheckExact(ob); | |
3686 } | |
3687 int | |
3688 CudaNdarray_CheckExact(const PyObject * ob) | |
3689 { | |
3690 return ((Py_TYPE(ob) == &CudaNdarrayType) ? 1 : 0); | |
3691 } | |
3692 | |
3693 PyObject * | |
3694 CudaNdarray_New(int nd) | |
3695 { | |
3696 CudaNdarray *self = (CudaNdarray *)CudaNdarrayType.tp_alloc(&CudaNdarrayType, 0); | |
3697 if (self == NULL) | |
3698 { | |
3699 PyErr_SetString(PyExc_RuntimeError, "CudaNdarray_New failed to allocate self"); | |
3700 return NULL; | |
3701 } | |
3702 CudaNdarray_null_init(self); | |
3703 | |
3704 if (nd == 0) | |
3705 { | |
3706 self->nd = 0; | |
3707 } | |
3708 else if (nd > 0) | |
3709 { | |
3710 if (CudaNdarray_set_nd(self, nd)) | |
3711 { | |
3712 Py_DECREF(self); | |
3713 return NULL; | |
3714 } | |
3715 } | |
3716 ++_outstanding_mallocs[1]; | |
3717 return (PyObject *)self; | |
3718 } | |
3719 | |
3720 | |
3721 | |
3722 ////////////////////////////// | |
3723 // | |
3724 // Published helper functions | |
3725 // | |
3726 ////////////////////////////// | |
3727 | |
3728 static int | |
3729 cublas_init() | |
3730 { | |
3731 cublasStatus_t err; | |
3732 err = cublasCreate(&handle); | |
3733 if (CUBLAS_STATUS_SUCCESS != err) | |
3734 { | |
3735 if(CUBLAS_STATUS_NOT_INITIALIZED == err) | |
3736 PyErr_SetString(PyExc_RuntimeError, | |
3737 "cublasCreate() returned this error " | |
3738 "'the CUDA Runtime initialization failed'"); | |
3739 else if(CUBLAS_STATUS_ALLOC_FAILED == err) | |
3740 PyErr_SetString(PyExc_RuntimeError, | |
3741 "cublasCreate() returned this error " | |
3742 "'the resources could not be allocated'"); | |
3743 else | |
3744 PyErr_SetString(PyExc_RuntimeError, | |
3745 "unknow error during returned by cublasCreate()"); | |
3746 return -1; | |
3747 } | |
3748 // Set the default stream as the one to execute on (default) | |
3749 cublasSetStream(handle, NULL); | |
3750 // Pointer to scalars are on the host (also default) | |
3751 cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST); | |
3752 #if CUDA_VERSION >= 5000 | |
3753 // atomics can be used in kernels to speed up operations (not default) | |
3754 // This may lead to a slight variance from run to run in some operations | |
3755 cublasSetAtomicsMode(handle, CUBLAS_ATOMICS_ALLOWED); | |
3756 #endif | |
3757 return 0; | |
3758 } | |
3759 | |
3760 static void | |
3761 cublas_shutdown() | |
3762 { | |
3763 if (handle != NULL) | |
3764 cublasDestroy(handle); | |
3765 // No point in handling any errors here | |
3766 handle = NULL; | |
3767 } | |
3768 | |
3769 int | |
3770 CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj) | |
3771 { | |
3772 int err = CudaNdarray_alloc_contiguous(self, PyArray_NDIM(obj), | |
3773 PyArray_DIMS(obj)); | |
3774 if (err) { | |
3775 return err; | |
3776 } | |
3777 | |
3778 int typenum = PyArray_TYPE(obj); | |
3779 if (typenum != REAL_TYPENUM) | |
3780 { | |
3781 PyErr_SetString(PyExc_TypeError, "can only copy from float arrays"); | |
3782 return -1; | |
3783 } | |
3784 assert( 4 == PyArray_ITEMSIZE(obj)); | |
3785 PyArrayObject * py_src = (PyArrayObject *)PyArray_ContiguousFromAny( | |
3786 (PyObject*)obj, typenum, self->nd, self->nd); | |
3787 if (!py_src) { | |
3788 return -1; | |
3789 } | |
3790 npy_intp py_src_size = PyArray_SIZE(py_src); | |
3791 void *py_src_data = PyArray_DATA(py_src); | |
3792 cudaError_t cerr; | |
3793 CNDA_BEGIN_ALLOW_THREADS; | |
3794 cerr = cudaMemcpy(self->devdata, py_src_data, | |
3795 py_src_size * sizeof(real), | |
3796 cudaMemcpyHostToDevice); | |
3797 //CNDA_THREAD_SYNC; // unneeded because cudaMemcpy is blocking anyway | |
3798 CNDA_END_ALLOW_THREADS; | |
3799 if (cudaSuccess != cerr) | |
3800 { | |
3801 PyErr_Format(PyExc_RuntimeError, | |
3802 "Cuda error '%s' while copying %lli data element" | |
3803 " to device memory", | |
3804 cudaGetErrorString(cerr), | |
3805 (long long)py_src_size); | |
3806 Py_DECREF(py_src); | |
3807 return -1; | |
3808 } | |
3809 Py_DECREF(py_src); | |
3810 return 0; | |
3811 } | |
3812 | |
3813 PyObject * | |
3814 CudaNdarray_new_nd(int nd) | |
3815 { | |
3816 CudaNdarray * rval = (CudaNdarray*) CudaNdarray_New(); | |
3817 if (!rval || CudaNdarray_set_nd(rval, nd)) | |
3818 { | |
3819 Py_XDECREF(rval); | |
3820 rval = NULL; | |
3821 } | |
3822 return (PyObject *) rval; | |
3823 } | |
3824 | |
3825 | |
3826 /** | |
3827 * Initialize 'self' as a view of 'base', with memory storage 'data' | |
3828 */ | |
3829 | |
3830 int CudaNdarray_set_device_data(CudaNdarray * self, float * data, PyObject * base) | |
3831 { | |
3832 if (self->data_allocated) | |
3833 { | |
3834 assert(self->devdata); | |
3835 if (device_free(self->devdata)) | |
3836 { | |
3837 self->devdata = NULL; | |
3838 self->data_allocated = 0; | |
3839 return -1; | |
3840 } | |
3841 } | |
3842 // Get the original base object (base.base.base...) | |
3843 PyObject * orig_base = base; | |
3844 // base is not always a CudaNdarray. It can be a GpuArray from pycuda, ... | |
3845 while (orig_base && CudaNdarray_Check(orig_base) && ((CudaNdarray*) orig_base)->base) | |
3846 { | |
3847 // base_base is itself a view | |
3848 orig_base = ((CudaNdarray*) orig_base)->base; | |
3849 } | |
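    // E.g. (hypothetical): if `base` is a view V2 whose ->base is a view V1
    // owned by allocation BUF, the loop above walks V2 -> V1 -> BUF and BUF
    // is stored directly as self->base, so chains of views stay flat.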
3850 //N.B. XDECREF and XINCREF are no-ops for NULL pointers | |
3851 if (self->base != orig_base) | |
3852 { | |
3853 Py_XDECREF(self->base); | |
3854 self->base = orig_base; | |
3855 Py_XINCREF(self->base); | |
3856 } | |
3857 self->data_allocated = 0; | |
3858 self->devdata = data; | |
3859 return 0; | |
3860 } | |
3861 | |
3862 static __global__ void k_copy_1d(const int N, const float * x, const int sx, float * y, const int sy) | |
3863 { | |
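    // Grid-stride loop (descriptive note): each thread copies elements
    // idx, idx + gridDim.x*blockDim.x, ... so any (n_blocks, n_threads)
    // launch configuration covers all N elements.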
3864 for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < N; i += gridDim.x*blockDim.x) | |
3865 { | |
3866 y[i*sy] = x[i*sx]; | |
3867 } | |
3868 } | |
3869 | |
3870 // N1 through N4 are the size of y | |
3871 static __global__ void k_copy_4d(const int N1, | |
3872 const int N2, const int N3, const int N4, | |
3873 const float * x, const int sx1, const int sx2, const int sx3, | |
3874 const int sx4, float * y, const int sy1, const int sy2, | |
3875 const int sy3, const int sy4) | |
3876 { | |
3877 // These must be made int instead of unsigned int due to a bug in nvcc | |
3878 int bx = blockIdx.x; | |
3879 int by = blockIdx.y; | |
3880 | |
3881 for (int i = bx; i < N1; i += gridDim.x) | |
3882 { | |
3883 for (int j = by; j < N2; j += gridDim.y) | |
3884 { | |
3885 for (int k = threadIdx.x; k < N3; k += (int) blockDim.x) | |
3886 { | |
3887 for (int l = threadIdx.y; l < N4; l += (int) blockDim.y) | |
3888 { | |
3889 y[i * sy1 + j * sy2 + k * sy3 + l * sy4] = | |
3890 x[i * sx1 + j * sx2 + k * sx3 + l * sx4]; | |
3891 } | |
3892 } | |
3893 } | |
3894 } | |
3895 } | |
3896 | |
3897 //copy from other into self | |
3898 int CudaNdarray_CopyFromCudaNdarray(CudaNdarray * self, | |
3899 const CudaNdarray * other, | |
3900 bool unbroadcast) | |
3901 { | |
3902 int verbose = 0; | |
3903 if (verbose>1) fprintf(stderr, "CudaNdarray_CopyFromCudaNdarray\n"); | |
3904 | |
3905 //standard elemwise size checks | |
3906 if (self->nd == -1) | |
3907 { | |
3908 PyErr_SetString(PyExc_TypeError, | |
3909 "can't copy into un-initialized CudaNdarray"); | |
3910 return -1; | |
3911 } | |
3912 CudaNdarray * new_other = NULL; | |
3913 | |
3914 if (self->nd < other->nd) | |
3915 { | |
3916 PyErr_Format(PyExc_NotImplementedError, | |
3917 "CudaNdarray_CopyFromCudaNdarray: The number of dimensions of the " | |
3918 "destination needs to be >= the number of dimensions of the " | |
3919 "source. Got %d and %d.", self->nd, other->nd); | |
3920 return -1; | |
3921 } | |
3922 else if (self->nd != other->nd) | |
3923 { | |
3924 new_other = (CudaNdarray *) CudaNdarray_View(other); | |
3925 int added_dims = self->nd - other->nd; | |
3926 int* pattern = (int*) alloca(self->nd * sizeof(int)); | |
3927 for(int i = 0; i < added_dims; i++) | |
3928 pattern[i] = -1; | |
3929 for(int i = 0; i < other->nd; i++) | |
3930 pattern[i + added_dims] = i; | |
3931 CudaNdarray_dimshuffle(new_other, self->nd, pattern); | |
3932 other = new_other; | |
3933 } | |
3934 assert(self->nd == other->nd); | |
3935 //standard elemwise dim checks (also compute total size) | |
3936 unsigned int size = 1; | |
3937 unsigned int size_source = 1; | |
3938 for (int i = 0; i< self->nd; ++i) | |
3939 { | |
3940 if ((CudaNdarray_HOST_DIMS(self)[i] != CudaNdarray_HOST_DIMS(other)[i]) | |
3941 && (1!=CudaNdarray_HOST_DIMS(other)[i] || !unbroadcast) ) | |
3942 { | |
3943 PyErr_Format(PyExc_ValueError, | |
3944 "CudaNdarray_CopyFromCudaNdarray:" | |
3945 " need same dimensions for dim %d," | |
3946 " destination=%d, source=%d", | |
3947 i, CudaNdarray_HOST_DIMS(self)[i], | |
3948 CudaNdarray_HOST_DIMS(other)[i]); | |
3949 Py_XDECREF(new_other); | |
3950 return -1; | |
3951 } | |
3952 size *= (unsigned int) CudaNdarray_HOST_DIMS(self)[i]; | |
3953 size_source *= (unsigned int) CudaNdarray_HOST_DIMS(other)[i]; | |
3954 } | |
3955 if (0 == size) | |
3956 { | |
3957 Py_XDECREF(new_other); | |
3958 return 0; //nothing to copy, we're done. | |
3959 } | |
3960 if (CudaNdarray_is_c_contiguous(self) && | |
3961 CudaNdarray_is_c_contiguous(other) && | |
3962 size == size_source) | |
3963 { | |
3964 if (verbose) | |
3965 fprintf(stderr, "Copying contiguous vector with cublasScopy\n"); | |
3966 | |
3967 cublasStatus_t err; | |
3968 err = cublasScopy(handle, size, CudaNdarray_DEV_DATA(other), 1, | |
3969 CudaNdarray_DEV_DATA(self), 1); | |
3970 CNDA_THREAD_SYNC; | |
3971 Py_XDECREF(new_other); | |
3972 if (CUBLAS_STATUS_SUCCESS != err) | |
3973 { | |
3974 PyErr_SetString(PyExc_RuntimeError, "Error copying memory"); | |
3975 return -1; | |
3976 } | |
3977 return 0; | |
3978 } | |
3979 //TODO: rewrite these copy operations to be more efficient | |
3980 // See, for example the transpose example in the cuda_sdk. | |
3981 switch (self->nd) | |
3982 { | |
3983 case 0: // scalar | |
3984 { | |
3985 // THIS CASE SHOULD NEVER HAPPEN BECAUSE SCALARS ARE ALWAYS C CONTIGUOUS | |
3986 assert(0); | |
3987 }; break; | |
3988 case 1: // vector | |
3989 { | |
3990 if (verbose) fprintf(stderr, "Copying non-contiguous vector\n"); | |
3991 if (verbose) fprint_CudaNdarray(stderr, other); | |
3992 unsigned int n_blocks = std::min(size, | |
3993 (unsigned int)NUM_VECTOR_OP_BLOCKS); | |
3994 unsigned int n_threads = std::min(ceil_intdiv(size, n_blocks), | |
3995 (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK); | |
3996 k_copy_1d<<<n_blocks, n_threads>>>(size, | |
3997 CudaNdarray_DEV_DATA(other), | |
3998 CudaNdarray_HOST_STRIDES(other)[0], | |
3999 CudaNdarray_DEV_DATA(self), | |
4000 CudaNdarray_HOST_STRIDES(self)[0]); | |
4001 CNDA_THREAD_SYNC; | |
4002 cudaError_t err = cudaGetLastError(); | |
4003 if( cudaSuccess != err) | |
4004 { | |
4005 PyErr_Format(PyExc_RuntimeError, | |
4006 "Cuda error: %s: %s. (n_blocks=%i," | |
4007 " n_threads_per_block=%i)\n", "k_copy_1d", | |
4008 cudaGetErrorString(err), n_blocks, n_threads); | |
4009 Py_XDECREF(new_other); | |
4010 return -1; | |
4011 } | |
4012 }; break; | |
4013 case 4: // 4-tensor | |
4014 { | |
4015 if (verbose) | |
4016 { | |
4017 if (0 != fprint_CudaNdarray(stderr, other)) | |
4018 { | |
4019 Py_XDECREF(new_other); | |
4020 return -1; | |
4021 } | |
4022 } | |
4023 | |
4024 // The blocks implement the looping over the first two axes so | |
4025 // this needs to be (N1, N2) | |
4026 dim3 n_blocks( std::min(CudaNdarray_HOST_DIMS(self)[0], | |
4027 NUM_VECTOR_OP_BLOCKS), | |
4028 std::min(CudaNdarray_HOST_DIMS(self)[1], | |
4029 NUM_VECTOR_OP_BLOCKS)); | |
4030 // For the threads, just make as many as possible | |
4031 dim3 n_threads( std::min( (unsigned int) CudaNdarray_HOST_DIMS(self)[2], | |
4032 (unsigned int) NUM_VECTOR_OP_THREADS_PER_BLOCK), | |
4033 std::min( (unsigned int) CudaNdarray_HOST_DIMS(self)[3], | |
4034 (unsigned int) NUM_VECTOR_OP_THREADS_PER_BLOCK)); | |
4035 | |
4036 n_threads.x = std::min( (unsigned int) 32, (unsigned int) n_threads.x); | |
4037 n_threads.y = std::min( n_threads.y, NUM_VECTOR_OP_THREADS_PER_BLOCK / n_threads.x); | |
4038 | |
4039 k_copy_4d<<<n_blocks, n_threads>>>( | |
4040 // size of y | |
4041 (unsigned int) CudaNdarray_HOST_DIMS(self)[0], // N1 | |
4042 (unsigned int) CudaNdarray_HOST_DIMS(self)[1], // N2 | |
4043 (unsigned int) CudaNdarray_HOST_DIMS(self)[2], // N3 | |
4044 (unsigned int) CudaNdarray_HOST_DIMS(self)[3], // N4 | |
4045 CudaNdarray_DEV_DATA(other), // x | |
4046 // x strides | |
4047 CudaNdarray_HOST_STRIDES(other)[0], | |
4048 CudaNdarray_HOST_STRIDES(other)[1], | |
4049 CudaNdarray_HOST_STRIDES(other)[2], | |
4050 CudaNdarray_HOST_STRIDES(other)[3], | |
4051 CudaNdarray_DEV_DATA(self), // y | |
4052 // y strides | |
4053 CudaNdarray_HOST_STRIDES(self)[0], | |
4054 CudaNdarray_HOST_STRIDES(self)[1], | |
4055 CudaNdarray_HOST_STRIDES(self)[2], | |
4056 CudaNdarray_HOST_STRIDES(self)[3] | |
4057 ); | |
4058 CNDA_THREAD_SYNC; | |
4059 cudaError_t err = cudaGetLastError(); | |
4060 if( cudaSuccess != err) | |
4061 { | |
4062 PyErr_Format(PyExc_RuntimeError, | |
4063 "Cuda error: %s: %s.", | |
4064 "k_copy_4d", | |
4065 cudaGetErrorString(err)); | |
4066 Py_XDECREF(new_other); | |
4067 return -1; | |
4068 } | |
4069 }; break; | |
4070 default: | |
4071 { | |
4072 cudaError_t err = cudaGetLastError(); | |
4073 if(cudaSuccess != err){ | |
4074 PyErr_Format(PyExc_RuntimeError, | |
4075 "Unexpected Cuda error: %s: %s\n", | |
4076 "CudaNdarray_CopyFromCudaNdarray", | |
4077 cudaGetErrorString(err)); | |
4078 Py_XDECREF(new_other); | |
4079 return -1; | |
4080 } | |
4081 | |
4082 if (verbose) | |
4083 fprintf(stderr, | |
4084 "Copying with default version unbroadcast=%d\n", | |
4085 unbroadcast); | |
4086 // call worker routine | |
4087 unsigned int threads_per_block = std::min(size, | |
4088 (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK); | |
4089 unsigned int n_blocks = std::min(ceil_intdiv(size, threads_per_block), | |
4090 (unsigned int)NUM_VECTOR_OP_BLOCKS); | |
4091 const CudaNdarray * cuda_dims = other; | |
4092 if(unbroadcast) | |
4093 cuda_dims = self; | |
4094 //copy from other into self | |
4095 k_elemwise_unary_rowmajor_copy<<<n_blocks, threads_per_block>>>( | |
4096 size, | |
4097 (unsigned int)other->nd, | |
4098 (const int *)CudaNdarray_DEV_DIMS(cuda_dims), | |
4099 (const float*)CudaNdarray_DEV_DATA(other), | |
4100 (const int *)CudaNdarray_DEV_STRIDES(other), | |
4101 CudaNdarray_DEV_DATA(self), | |
4102 (const int *)CudaNdarray_DEV_STRIDES(self)); | |
4103 CNDA_THREAD_SYNC; | |
4104 err = cudaGetLastError(); | |
4105 if(verbose>1) | |
4106 fprintf(stderr, | |
4107 "INFO k_elemwise_unary_rowmaj (n_blocks=%i," | |
4108 " n_threads_per_block=%i)\n", | |
4109 n_blocks, threads_per_block); | |
4110 if( cudaSuccess != err) | |
4111 { | |
4112 //fprint_CudaNdarray(stderr, self); | |
4113 //fprint_CudaNdarray(stderr, other); | |
4114 PyErr_Format(PyExc_RuntimeError, | |
4115 "Cuda error: %s: %s. (n_blocks=%i," | |
4116 " n_threads_per_block=%i)\n", | |
4117 "k_elemwise_unary_rowmajor_copy", | |
4118 cudaGetErrorString(err), n_blocks, | |
4119 threads_per_block); | |
4120 Py_XDECREF(new_other); | |
4121 return -1; | |
4122 } | |
4123 } | |
4124 }; | |
4125 Py_XDECREF(new_other); | |
4126 return 0; | |
4127 } | |
4128 | |
4129 int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C) | |
4130 { | |
4131 if (A->nd != 2) | |
4132 { | |
4133 PyErr_SetString(PyExc_ValueError, "non-matrix arg A to gemm"); | |
4134 return -1; | |
4135 } | |
4136 if (B->nd != 2) | |
4137 { | |
4138 PyErr_SetString(PyExc_ValueError, "non-matrix arg B to gemm"); | |
4139 return -1; | |
4140 } | |
4141 if (C->nd != 2) | |
4142 { | |
4143 PyErr_SetString(PyExc_ValueError, "non-matrix arg C to gemm"); | |
4144 return -1; | |
4145 } | |
4146 | |
4147 // We must allow dimensions to be zero. | |
4148 if ((CudaNdarray_HOST_DIMS(A)[1] != CudaNdarray_HOST_DIMS(B)[0]) | |
4149 || (CudaNdarray_HOST_DIMS(A)[0] != CudaNdarray_HOST_DIMS(C)[0]) | |
4150 || (CudaNdarray_HOST_DIMS(B)[1] != CudaNdarray_HOST_DIMS(C)[1])) | |
4151 { | |
4152 PyErr_Format(PyExc_ValueError, "dimension mismatch in args to gemm (%i,%i)x(%i,%i)->(%i,%i)", | |
4153 CudaNdarray_HOST_DIMS(A)[0], | |
4154 CudaNdarray_HOST_DIMS(A)[1], | |
4155 CudaNdarray_HOST_DIMS(B)[0], | |
4156 CudaNdarray_HOST_DIMS(B)[1], | |
4157 CudaNdarray_HOST_DIMS(C)[0], | |
4158 CudaNdarray_HOST_DIMS(C)[1]); | |
4159 return -1; | |
4160 } | |
4161 | |
4162 // If matrix A or B has non-unit size and non-unit stride in both | |
4163 // dimensions, or negative strides, cublas can't use it directly, so we make a copy. | |
4164 CudaNdarray * A_new = NULL; | |
4165 CudaNdarray * B_new = NULL; | |
4166 if (((CudaNdarray_HOST_DIMS(A)[0] > 1) | |
4167 && (CudaNdarray_HOST_STRIDES(A)[0] != 1) | |
4168 && (CudaNdarray_HOST_DIMS(A)[1] > 1) | |
4169 && (CudaNdarray_HOST_STRIDES(A)[1] != 1)) | |
4170 || (CudaNdarray_HOST_STRIDES(A)[0] < 0) | |
4171 || (CudaNdarray_HOST_STRIDES(A)[1] < 0)) | |
4172 { | |
4173 A_new = (CudaNdarray*) CudaNdarray_Copy(A); | |
4174 if (!A_new) | |
4175 return -1; | |
4176 A = A_new; | |
4177 } | |
4178 | |
4179 if (((CudaNdarray_HOST_DIMS(B)[0] > 1) | |
4180 && (CudaNdarray_HOST_STRIDES(B)[0] != 1) | |
4181 && (CudaNdarray_HOST_DIMS(B)[1] > 1) | |
4182 && (CudaNdarray_HOST_STRIDES(B)[1] != 1)) | |
4183 || (CudaNdarray_HOST_STRIDES(B)[0] < 0) | |
4184 || (CudaNdarray_HOST_STRIDES(B)[1] < 0)) | |
4185 { | |
4186 B_new = (CudaNdarray*) CudaNdarray_Copy(B); | |
4187 if (!B_new) | |
4188 { | |
4189 // If A_new is NULL (meaning A was not copied), nothing happens | |
4190 Py_XDECREF(A_new); | |
4191 return -1; | |
4192 } | |
4193 B = B_new; | |
4194 } | |
4195 | |
4196 // If matrix C has non-unit size and non-unit stride in both | |
4197 // dimensions, or negative strides, we can't operate. We cannot copy | |
4198 // C either, because the calling code will expect the result to be | |
4199 // in the original C container. | |
4200 if (((CudaNdarray_HOST_DIMS(C)[0] > 1) | |
4201 && (CudaNdarray_HOST_STRIDES(C)[0] != 1) | |
4202 && (CudaNdarray_HOST_DIMS(C)[1] > 1) | |
4203 && (CudaNdarray_HOST_STRIDES(C)[1] != 1)) | |
4204 || (CudaNdarray_HOST_STRIDES(C)[0] < 0) | |
4205 || (CudaNdarray_HOST_STRIDES(C)[1] < 0)) | |
4206 { | |
4207 PyErr_Format(PyExc_AssertionError, | |
4208 "non-unit or negative stride in gemm arg C (%i,%i) of shape (%i,%i)", | |
4209 CudaNdarray_HOST_STRIDES(C)[0], | |
4210 CudaNdarray_HOST_STRIDES(C)[1], | |
4211 CudaNdarray_HOST_DIMS(C)[0], | |
4212 CudaNdarray_HOST_DIMS(C)[1]); | |
4213 Py_XDECREF(A_new); | |
4214 Py_XDECREF(B_new); | |
4215 return -1; | |
4216 } | |
4217 | |
4218 // the unit integer is divided logically into three fields of 4 bits | |
4219 // the lowermost 4 bits encode the stride pattern of the output | |
4220 // the next higher 4 bits encode the B variable (or y) | |
4221 // the next higher 4 bits encode the A variable (or x) | |
4222 // | |
4223 // the stride pattern for each input is encoded as 0 for unit stride from col to col (Row major) | |
4224 // 1 for unit stride from row to row (Col major) | |
4225 | |
4226 // a stride of 0 implies a dimension of 1 - so we can actually define | |
4227 // a stride of 0 as a 'unit' stride because gemm will never use it. | |
4228 // If a dimension is 0, its stride will not be used either, so we can | |
4229 // consider it a 'unit' stride too. | |
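    // Worked example (illustrative): A row-major (0x0 << 8), B column-major
    // (0x1 << 4), C row-major (0x0 << 0) gives unit == 0x010, selecting the
    // (T, N) case in the switch below.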
4230 int unit = 0; | |
4231 if (CudaNdarray_HOST_STRIDES(A)[1] == 1 || CudaNdarray_HOST_DIMS(A)[1] <= 1) { | |
4232 unit |= (0x0 << 8); | |
4233 } else if (CudaNdarray_HOST_STRIDES(A)[0] == 1 || CudaNdarray_HOST_DIMS(A)[0] <= 1) { | |
4234 unit |= (0x1 << 8); | |
4235 } else { | |
4236 unit |= (0x2 << 8); | |
4237 } | |
4238 if (CudaNdarray_HOST_STRIDES(B)[1] == 1 || CudaNdarray_HOST_DIMS(B)[1] <= 1) { | |
4239 unit |= (0x0 << 4); | |
4240 } else if (CudaNdarray_HOST_STRIDES(B)[0] == 1 || CudaNdarray_HOST_DIMS(B)[0] <= 1) { | |
4241 unit |= (0x1 << 4); | |
4242 } else { | |
4243 unit |= (0x2 << 4); | |
4244 } | |
4245 if (CudaNdarray_HOST_STRIDES(C)[1] == 1 || CudaNdarray_HOST_DIMS(C)[1] <= 1) { | |
4246 unit |= (0x0 << 0); | |
4247 } else if (CudaNdarray_HOST_STRIDES(C)[0] == 1 || CudaNdarray_HOST_DIMS(C)[0] <= 1) { | |
4248 unit |= (0x1 << 0); | |
4249 } else { | |
4250 unit |= (0x2 << 0); | |
4251 } | |
4252 | |
4253 /* create appropriate strides for malformed matrices that are row or column | |
4254 * vectors | |
4255 */ | |
4256 int sa_0 = (CudaNdarray_HOST_DIMS(A)[0] > 1) ? CudaNdarray_HOST_STRIDES(A)[0] : CudaNdarray_HOST_DIMS(A)[1]; | |
4257 int sa_1 = (CudaNdarray_HOST_DIMS(A)[1] > 1) ? CudaNdarray_HOST_STRIDES(A)[1] : CudaNdarray_HOST_DIMS(A)[0]; | |
4258 int sb_0 = (CudaNdarray_HOST_DIMS(B)[0] > 1) ? CudaNdarray_HOST_STRIDES(B)[0] : CudaNdarray_HOST_DIMS(B)[1]; | |
4259 int sb_1 = (CudaNdarray_HOST_DIMS(B)[1] > 1) ? CudaNdarray_HOST_STRIDES(B)[1] : CudaNdarray_HOST_DIMS(B)[0]; | |
4260 int sc_0 = (CudaNdarray_HOST_DIMS(C)[0] > 1) ? CudaNdarray_HOST_STRIDES(C)[0] : CudaNdarray_HOST_DIMS(C)[1]; | |
4261 int sc_1 = (CudaNdarray_HOST_DIMS(C)[1] > 1) ? CudaNdarray_HOST_STRIDES(C)[1] : CudaNdarray_HOST_DIMS(C)[0]; | |
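    // E.g. (illustrative): for a 1 x n matrix A the first dimension has size
    // 1, so sa_0 is set to n -- a leading dimension cublas will accept even
    // though that stride is never used to address memory.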
4262 | |
4263 float* a = CudaNdarray_DEV_DATA(A); | |
4264 float* b = CudaNdarray_DEV_DATA(B); | |
4265 float* c = CudaNdarray_DEV_DATA(C); | |
4266 cublasOperation_t N = CUBLAS_OP_N; | |
4267 cublasOperation_t T = CUBLAS_OP_T; | |
4268 //std::cerr << (unit/256) % 16 << (unit/16) % 16 << unit % 16 << '\n'; | |
4269 // There should be no negative stride at that point | |
4270 #define CHK_STRIDE_SGEMM(T0, T1, D0, D1, D2, a, x, sx, y, sy, b, z, sz) \ | |
4271 if (sx == 0){sx = 1;}\ | |
4272 if (sy == 0){sy = 1;}\ | |
4273 if (sz == 0){sz = 1;}\ | |
4274 if ((sx > 0) && (sy > 0) && (sz > 0)) { \ | |
4275 err = cublasSgemm(handle, T0, T1, D0, D1, D2, &a, x, sx, y, sy, &b, z, sz); \ | |
4276 } else { \ | |
4277 PyErr_SetString(PyExc_AssertionError, "negative stride to sGemm");\ | |
4278 Py_XDECREF(A_new);\ | |
4279 Py_XDECREF(B_new);\ | |
4280 return -1; \ | |
4281 } | |
4282 | |
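    // Note (descriptive): cublas assumes column-major storage, so the
    // row-major product C = A.B is computed as the column-major product
    // C^T = B^T.A^T; that is why the operand order is (b, a) and the C
    // dimensions are swapped in the sc_0 (row-major C) cases below.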
4283 cublasStatus_t err; | |
4284 switch(unit) | |
4285 { | |
4286 case 0x000: CHK_STRIDE_SGEMM(N, N, CudaNdarray_HOST_DIMS(C)[1], CudaNdarray_HOST_DIMS(C)[0], CudaNdarray_HOST_DIMS(A)[1], alpha, b, sb_0, a, sa_0, beta, c, sc_0); break; | |
4287 case 0x100: CHK_STRIDE_SGEMM(N, T, CudaNdarray_HOST_DIMS(C)[1], CudaNdarray_HOST_DIMS(C)[0], CudaNdarray_HOST_DIMS(A)[1], alpha, b, sb_0, a, sa_1, beta, c, sc_0); break; | |
4288 case 0x010: CHK_STRIDE_SGEMM(T, N, CudaNdarray_HOST_DIMS(C)[1], CudaNdarray_HOST_DIMS(C)[0], CudaNdarray_HOST_DIMS(A)[1], alpha, b, sb_1, a, sa_0, beta, c, sc_0); break; | |
4289 case 0x110: CHK_STRIDE_SGEMM(T, T, CudaNdarray_HOST_DIMS(C)[1], CudaNdarray_HOST_DIMS(C)[0], CudaNdarray_HOST_DIMS(A)[1], alpha, b, sb_1, a, sa_1, beta, c, sc_0); break; | |
4290 case 0x001: CHK_STRIDE_SGEMM(T, T, CudaNdarray_HOST_DIMS(C)[0], CudaNdarray_HOST_DIMS(C)[1], CudaNdarray_HOST_DIMS(A)[1], alpha, a, sa_0, b, sb_0, beta, c, sc_1); break; | |
4291 case 0x101: CHK_STRIDE_SGEMM(N, T, CudaNdarray_HOST_DIMS(C)[0], CudaNdarray_HOST_DIMS(C)[1], CudaNdarray_HOST_DIMS(A)[1], alpha, a, sa_1, b, sb_0, beta, c, sc_1); break; | |
4292 case 0x011: CHK_STRIDE_SGEMM(T, N, CudaNdarray_HOST_DIMS(C)[0], CudaNdarray_HOST_DIMS(C)[1], CudaNdarray_HOST_DIMS(A)[1], alpha, a, sa_0, b, sb_1, beta, c, sc_1); break; | |
4293 case 0x111: CHK_STRIDE_SGEMM(N, N, CudaNdarray_HOST_DIMS(C)[0], CudaNdarray_HOST_DIMS(C)[1], CudaNdarray_HOST_DIMS(A)[1], alpha, a, sa_1, b, sb_1, beta, c, sc_1); break; | |
4294 default: PyErr_Format(PyExc_ValueError, "some matrix has no unit stride (unit=%x)", unit); | |
4295 return -1; | |
4296 }; | |
4297 CNDA_THREAD_SYNC; | |
4298 Py_XDECREF(A_new); | |
4299 Py_XDECREF(B_new); | |
4300 | |
4301 if (CUBLAS_STATUS_SUCCESS != err) | |
4302 { | |
4303 PyErr_Format(PyExc_RuntimeError, | |
4304 "cublasSgemm failed (%i) %s\n" | |
4305 " unit=%x N=%d, c.dims=[%d %d], a.dim=[%d %d], alpha=%f, beta=%f, a=%p, b=%p, c=%p" | |
4306 " sa_0=%d, sa_1=%d, sb_0=%d, sb_1=%d, sc_0=%d, sc_1=%d", | |
4307 err, cublasGetErrorString(err), | |
4308 unit, N, | |
4309 CudaNdarray_HOST_DIMS(C)[0], | |
4310 CudaNdarray_HOST_DIMS(C)[1], | |
4311 CudaNdarray_HOST_DIMS(A)[0], CudaNdarray_HOST_DIMS(A)[1], | |
4312 alpha, beta, a, b, c, sa_0, sa_1, sb_0, sb_1, sc_0, sc_1); | |
4313 | |
4314 return -1; | |
4315 } | |
4316 return 0; | |
4317 } | |
4318 | |
4319 int CudaNdarray_sgemv(float alpha, const CudaNdarray * A, const CudaNdarray * B, float beta, CudaNdarray * C) | |
4320 { | |
4321 /** | |
4322 * C <- alpha A B + beta C | |
4323 * A : matrix | |
4324 * B, C: vector | |
4325 * alpha, beta: scalars | |
4326 */ | |
4327 if (A->nd != 2) { PyErr_SetString(PyExc_ValueError, "non-matrix arg to gemv"); return -1; } | |
4328 if (B->nd != 1) { PyErr_SetString(PyExc_ValueError, "non-vector arg to gemv"); return -1; } | |
4329 if (C->nd != 1) { PyErr_SetString(PyExc_ValueError, "non-vector arg to gemv"); return -1; } | |
4330 | |
4331 // We must allow dimensions to be zero. | |
4332 if ((CudaNdarray_HOST_DIMS(A)[1] != CudaNdarray_HOST_DIMS(B)[0]) | |
4333 || (CudaNdarray_HOST_DIMS(A)[0] != CudaNdarray_HOST_DIMS(C)[0])) | |
4334 { | |
4335 PyErr_Format(PyExc_ValueError, "dimension mismatch in args to gemv (%i,%i)x(%i)->(%i)", | |
4336 CudaNdarray_HOST_DIMS(A)[0], | |
4337 CudaNdarray_HOST_DIMS(A)[1], | |
4338 CudaNdarray_HOST_DIMS(B)[0], | |
4339 CudaNdarray_HOST_DIMS(C)[0]); | |
4340 return -1; | |
4341 } | |
4342 | |
4343 // If matrix A has non-unit size and non-unit stride in both | |
4344 // dimensions, or negative strides, we cannot operate, but we can | |
4345 // make a copy. | |
4346 CudaNdarray * A_new = NULL; | |
4347 CudaNdarray * B_new = NULL; | |
4348 if (((CudaNdarray_HOST_DIMS(A)[0] > 1) | |
4349 && (CudaNdarray_HOST_STRIDES(A)[0] != 1) | |
4350 && (CudaNdarray_HOST_DIMS(A)[1] > 1) | |
4351 && (CudaNdarray_HOST_STRIDES(A)[1] != 1)) | |
4352 || (CudaNdarray_HOST_STRIDES(A)[0] < 0) | |
4353 || (CudaNdarray_HOST_STRIDES(A)[1] < 0)) | |
4354 { | |
4355 A_new = (CudaNdarray*) CudaNdarray_Copy(A); | |
4356 if (!A_new) | |
4357 return -1; | |
4358 A = A_new; | |
4359 } | |
4360 | |
4361 // If vector B has a negative stride, we also have to make a copy. | |
4362 if (CudaNdarray_HOST_STRIDES(B)[0] < 0) | |
4363 { | |
4364 B_new = (CudaNdarray*) CudaNdarray_Copy(B); | |
4365 if (!B_new) | |
4366 { | |
4367 // If A was not copied, A_new is NULL, and Py_XDECREF does not | |
4368 // do anything | |
4369 Py_XDECREF(A_new); | |
4370 return -1; | |
4371 } | |
4372 B = B_new; | |
4373 } | |
4374 | |
4375 // cublas does not handle negative strides as expected | |
4376 if ( (CudaNdarray_HOST_STRIDES(A)[0] < 0) | |
4377 || (CudaNdarray_HOST_STRIDES(A)[1] < 0)) | |
4378 { | |
4379 PyErr_Format(PyExc_ValueError, "illegal strides in args to gemv (%i,%i)", | |
4380 CudaNdarray_HOST_STRIDES(A)[0], | |
4381 CudaNdarray_HOST_STRIDES(A)[1]); | |
4382 Py_XDECREF(A_new); | |
4383 Py_XDECREF(B_new); | |
4384 return -1; | |
4385 } | |
4386 | |
4387 /* create appropriate strides for malformed matrices that are row or column | |
4388 * vectors | |
4389 */ | |
4390 int sa_0 = (CudaNdarray_HOST_DIMS(A)[0] > 1) ? CudaNdarray_HOST_STRIDES(A)[0] : CudaNdarray_HOST_DIMS(A)[1]; | |
4391 int sa_1 = (CudaNdarray_HOST_DIMS(A)[1] > 1) ? CudaNdarray_HOST_STRIDES(A)[1] : CudaNdarray_HOST_DIMS(A)[0]; | |
4392 int sb_0 = (CudaNdarray_HOST_DIMS(B)[0] > 1) ? CudaNdarray_HOST_STRIDES(B)[0] : 1; | |
4393 int sc_0 = (CudaNdarray_HOST_DIMS(C)[0] > 1) ? CudaNdarray_HOST_STRIDES(C)[0] : 1; | |
4394 | |
4395 if (sa_0 == 0) | |
4396 sa_0 = 1; | |
4397 if (sa_1 == 0) | |
4398 sa_1 = 1; | |
4399 | |
4400 // This is important because we can end up not calling Sgemv at all | |
4401 cublasStatus_t err = CUBLAS_STATUS_SUCCESS; | |
4402 if (CudaNdarray_SIZE(C)) { | |
4403 if ((CudaNdarray_HOST_DIMS(A)[0] <= 1) | |
4404 || ((CudaNdarray_HOST_STRIDES(A)[0] == 1) | |
4405 && (CudaNdarray_HOST_STRIDES(A)[1] > 0))) | |
4406 { | |
4407 err = cublasSgemv(handle, CUBLAS_OP_N, | |
4408 CudaNdarray_HOST_DIMS(A)[0], CudaNdarray_HOST_DIMS(A)[1], | |
4409 &alpha, | |
4410 CudaNdarray_DEV_DATA(A), sa_1, | |
4411 CudaNdarray_DEV_DATA(B), sb_0, | |
4412 &beta, | |
4413 CudaNdarray_DEV_DATA(C), sc_0); | |
4414 } | |
4415 else if ((CudaNdarray_HOST_DIMS(A)[1] <= 1) | |
4416 || ((CudaNdarray_HOST_STRIDES(A)[1] == 1) | |
4417 && (CudaNdarray_HOST_STRIDES(A)[0] > 0))) | |
4418 { | |
4419 err = cublasSgemv(handle, CUBLAS_OP_T, | |
4420 CudaNdarray_HOST_DIMS(A)[1], CudaNdarray_HOST_DIMS(A)[0], | |
4421 &alpha, | |
4422 CudaNdarray_DEV_DATA(A), sa_0, | |
4423 CudaNdarray_DEV_DATA(B), sb_0, | |
4424 &beta, | |
4425 CudaNdarray_DEV_DATA(C), sc_0); | |
4426 } | |
4427 else | |
4428 { | |
4429 PyErr_Format(PyExc_AssertionError, | |
4430 "Unexpected stride pattern in gemv: (%i, %i) x %i -> %i.\n" | |
4431 "Shapes are: (%i, %i) x %i -> %i\n", | |
4432 CudaNdarray_HOST_STRIDES(A)[0], | |
4433 CudaNdarray_HOST_STRIDES(A)[1], | |
4434 CudaNdarray_HOST_STRIDES(B)[0], | |
4435 CudaNdarray_HOST_STRIDES(C)[0], | |
4436 CudaNdarray_HOST_DIMS(A)[0], | |
4437 CudaNdarray_HOST_DIMS(A)[1], | |
4438 CudaNdarray_HOST_DIMS(B)[0], | |
4439 CudaNdarray_HOST_DIMS(C)[0]); | |
4440 Py_XDECREF(A_new); | |
4441 Py_XDECREF(B_new); | |
4442 return -1; | |
4443 } | |
4444 } | |
4445 | |
4446 CNDA_THREAD_SYNC; | |
4447 Py_XDECREF(A_new); | |
4448 Py_XDECREF(B_new); | |
4449 | |
4450 if (CUBLAS_STATUS_SUCCESS != err) | |
4451 { | |
4452 PyErr_Format(PyExc_RuntimeError, | |
4453 "cublasSgemv failed (%i)", | |
4454 err); | |
4455 return -1; | |
4456 } | |
4457 return 0; | |
4458 } | |
4459 | |
4460 int CudaNdarray_sger(float alpha, const CudaNdarray * x, const CudaNdarray * y, CudaNdarray * A) { | |
4461 if (x->nd != 1) { PyErr_SetString(PyExc_ValueError, "non-vector arg x to sger"); return -1; } | |
4462 if (y->nd != 1) { PyErr_SetString(PyExc_ValueError, "non-vector arg y to sger"); return -1; } | |
4463 if (A->nd != 2) { PyErr_SetString(PyExc_ValueError, "non-matrix arg A to sger"); return -1; } | |
4464 | |
4465 if ((CudaNdarray_HOST_DIMS(A)[0] != CudaNdarray_HOST_DIMS(x)[0]) | |
4466 || (CudaNdarray_HOST_DIMS(A)[1] != CudaNdarray_HOST_DIMS(y)[0])) { | |
4467 PyErr_Format(PyExc_ValueError, | |
4468 "dimension mismatch in args to sger (%i)x(%i)->(%i,%i)", | |
4469 CudaNdarray_HOST_DIMS(x)[0], | |
4470 CudaNdarray_HOST_DIMS(y)[0], | |
4471 CudaNdarray_HOST_DIMS(A)[0], | |
4472 CudaNdarray_HOST_DIMS(A)[1]); | |
4473 return -1; | |
4474 } | |
4475 | |
4476 int x_strides = CudaNdarray_HOST_STRIDES(x)[0]; | |
4477 CudaNdarray * x_new = NULL; | |
4478 if(x_strides == 0){ | |
4479 if(CudaNdarray_HOST_DIMS(x)[0] != 1){ | |
4480 PyErr_Format(PyExc_RuntimeError, | |
4481 "CudaNdarray_sger: Invalid input x (should not happen)." | |
4482 " We received a CudaNdarray vector with a stride of 0" | |
4483 " that has more than 1 element!"); | |
4484 return -1; | |
4485 } | |
4486 x_strides = 1; | |
4487 } else if(x_strides < 0){ | |
4488 x_new = (CudaNdarray*) CudaNdarray_Copy(x); | |
4489 x = x_new; | |
4490 x_strides = CudaNdarray_HOST_STRIDES(x)[0]; | |
4491 } | |
4492 | |
4493 int y_strides = CudaNdarray_HOST_STRIDES(y)[0]; | |
4494 CudaNdarray * y_new = NULL; | |
4495 if(y_strides == 0){ | |
4496 if(CudaNdarray_HOST_DIMS(y)[0] != 1){ | |
4497 PyErr_Format(PyExc_RuntimeError, | |
4498 "CudaNdarray_sger: Invalid input y (should not happen)." | |
4499 " We received a CudaNdarray vector with a stride of 0" | |
4500 " that has more than 1 elements!"); | |
4501 Py_XDECREF(x_new); | |
4502 return -1; | |
4503 } | |
4504 y_strides = 1; | |
4505 } else if(y_strides < 0){ | |
4506 y_new = (CudaNdarray*) CudaNdarray_Copy(y); | |
4507 y = y_new; | |
4508 y_strides = CudaNdarray_HOST_STRIDES(y)[0]; | |
4509 } | |
4510 | |
4511 // Create appropriate strides if A is a row or column vector | |
4512 int sa_0 = (CudaNdarray_HOST_DIMS(A)[0] > 1) ? CudaNdarray_HOST_STRIDES(A)[0] | |
4513 : CudaNdarray_HOST_DIMS(A)[1]; | |
4514 int sa_1 = (CudaNdarray_HOST_DIMS(A)[1] > 1) ? CudaNdarray_HOST_STRIDES(A)[1] | |
4515 : CudaNdarray_HOST_DIMS(A)[0]; | |
4516 | |
4517 // This is important because we can end up not calling Sger at all | |
4518 cublasStatus_t err = CUBLAS_STATUS_SUCCESS; | |
4519 if(CudaNdarray_SIZE(A)){ | |
4520 // If A is in col-major | |
4521 if ((CudaNdarray_HOST_DIMS(A)[0] <= 1) | |
4522 || ((CudaNdarray_HOST_STRIDES(A)[0] == 1) | |
4523 && (CudaNdarray_HOST_STRIDES(A)[1] > 0))) | |
4524 { | |
4525 err = cublasSger(handle, CudaNdarray_HOST_DIMS(x)[0], CudaNdarray_HOST_DIMS(y)[0], &alpha, | |
4526 CudaNdarray_DEV_DATA(x), x_strides, | |
4527 CudaNdarray_DEV_DATA(y), y_strides, | |
4528 CudaNdarray_DEV_DATA(A), sa_1); | |
4529 } | |
4530 // Since Sger expects A in col-major, we swap x and y to fake the transpose. | |
4531 else if ((CudaNdarray_HOST_DIMS(A)[1] <= 1) | |
4532 || ((CudaNdarray_HOST_STRIDES(A)[1] == 1) | |
4533 && (CudaNdarray_HOST_STRIDES(A)[0] > 0))) | |
4534 { | |
4535 err = cublasSger(handle, CudaNdarray_HOST_DIMS(y)[0], CudaNdarray_HOST_DIMS(x)[0], &alpha, | |
4536 CudaNdarray_DEV_DATA(y), y_strides, | |
4537 CudaNdarray_DEV_DATA(x), x_strides, | |
4538 CudaNdarray_DEV_DATA(A), sa_0); | |
4539 } | |
4540 // A has to be either c- or f-contiguous, with no negative strides | |
4541 else | |
4542 { | |
4543 PyErr_SetString(PyExc_NotImplementedError, | |
4544 "non-contiguous A, or negative strides, in sger"); | |
4545 Py_XDECREF(x_new); | |
4546 Py_XDECREF(y_new); | |
4547 return -1; | |
4548 } | |
4549 } | |
4550 CNDA_THREAD_SYNC; | |
4551 Py_XDECREF(x_new); | |
4552 Py_XDECREF(y_new); | |
4553 | |
4554 if (CUBLAS_STATUS_SUCCESS != err) | |
4555 { | |
4556 PyErr_Format(PyExc_RuntimeError, | |
4557 "cublasSger failed (%i)", | |
4558 err); | |
4559 return -1; | |
4560 } | |
4561 | |
4562 return 0; | |
4563 } | |
4564 | |
4565 /** | |
4566 * | |
4567 * Precondition: | |
4568 * a->dim[d] == (dims_a[d]==0) ? (1 << log2_dims_a[d]) : dims_a[d] | |
4569 * z->dim[d] == (z_str[d]==0) ? 1 : dims_a[d]; | |
4570 * | |
4571 * TODO: templatize this function to support other reductions. | |
4572 * All that needs to change is the initial value for sum, and the reduction operator. | |
4573 */ | |
4574 | |
4575 static __global__ void kernel_reduce_sum(const unsigned int size_z, | |
4576 const unsigned int nd, | |
4577 const int * dims_a, | |
4578 const int * log2_dims_a, | |
4579 const int * a_str, | |
4580 const float * a_data, | |
4581 const int * z_str, | |
4582 float * z_data) | |
4583 { | |
4584 const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; | |
4585 const unsigned int numThreads = blockDim.x * gridDim.x; | |
4586 | |
4587 //structure data contains the strides and dimensions of both a and z | |
4588 // a_dim[0], a_dim[1], ... a_dim[nd-1], | |
4589 // a_log2dim[0], a_log2dim[1], ... a_log2dim[nd-1], | |
4590 // a_str[0], ... a_str[nd-1], | |
4591 // z_str[0], ... z_str[nd-1] | |
4592 extern __shared__ int structure_data[]; | |
4593 for (unsigned int i = threadIdx.x; i < nd; i += blockDim.x) | |
4594 { | |
4595 structure_data[i+0*nd] = dims_a[i]; | |
4596 structure_data[i+1*nd] = log2_dims_a[i]; | |
4597 structure_data[i+2*nd] = a_str[i]; | |
4598 structure_data[i+3*nd] = z_str[i]; | |
4599 } | |
4600 dims_a = structure_data; | |
4601 log2_dims_a = structure_data + nd; | |
4602 a_str = structure_data + 2*nd; | |
4603 z_str = structure_data + 3*nd; | |
4604 | |
4605 __syncthreads(); //wait for all the shared structure to be loaded | |
4606 | |
4607 for (unsigned int i = idx; i < size_z; i += numThreads) | |
4608 { | |
4609 unsigned int ii = i; | |
4610 const float * a_data_i = a_data; | |
4611 float * z_data_i = z_data; | |
4612 unsigned int n_reduce_elements = 1; | |
4613 unsigned int n_reduce_dims = 0; | |
4614 unsigned int reduce_dim0 = nd-1; | |
4615 | |
4616 | |
4617 //In this loop, we locate the initial element of the slice that we'd like to reduce with this thread | |
4618 // At the same time, we [re]calculate the size of that slice (n_reduce_elements) | |
4619 for (unsigned int d = 0; d < nd; ++d) | |
4620 { | |
4621 if (a_str[d] && (!z_str[d])) // this means 'd' is a dimension we are reducing over | |
4622 { | |
4623 n_reduce_elements *= dims_a[d]; | |
4624 n_reduce_dims += 1; | |
4625 reduce_dim0 = (d < reduce_dim0) ? d : reduce_dim0; | |
4626 } | |
4627 else //'d' is not a dimension that we are reducing over | |
4628 { | |
4629 unsigned int pos_d; | |
4630 if (log2_dims_a[d]==-1) //TODO: when things are working, use this switch | |
4631 { | |
4632 // this branch is not preferred, | |
4633 // because the CUDA manual says integer mod and div operations are slow on the gpu | |
4634 pos_d = (ii % dims_a[d]); | |
4635 ii = (ii / dims_a[d]); | |
4636 } | |
4637 else | |
4638 { | |
4639 pos_d = (ii & ((1 << log2_dims_a[d])-1)); //take the lower log2_dims bits | |
4640 ii = (ii >> log2_dims_a[d]); //shift those lower log2_dims bits off of ii | |
4641 } | |
4642 a_data_i += pos_d * a_str[d]; | |
4643 z_data_i += pos_d * z_str[d]; | |
4644 } | |
4645 } | |
4646 // now we've got pointers a_data_i and z_data_i into element 0 of the slice over which we are reducing | |
4647 // do a similar loop | |
4648 | |
4649 float sum = 0.0f; | |
4650 switch(n_reduce_dims) | |
4651 { | |
4652 case 0: | |
4653 { | |
4654 sum = a_data_i[0]; | |
4655 } | |
4656 break; | |
4657 case 1: | |
4658 { | |
4659 const int stride = a_str[reduce_dim0]; | |
4660 const float * a_data_i_max = a_data_i + dims_a[reduce_dim0] * stride; | |
4661 while (a_data_i != a_data_i_max) | |
4662 { | |
4663 sum += a_data_i[0]; | |
4664 a_data_i += stride; | |
4665 } | |
4666 } | |
4667 break; | |
4668 case 2: | |
4669 { | |
4670 int rd = reduce_dim0+1; | |
4671 for (; rd < nd; ++rd) | |
4672 { | |
4673 if (a_str[rd] && (!z_str[rd])) // this means 'rd' is a dimension we are reducing over | |
4674 break; | |
4675 } | |
4676 const int stride0 = a_str[reduce_dim0]; | |
4677 const int stride1 = a_str[rd]; | |
4678 for (int ii = 0; ii < dims_a[rd]; ++ii) | |
4679 { | |
4680 const float * a_data_ri = a_data_i + ii * stride1; | |
4681 const float * a_data_ri_max = a_data_ri + dims_a[reduce_dim0] * stride0; | |
4682 while (a_data_ri != a_data_ri_max) | |
4683 { | |
4684 sum += a_data_ri[0]; | |
4685 a_data_ri += stride0; | |
4686 } | |
4687 } | |
4688 }; | |
4689 break; | |
4690 default: | |
4691 { | |
4692 for (unsigned int reduce_i = 0; reduce_i < n_reduce_elements; ++reduce_i) | |
4693 { | |
4694 //TODO: optimize this loop to work more like theano's Elemwise. It's serial code. | |
4695 unsigned int reduce_ii = reduce_i; | |
4696 const float * a_data_ri = a_data_i; | |
4697 | |
4698 //This loop finds the element in the a slice to add. | |
4699 for (unsigned int rd = reduce_dim0; rd < nd; ++rd) | |
4700 { | |
4701 unsigned int pos_d; | |
4702 if (a_str[rd] && (!z_str[rd])) // this means 'd' is a dimension we are reducing over | |
4703 { | |
4704 if (log2_dims_a[rd]==-1) | |
4705 { | |
4706 // this branch is not preferred, | |
4707 // because the CUDA manual says integer mod and div operations are slow on the gpu | |
4708 pos_d = (reduce_ii % dims_a[rd]); | |
4709 reduce_ii = (reduce_ii / dims_a[rd]); | |
4710 } | |
4711 else | |
4712 { | |
4713 pos_d = (reduce_ii & ((1 << log2_dims_a[rd])-1)); //take the lower log2_dims bits | |
4714 reduce_ii = (reduce_ii >> log2_dims_a[rd]); //shift those lower log2_dims bits off of ii | |
4715 } | |
4716 a_data_ri += pos_d * a_str[rd]; | |
4717 } | |
4718 } | |
4719 sum += a_data_ri[0]; | |
4720 } | |
4721 } | |
4722 } | |
4723 z_data_i[0] = sum; | |
4724 } | |
4725 } | |
4726 | |
4727 static __global__ void kernel_reduce_sum_1011( | |
4728 const unsigned int d0, | |
4729 const unsigned int d1, | |
4730 const unsigned int d2, | |
4731 const unsigned int d3, | |
4732 const float *A, const int sA0, const int sA1, const int sA2, const int sA3, | |
4733 float * Z, const int sZ0) | |
4734 { | |
4735 const int threadCount = blockDim.x * blockDim.y * blockDim.z; | |
4736 const int threadNum = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; | |
4737 extern __shared__ float buf[]; | |
4738 float mysum = 0.0f; | |
4739 | |
4740 if (warpSize != 32) | |
4741 { | |
4742 return; //TODO: set error code | |
4743 } | |
4744 | |
4745 for (int i0 = threadIdx.z; i0 < d0; i0 += blockDim.z) | |
4746 { | |
4747 float Ai = A[i0 * sA0 + blockIdx.x * sA1 + threadIdx.y * sA2 + threadIdx.x * sA3]; | |
4748 mysum += Ai; | |
4749 } | |
4750 buf[threadNum] = mysum; | |
4751 __syncthreads(); | |
4752 | |
4753 // rest of function is handled by one warp | |
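    // NB (descriptive note): the tree reduction below assumes implicit
    // warp-synchronous execution, which held on the pre-Volta GPUs this
    // file targets; newer architectures would need __syncwarp() or a
    // volatile shared buffer between steps.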
4754 if (threadNum < warpSize) | |
4755 { | |
4756 for (int i = threadNum + warpSize; i < threadCount; i += warpSize) | |
4757 { | |
4758 mysum += buf[i]; | |
4759 } | |
4760 buf[threadNum] = mysum; | |
4761 if (threadNum < 16) | |
4762 { | |
4763 //reduce so that threadNum 0 has the sum of everything | |
4764 if(threadNum + 16 < threadCount) buf[threadNum] += buf[threadNum+16]; | |
4765 if(threadNum + 8 < threadCount) buf[threadNum] += buf[threadNum+8]; | |
4766 if(threadNum + 4 < threadCount) buf[threadNum] += buf[threadNum+4]; | |
4767 if(threadNum + 2 < threadCount) buf[threadNum] += buf[threadNum+2]; | |
4768 if(threadNum + 1 < threadCount) buf[threadNum] += buf[threadNum+1]; | |
4769 if (threadNum == 0) | |
4770 { | |
4771 Z[blockIdx.x*sZ0] = buf[0]; | |
4772 } | |
4773 } | |
4774 } | |
4775 } | |
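// Illustrative walk-through (assumes threadCount == 64 and warpSize == 32;
// these are not values taken from the original run): after the first warp
// loop, threads 0..31 have folded buf[32..63] into their own buf slots; the
// five guarded adds then halve the active range (16, 8, 4, 2, 1) until
// buf[0] holds the whole block's sum, which thread 0 writes to Z[blockIdx.x*sZ0].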
4776 /** | |
4777 * Dimensions in which self has size 1 and A has size > 1 are considered summing dimensions.
4778 * Dimensions in which self has size > 1 and A has size > 1 are considered non-summing dimensions; in this case their sizes must be equal.
4779 */ | |
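// Worked example (illustrative, not from the original source): if A has dims
// [4, 5, 1, 6] and self has dims [4, 1, 1, 6], then dimension 1 is a summing
// dimension (self size 1, A size 5), dimensions 0 and 3 are non-summing
// dimensions with matching sizes, and dimension 2 matches trivially.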
4780 int | |
4781 CudaNdarray_reduce_sum(CudaNdarray * self, CudaNdarray * A) | |
4782 { | |
4783 int verbose = 0; | |
4784 //check input rank | |
4785 if (self->nd != A->nd) | |
4786 { | |
4787 PyErr_Format(PyExc_TypeError, "Rank mismatch in CudaNdarray_sum: %i vs %i", self->nd, A->nd); | |
4788 return -1; | |
4789 } | |
4790 for (int i = 0; i < self->nd; ++i) | |
4791 { | |
4792 if ((CudaNdarray_HOST_DIMS(self)[i] > 1) && (CudaNdarray_HOST_DIMS(self)[i] != CudaNdarray_HOST_DIMS(A)[i])) | |
4793 { | |
4794 PyErr_Format(PyExc_TypeError, "Dimension mismatch in CudaNdarray_sum: self->dim[%i] == %i , A->dim[%i] = %i", | |
4795 i, CudaNdarray_HOST_DIMS(self)[i], i, CudaNdarray_HOST_DIMS(A)[i]); | |
4796 return -1; | |
4797 } | |
4798 } | |
4799 | |
4800 int n_summations = (unsigned int)CudaNdarray_SIZE(self); | |
4801 if (verbose) | |
4802 { | |
4803 std::cerr << "reduce_sum n_summations " << n_summations << '\n'; | |
4804 std::cerr << "reduce_sum nd " << self->nd << '\n'; | |
4805 fprint_CudaNdarray(stderr, A); | |
4806 fprint_CudaNdarray(stderr, self); | |
4807 } | |
4808 if (0 && (A->nd == 4) //check whether kernel_reduce_sum_1011 applies (branch currently disabled by the 0 &&)
4809 && (CudaNdarray_HOST_DIMS(self)[0] == 1) | |
4810 && (CudaNdarray_HOST_DIMS(self)[2] == 1) | |
4811 && (CudaNdarray_HOST_DIMS(self)[3] == 1) | |
4812 ) | |
4813 { | |
4814 dim3 n_threads(CudaNdarray_HOST_DIMS(A)[3], CudaNdarray_HOST_DIMS(A)[2]); | |
4815 dim3 n_blocks(CudaNdarray_HOST_DIMS(A)[1]); | |
4816 while (n_threads.x * n_threads.y * n_threads.z < NUM_VECTOR_OP_THREADS_PER_BLOCK) ++n_threads.z; | |
4817 n_threads.z -= 1; | |
4818 if (n_threads.z > 64) n_threads.z = 64; | |
4819 if (n_threads.z) | |
4820 { | |
4821 if (verbose) printf("trying kernel_reduce_sum_1011\n"); | |
4822 int n_shared = sizeof(float) * n_threads.x * n_threads.y * n_threads.z; | |
4823 kernel_reduce_sum_1011<<<n_blocks, n_threads, n_shared>>>( | |
4824 CudaNdarray_HOST_DIMS(A)[0], | |
4825 CudaNdarray_HOST_DIMS(A)[1], | |
4826 CudaNdarray_HOST_DIMS(A)[2], | |
4827 CudaNdarray_HOST_DIMS(A)[3], | |
4828 CudaNdarray_DEV_DATA(A), | |
4829 CudaNdarray_HOST_STRIDES(A)[0], | |
4830 CudaNdarray_HOST_STRIDES(A)[1], | |
4831 CudaNdarray_HOST_STRIDES(A)[2], | |
4832 CudaNdarray_HOST_STRIDES(A)[3], | |
4833 CudaNdarray_DEV_DATA(self), | |
4834 CudaNdarray_HOST_STRIDES(self)[1]); | |
4835 CNDA_THREAD_SYNC; | |
4836 if (cudaSuccess == cudaGetLastError()) return 0; | |
4837 if (verbose) printf("failed, falling back to kernel_reduce_sum\n"); | |
4838 } | |
4839 } | |
4840 | |
4841 int n_threads_per_block = std::min(n_summations, | |
4842 NUM_VECTOR_OP_THREADS_PER_BLOCK); | |
4843 int n_blocks = std::min(ceil_intdiv(n_summations,n_threads_per_block), | |
4844 NUM_VECTOR_OP_BLOCKS); | |
4845 int n_structure_cache = self->nd * 4 * sizeof(int); | |
4846 | |
4847 if (verbose) | |
4848 { | |
4849 std::cerr << "n_blocks, n_threads_per_block " << n_blocks << ' ' << n_threads_per_block << '\n'; | |
4850 } | |
4851 assert (self->nd > 0); | |
4852 assert (self->nd == A->nd); | |
4853 kernel_reduce_sum<<<n_blocks, n_threads_per_block, n_structure_cache>>>( | |
4854 n_summations, | |
4855 self->nd, | |
4856 CudaNdarray_DEV_DIMS(A), | |
4857 CudaNdarray_DEV_LOG2DIMS(A), | |
4858 CudaNdarray_DEV_STRIDES(A), | |
4859 CudaNdarray_DEV_DATA(A), | |
4860 CudaNdarray_DEV_STRIDES(self), | |
4861 CudaNdarray_DEV_DATA(self)); | |
4862 CNDA_THREAD_SYNC; | |
4863 cudaError_t err = cudaGetLastError(); | |
4864 if (cudaSuccess != err) | |
4865 { | |
4866 PyErr_Format(PyExc_RuntimeError, "Cuda error: %s: %s.\n", "kernel_reduce_sum", cudaGetErrorString(err)); | |
4867 return -1; | |
4868 } | |
4869 return 0; | |
4870 } | |
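// Launch-shape sketch (illustrative; assumes NUM_VECTOR_OP_THREADS_PER_BLOCK
// == 256 purely to make the arithmetic concrete): with n_summations == 3000,
//   n_threads_per_block = min(3000, 256) = 256
//   n_blocks            = min(ceil_intdiv(3000, 256), NUM_VECTOR_OP_BLOCKS)
//                       = min(12, NUM_VECTOR_OP_BLOCKS)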
4871 int | |
4872 CudaNdarray_reduce_prod(CudaNdarray * self, const CudaNdarray * A) | |
4873 { | |
4874 PyErr_SetString(PyExc_NotImplementedError, "CudaNdarray_reduce_prod");
4875 return -1; | |
4876 } | |
4877 int | |
4878 CudaNdarray_reduce_min(CudaNdarray * self, const CudaNdarray * A) | |
4879 { | |
4880 PyErr_SetString(PyExc_NotImplementedError, "CudaNdarray_reduce_min");
4881 return -1; | |
4882 } | |
4883 int | |
4884 CudaNdarray_reduce_max(CudaNdarray * self, const CudaNdarray * A) | |
4885 { | |
4886 PyErr_SetString(PyExc_NotImplementedError, "CudaNdarray_reduce_max");
4887 return -1; | |
4888 } | |
4889 | |
4890 | |
4891 /** | |
4892 * | |
4893 * pattern is a permutation of [0, 1, ... self->nd-1] with the following twists: | |
4894 * - an element 'd' of the permutation can be dropped if CudaNdarray_HOST_DIMS(self)[d] == 1 | |
4895 * - any number of '-1' elements can be in the pattern, and they will cause new ranks (with dim==1) to be inserted. | |
4896 * | |
4897 * For example, if CudaNdarray_HOST_DIMS(self) == [4, 5, 1, 6], and pattern = [0,3,-1,-1, 1], then CudaNdarray_HOST_DIMS(self) would be modified to become: | |
4898 * [4, 6, 1, 1, 5] (we dropped the original dim[2]==1, and inserted two singleton dimensions with the -1s).
4899 */ | |
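// Minimal C usage sketch for the example above (hypothetical values):
//   const int pattern[5] = {0, 3, -1, -1, 1};
//   if (CudaNdarray_dimshuffle(self, 5, pattern) != 0)
//   {
//       // error: a Python exception has already been set
//   }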
4900 int | |
4901 CudaNdarray_dimshuffle(CudaNdarray * self, unsigned int len, const int * pattern) | |
4902 { | |
4903 //TODO: pass a workspace pointer to avoid the internal malloc | |
4904 int * newdims = (int *)malloc(sizeof(int) * (len + len + self->nd)); //we tack the dims_taken buffer on here to avoid a second malloc.
4905 int * newstrides = newdims + len; | |
4906 int * dims_taken = newstrides + len; | |
4907 if (!newdims) | |
4908 { | |
4909 PyErr_SetString(PyExc_MemoryError, "CudaNdarray_dimshuffle: Failed to allocate temporary space"); | |
4910 return -1; | |
4911 } | |
4912 for (int i = 0; i < self->nd; ++i) | |
4913 { | |
4914 dims_taken[i] = 0; | |
4915 } | |
4916 for (int i = 0; i < len; ++i) | |
4917 { | |
4918 if (pattern[i] < 0) | |
4919 { | |
4920 newdims[i] = 1; | |
4921 newstrides[i] = 0; | |
4922 } | |
4923 else if(dims_taken[pattern[i]]) | |
4924 { | |
4925 PyErr_Format(PyExc_ValueError, "CudaNdarray_dimshuffle: invalid pattern. You used dimension %d multiple times",
4926 pattern[i]);
4927 free(newdims); | |
4928 return -1; | |
4929 } | |
4930 else if (pattern[i]>= self->nd) | |
4931 { | |
4932 PyErr_Format(PyExc_ValueError, "CudaNdarray_dimshuffle: invalid pattern. You asked for dimension %d, but the CudaNdarray has only %d dimensions",
4933 pattern[i], self->nd);
4934 free(newdims); | |
4935 return -1; | |
4936 } | |
4937 else | |
4938 { | |
4939 newdims[i] = CudaNdarray_HOST_DIMS(self)[pattern[i]]; | |
4940 newstrides[i] = CudaNdarray_HOST_STRIDES(self)[pattern[i]]; | |
4941 dims_taken[pattern[i]] = 1; | |
4942 } | |
4943 } | |
4944 //Check if we dropped non-broadcastable dims
4945 for (int i = 0; i < self->nd; ++i) | |
4946 { | |
4947 if (dims_taken[i]==0 && CudaNdarray_HOST_DIMS(self)[i]!=1) | |
4948 { | |
4949 PyErr_SetString(PyExc_ValueError, "CudaNdarray_dimshuffle: You cannot drop a non-broadcastable dimension.");
4950 free(newdims); | |
4951 return -1; | |
4952 } | |
4953 } | |
4954 //swap this structure in for the one in self, and sync to the card | |
4955 if (CudaNdarray_set_nd(self, len)) | |
4956 { | |
4957 free(newdims); | |
4958 return -1; | |
4959 } | |
4960 for (int i = 0; i < len; ++i) | |
4961 { | |
4962 CudaNdarray_set_dim(self, i, newdims[i]); | |
4963 CudaNdarray_set_stride(self, i, newstrides[i]); | |
4964 } | |
4965 if (cnda_copy_structure_to_device(self)) | |
4966 { | |
4967 free(newdims); | |
4968 return -1; | |
4969 } | |
4970 free(newdims); | |
4971 return 0; | |
4972 } | |
4973 | |
4974 | |
4975 | |
4976 /** | |
4977 * | |
4978 * This is the function that binds to Python.
4979 * See CudaNdarray_dimshuffle to call it from C.
4980 * We use -1 to mean 'x' as in Tensor Dimshuffle. | |
4981 */ | |
4982 PyObject * | |
4983 CudaNdarray_Dimshuffle(PyObject* _unused, PyObject* args) | |
4984 { | |
4985 PyObject * self = NULL; | |
4986 PyObject * pattern_object = NULL; | |
4987 int * pattern = NULL; | |
4988 PyObject * rval = NULL; | |
4989 int success = -1; | |
4990 //const int * dims = NULL; | |
4991 | |
4992 //args should consist of two python objects ("OO") | |
4993 if (! PyArg_ParseTuple(args, "OO", &self, &pattern_object)) | |
4994 return NULL; | |
4995 | |
4996 if (!CudaNdarray_Check(self) ) | |
4997 { | |
4998 PyErr_SetString(PyExc_TypeError, "First argument to cuda_ndarray.dimshuffle must be a CudaNdarray"); | |
4999 return NULL; | |
5000 } | |
5001 | |
5002 //parse pattern_object into int * pattern | |
5003 | |
5004 Py_ssize_t pattern_dim = PyObject_Length(pattern_object); | |
5005 | |
5006 if (pattern_dim < 0) | |
5007 { | |
5008 PyErr_SetString(PyExc_TypeError, "Couldn't get length of second argument to cuda_ndarray.dimshuffle");
5009 return NULL; | |
5010 } | |
5011 | |
5012 pattern = (int *) malloc( pattern_dim * sizeof(int)); | |
5013 | |
5014 for (Py_ssize_t i = 0; i < pattern_dim; i++) | |
5015 { | |
5016 PyObject * idx = PyLong_FromLong(i); | |
5017 | |
5018 if (idx == NULL) | |
5019 { | |
5020 PyErr_SetString(PyExc_Exception, "Couldn't make long object to loop over list/tuple"); | |
5021 goto CudaNdarray_dimshuffle_fail; | |
5022 } | |
5023 | |
5024 long elem_value = 0; | |
5025 | |
5026 PyObject * elem = PyObject_GetItem(pattern_object, idx); | |
5027 | |
5028 if (elem == NULL) | |
5029 { | |
5030 Py_XDECREF( elem); | |
5031 PyErr_SetString(PyExc_ValueError, "Second argument to dimshuffle must be a list or tuple of integers");
5032 goto CudaNdarray_dimshuffle_fail; | |
5033 } | |
5034 | |
5035 elem_value = PyInt_AsLong(elem); | |
5036 | |
5037 if (elem_value == -1 && PyErr_Occurred() ) | |
5038 { | |
5039 Py_XDECREF(elem); | |
5040 PyErr_SetString(PyExc_ValueError, "Second argument to dimshuffle must be a list or tuple of integers");
5041 goto CudaNdarray_dimshuffle_fail; | |
5042 } | |
5043 | |
5044 pattern[i] = elem_value; | |
5045 | |
5046 Py_XDECREF( elem ); | |
5047 Py_XDECREF( idx ); | |
5048 } | |
5049 | |
5050 //allocate rval | |
5051 rval = (PyObject *) CudaNdarray_View((CudaNdarray *) self); | |
5052 | |
5053 if (rval == NULL) | |
5054 { | |
5055 //CudaNdarray_View should have set the exception string
5056 goto CudaNdarray_dimshuffle_fail; | |
5057 } | |
5058 | |
5059 | |
5060 //printf("pattern_dim: %d\n",pattern_dim); | |
5061 //printf("pattern: %d %d\n",pattern[0],pattern[1]); | |
5062 //dims = CudaNdarray_HOST_DIMS( (CudaNdarray *) self); | |
5063 //printf("dims before: %d %d\n",dims[0],dims[1]); | |
5064 | |
5065 success = CudaNdarray_dimshuffle((CudaNdarray *) rval, pattern_dim, pattern); | |
5066 | |
5067 if (success != 0) | |
5068 { | |
5069 //Exception string should already be set by CudaNdarray_dimshuffle | |
5070 goto CudaNdarray_dimshuffle_fail; | |
5071 } | |
5072 | |
5073 free(pattern); | |
5074 | |
5075 return rval; | |
5076 | |
5077 CudaNdarray_dimshuffle_fail: | |
5078 | |
5079 if (pattern != NULL) | |
5080 free(pattern); | |
5081 | |
5082 Py_XDECREF(rval); | |
5083 return NULL; | |
5084 } | |
5085 | |
5086 | |
5087 int | |
5088 cnda_structure_size(int nd) | |
5089 { | |
5090 // dim0, dim1, ... | |
5091 // str0, str1, ... | |
5092 // log2(dim0), log2(dim1), ... | |
5093 return nd + nd + nd; | |
5094 } | |
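// Example layout (follows from the comment above): for nd == 2 the host
// structure is 6 ints, {dim0, dim1, str0, str1, log2(dim0), log2(dim1)},
// which the HOST_DIMS/HOST_STRIDES/HOST_LOG2DIMS accessors below index into.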
5095 | |
5096 const int * | |
5097 CudaNdarray_HOST_DIMS(const CudaNdarray * self) | |
5098 { | |
5099 return self->host_structure; | |
5100 } | |
5101 | |
5102 const int * | |
5103 CudaNdarray_HOST_STRIDES(const CudaNdarray * self) | |
5104 { | |
5105 return self->host_structure + self->nd; | |
5106 } | |
5107 const int * | |
5108 CudaNdarray_HOST_LOG2DIMS(const CudaNdarray * self) | |
5109 { | |
5110 return self->host_structure + 2*self->nd; | |
5111 } | |
5112 | |
5113 int | |
5114 CudaNdarray_EqualAndIgnore(CudaNdarray *cnda1, CudaNdarray *cnda2, int ignoreSync, int ignoreBase) | |
5115 { | |
5116 int verbose = 0; | |
5117 | |
5118 if (!ignoreSync && cnda1->dev_structure_fresh != cnda2->dev_structure_fresh) | |
5119 { | |
5120 if(verbose) fprintf(stdout, "CUDANDARRAY_EQUAL FAILED : 1\n"); | |
5121 return 0; | |
5122 } | |
5123 | |
5124 if (cnda1->nd != cnda2->nd) | |
5125 { | |
5126 if(verbose) fprintf(stdout, "CUDANDARRAY_EQUAL FAILED : 2\n"); | |
5127 return 0; | |
5128 } | |
5129 | |
5130 for (int i=0; i < 2*cnda1->nd; i++) | |
5131 { | |
5132 if (cnda1->host_structure[i] != cnda2->host_structure[i]) | |
5133 { | |
5134 if(verbose) | |
5135 fprintf(stdout, "CUDANDARRAY_EQUAL : host_structure : %d, %d, %d\n", i, cnda1->host_structure[i], cnda2->host_structure[i]); | |
5136 return 0; | |
5137 } | |
5138 } | |
5139 | |
5140 if (!ignoreBase && cnda1->base != cnda2->base) | |
5141 { | |
5142 if(verbose) fprintf(stdout, "CUDANDARRAY_EQUAL FAILED : 4\n");
5143 return 0; | |
5144 } | |
5145 else if (cnda1->data_allocated != cnda2->data_allocated) | |
5146 { | |
5147 if(verbose) fprintf(stdout, "CUDANDARRAY_EQUAL FAILED : 5\n");
5148 return 0; | |
5149 } | |
5150 else if (cnda1->data_allocated && cnda1->devdata != cnda2->devdata) | |
5151 { | |
5152 if(verbose) fprintf(stdout, "CUDANDARRAY_EQUAL FAILED : 6\n");
5153 // no need to check devdata if data is not allocated | |
5154 return 0; | |
5155 } | |
5156 | |
5157 return 1; | |
5158 } | |
5159 | |
5160 | |
5161 int | |
5162 CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2) | |
5163 { | |
5164 return CudaNdarray_EqualAndIgnore(cnda1, cnda2, 0, 0); | |
5165 } | |
5166 | |
5167 int | |
5168 cnda_copy_structure_to_device(const CudaNdarray * self) | |
5169 { | |
5170 //If the device structure does not exist, create it.
5171 //We allocate it here as we do not need it often.
5172 //In fact, we need it so infrequently that we expect
5173 //most objects won't need it. Not allocating it
5174 //saves significant time when creating an object.
5175 //This speeds up a benchmark by 8% with the gc.
5176 if (!self->dev_structure) | |
5177 { | |
5178 int struct_size = cnda_structure_size(self->nd); | |
5179 if (struct_size) | |
5180 { | |
5181 self->dev_structure = (int*)device_malloc(struct_size* sizeof(int)); | |
5182 if (NULL == self->dev_structure) | |
5183 { | |
5184 return -1; | |
5185 } | |
5186 } | |
5187 } | |
5188 if (cublasSetVector(cnda_structure_size(self->nd), | |
5189 sizeof(int), | |
5190 self->host_structure, | |
5191 1, | |
5192 self->dev_structure, | |
5193 1) != CUBLAS_STATUS_SUCCESS) | |
5194 { | |
5195 PyErr_SetString(PyExc_RuntimeError, "error copying structure to device memory"); | |
5196 return -1; | |
5197 } | |
5198 self->dev_structure_fresh = 1; | |
5199 return 0; | |
5200 } | |
5201 | |
5202 const int * | |
5203 CudaNdarray_DEV_DIMS(const CudaNdarray * self) | |
5204 { | |
5205 if (!self->dev_structure_fresh) | |
5206 { | |
5207 if (cnda_copy_structure_to_device(self)) | |
5208 return NULL; | |
5209 } | |
5210 return self->dev_structure; | |
5211 } | |
5212 const int * | |
5213 CudaNdarray_DEV_STRIDES(const CudaNdarray * self) | |
5214 { | |
5215 if (!self->dev_structure_fresh) | |
5216 { | |
5217 if (cnda_copy_structure_to_device(self)) | |
5218 return NULL; | |
5219 } | |
5220 return self->dev_structure + self->nd; | |
5221 } | |
5222 const int * | |
5223 CudaNdarray_DEV_LOG2DIMS(const CudaNdarray * self) | |
5224 { | |
5225 if (!self->dev_structure_fresh) | |
5226 { | |
5227 if (cnda_copy_structure_to_device(self)) | |
5228 return NULL; | |
5229 } | |
5230 return self->dev_structure + 2*self->nd; | |
5231 } | |
5232 float * | |
5233 CudaNdarray_DEV_DATA(const CudaNdarray * self) | |
5234 { | |
5235 return self->devdata; | |
5236 } | |
5237 | |
5238 /** | |
5239 * Return the number of elements in the ndarray (product of the dimensions) | |
5240 */ | |
5241 size_t | |
5242 CudaNdarray_SIZE(const CudaNdarray *self) | |
5243 { | |
5244 if (self->nd == -1) return 0; | |
5245 size_t size = 1; | |
5246 for (int i = 0; i < self->nd; ++i) | |
5247 { | |
5248 size *= CudaNdarray_HOST_DIMS(self)[i]; | |
5249 } | |
5250 return size; | |
5251 } | |
5252 | |
5253 PyObject * | |
5254 CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure) | |
5255 { | |
5256 return PyInt_FromLong(CudaNdarray_SIZE(self)); | |
5257 } | |
5258 | |
5259 int CudaNdarray_set_device_data(CudaNdarray * self, float * data, const CudaNdarray * base) | |
5260 { | |
5261 return CudaNdarray_set_device_data(self, data, (PyObject *) base); | |
5262 } | |
5263 | |
5264 PyObject * CudaNdarray_IS_C_Contiguous(CudaNdarray * self) | |
5265 { | |
5266 return PyBool_FromLong(CudaNdarray_is_c_contiguous(self)); | |
5267 } | |
5268 | |
5269 int fprint_CudaNdarray(FILE * fd, const CudaNdarray *self) | |
5270 { | |
5271 cudaError_t err = cudaGetLastError(); | |
5272 if( cudaSuccess != err) | |
5273 { | |
5274 PyErr_Format(PyExc_RuntimeError, | |
5275 "Cuda error: %s: %s.", | |
5276 "fprint_CudaNdarray was called with an uncleared error", | |
5277 cudaGetErrorString(err)); | |
5278 return -1; | |
5279 } | |
5280 fprintf(fd, "CudaNdarray <%p, %p> nd=%i dev_structure_fresh=%d data_allocated=%d\n", | |
5281 self, self->devdata, self->nd, self->dev_structure_fresh, self->data_allocated); | |
5282 fprintf(fd, "\tHOST_DIMS: "); | |
5283 for (int i = 0; i < self->nd; ++i) | |
5284 { | |
5285 fprintf(fd, "%i\t", CudaNdarray_HOST_DIMS(self)[i]); | |
5286 } | |
5287 fprintf(fd, "\n\tHOST_STRIDES: "); | |
5288 for (int i = 0; i < self->nd; ++i) | |
5289 { | |
5290 fprintf(fd, "%i\t", CudaNdarray_HOST_STRIDES(self)[i]); | |
5291 } | |
5292 | |
5293 if (self->dev_structure) | |
5294 { | |
5295 int data=0; | |
5296 fprintf(fd, "\n\tDEV_DIMS: "); | |
nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
mod.cu | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(668): warning C4477: 'fprintf' : format string '%lu' requires an argument of type 'unsigned long', but variadic argument 2 has type 'std::size_t' | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(668): note: consider using '%zu' in the format string | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(670): warning C4477: 'fprintf' : format string '%016lx' requires an argument of type 'unsigned long', but variadic argument 1 has type 'std::size_t' | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(670): note: consider using '%zx' in the format string | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(670): warning C4477: 'fprintf' : format string '%016lx' requires an argument of type 'unsigned long', but variadic argument 2 has type 'std::size_t' | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(670): note: consider using '%zx' in the format string | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(670): warning C4477: 'fprintf' : format string '%lu' requires an argument of type 'unsigned long', but variadic argument 3 has type 'std::size_t' | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(670): note: consider using '%zu' in the format string | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(670): warning C4477: 'fprintf' : format string '%016lx' requires an argument of type 'unsigned long', but variadic argument 4 has type 'std::size_t' | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(670): note: consider using '%zx' in the format string | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(670): warning C4477: 'fprintf' : format string '%2lu' requires an argument of type 'unsigned long', but variadic argument 5 has type 'std::size_t' | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(670): note: consider using '%zu' in the format string | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(690): warning C4477: 'fprintf' : format string '%016lx' requires an argument of type 'unsigned long', but variadic argument 3 has type 'std::size_t' | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(690): note: consider using '%zx' in the format string | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(690): warning C4477: 'fprintf' : format string '%lu' requires an argument of type 'unsigned long', but variadic argument 4 has type 'std::size_t' | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(690): note: consider using '%zu' in the format string | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(690): warning C4477: 'fprintf' : format string '%lu' requires an argument of type 'unsigned long', but variadic argument 5 has type 'std::size_t' | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(690): note: consider using '%zu' in the format string | |
mod.cu(803): warning C4311: 'type cast': pointer truncation from 'CudaNdarray *' to 'long' | |
mod.cu(3374): warning C4312: 'type cast': conversion from 'long' to 'float *' of greater size | |
LINK : fatal error LNK1104: cannot open file 'uuid.lib' | |
['nvcc', '-shared', '-O3', '-Xlinker', '/DEBUG', '-D HAVE_ROUND', '-m64', '-Xcompiler', '-DCUDA_NDARRAY_CUH=18715462c72ed6afcd7ca5d52813ce90,-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION,/Zi,/MD', '-IC:\\toolkits\\anaconda2-4.2.0\\lib\\site-packages\\theano-0.8.2-py2.7.egg\\theano\\sandbox\\cuda', '-IC:\\toolkits\\anaconda2-4.2.0\\lib\\site-packages\\numpy\\core\\include', '-IC:\\toolkits\\anaconda2-4.2.0\\include', '-IC:\\toolkits\\anaconda2-4.2.0\\lib\\site-packages\\theano-0.8.2-py2.7.egg\\theano\\gof', '-o', 'C:\\Users\\nobody\\AppData\\Local\\Theano\\compiledir_Windows-10-10.0.14393-Intel64_Family_6_Model_60_Stepping_3_GenuineIntel-2.7.13-64\\cuda_ndarray\\cuda_ndarray.pyd', 'mod.cu', '-LC:\\toolkits\\anaconda2-4.2.0\\libs', '-LC:\\toolkits\\anaconda2-4.2.0', '-lcublas', '-lpython27', '-lcudart']
5297 for (int i = 0; i < self->nd; ++i)
5298 { | |
5299 cublasGetVector(1, sizeof(int), | |
5300 self->dev_structure+i, 1, | |
5301 &data, 1); | |
5302 fprintf(fd, "%i\t", data); | |
5303 } | |
5304 fprintf(fd, "\n\tDEV_STRIDES: "); | |
5305 for (int i = 0; i < self->nd; ++i) | |
5306 { | |
5307 cublasGetVector(1, sizeof(int), | |
5308 self->dev_structure + self->nd+i, 1, | |
5309 &data, 1); | |
5310 fprintf(fd, "%i \t", data); | |
5311 } | |
5312 fprintf(fd, "\n"); | |
5313 }
5314 else | |
5315 { | |
5316 fprintf(fd, "\n\tdev_structure not allocated\n"); | |
5317 } | |
5318 | |
5319 err = cudaGetLastError(); | |
5320 if( cudaSuccess != err) | |
5321 { | |
5322 PyErr_Format(PyExc_RuntimeError,
5323 "Cuda error: %s: %s.", | |
5324 "fprint_CudaNdarray", | |
5325 cudaGetErrorString(err)); | |
5326 return -1; | |
5327 } | |
5328 return 0; | |
5329 } | |
5330 | |
5331 | |
5332 int CudaNdarray_prep_output(CudaNdarray ** arr, int nd, | |
5333 const int * dims, int fortran) | |
5334 { | |
5335 bool allocated = false; | |
5336 if (*arr == NULL) | |
5337 { | |
5338 // This allocates the metadata but not the data | |
5339 *arr = (CudaNdarray *) CudaNdarray_new_nd(nd); | |
5340 if (*arr == NULL) | |
5341 return -1; | |
5342 allocated = true; | |
5343 } | |
5344 | |
5345 if (CudaNdarray_alloc_contiguous(*arr, nd, dims, fortran)) | |
5346 { | |
5347 if (allocated) | |
5348 { | |
5349 Py_DECREF(*arr); | |
5350 *arr = NULL; | |
5351 } | |
5352 return -1; | |
5353 } | |
5354 return 0; | |
5355 } | |
5356 | |
5357 | |
5358 /* | |
5359 Local Variables: | |
5360 mode:c++ | |
5361 c-basic-offset:4 | |
5362 c-file-style:"stroustrup" | |
5363 indent-tabs-mode:nil | |
5364 fill-column:79 | |
5365 End: | |
5366 */ | |
5367 // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:textwidth=79 : | |
5368 | |
=============================== | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(888): warning: variable "prev" was set but never used | |
C:\toolkits\anaconda2-4.2.0\lib\site-packages\theano-0.8.2-py2.7.egg\theano\sandbox\cuda\cnmem.cpp(912): warning: variable "result" was set but never used | |
mod.cu(803): warning: conversion from pointer to smaller integer | |
mod.cu(941): warning: pointless comparison of unsigned integer with zero | |
ERROR (theano.sandbox.cuda): Failed to compile cuda_ndarray.cu: ('nvcc return status', 2, 'for cmd', 'nvcc -shared -O3 -Xlinker /DEBUG -D HAVE_ROUND -m64 -Xcompiler -DCUDA_NDARRAY_CUH=18715462c72ed6afcd7ca5d52813ce90,-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION,/Zi,/MD -IC:\\toolkits\\anaconda2-4.2.0\\lib\\site-packages\\theano-0.8.2-py2.7.egg\\theano\\sandbox\\cuda -IC:\\toolkits\\anaconda2-4.2.0\\lib\\site-packages\\numpy\\core\\include -IC:\\toolkits\\anaconda2-4.2.0\\include -IC:\\toolkits\\anaconda2-4.2.0\\lib\\site-packages\\theano-0.8.2-py2.7.egg\\theano\\gof -o C:\\Users\\nobody\\AppData\\Local\\Theano\\compiledir_Windows-10-10.0.14393-Intel64_Family_6_Model_60_Stepping_3_GenuineIntel-2.7.13-64\\cuda_ndarray\\cuda_ndarray.pyd mod.cu -LC:\\toolkits\\anaconda2-4.2.0\\libs -LC:\\toolkits\\anaconda2-4.2.0 -lcublas -lpython27 -lcudart') | |
WARNING (theano.sandbox.cuda): CUDA is installed, but device gpu is not available (error: cuda unavailable) | |
[Elemwise{exp,no_inplace}(<TensorType(float32, vector)>)] | |
Looping 1000 times took 0.016000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.016000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.032000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.047000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.047000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.063000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.079000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.079000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.094000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.110000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.110000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.125000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.141000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.141000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.157000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.172000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.172000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.188000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.204000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.204000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.219000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.235000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.235000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.250000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.266000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.266000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.282000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.297000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.297000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.313000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.329000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.329000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.344000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.360000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.360000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.375000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.391000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.407000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.407000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.422000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.438000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.438000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.454000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.469000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.469000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.485000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.500000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.500000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.516000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.532000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.532000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.547000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.563000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.563000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.579000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.594000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.594000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.610000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.625000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.625000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.641000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.657000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.657000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.672000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.672000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.688000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.704000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.704000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.719000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.735000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.735000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.750000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.766000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.782000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.782000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.797000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.813000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.829000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.829000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.844000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.860000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.860000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.875000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.891000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.891000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.907000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.922000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.922000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.938000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.954000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.954000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.969000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.985000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 0.985000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.000000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.016000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.032000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.032000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.047000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.063000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.063000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.079000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.079000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.094000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.110000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.110000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.125000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.141000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.141000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.157000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.172000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.172000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.188000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.204000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.204000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.219000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.219000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.235000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.250000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.250000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.266000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.282000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.282000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.297000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.313000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.313000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.329000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.344000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.344000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.360000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.375000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.375000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.391000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.391000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.407000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.422000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.422000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.438000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.454000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.454000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.469000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.485000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.485000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.500000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.518000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.529000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.536000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.536000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.552000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.568000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.568000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.583000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.599000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.599000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.614000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.614000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.630000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.646000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.646000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.661000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.677000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.677000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.693000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.708000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.708000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.724000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.739000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.739000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.755000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.771000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.771000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.786000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.802000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 1.802000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu
Looping 1000 times took 1.818000 seconds
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761
 1.62323284]
Used the cpu
Looping 1000 times took 1.833000 seconds
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761
 1.62323284]
Used the cpu

[... the same three-line block repeats for several hundred further runs: every run reports "Used the cpu", the result vector is identical each time, and the loop time climbs steadily from 1.818 to 6.226 seconds ...]

Looping 1000 times took 6.226000 seconds
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761
 1.62323284]
Used the cpu | |
Looping 1000 times took 6.226000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.241000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.257000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.257000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.273000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.288000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.288000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.304000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.319000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.319000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.335000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.335000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.351000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.366000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.366000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.382000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.398000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.398000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.413000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.429000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.429000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.444000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.460000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.460000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.476000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.491000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.491000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.507000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.523000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.523000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.538000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.554000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.554000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.569000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.585000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.585000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.601000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.616000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.616000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.632000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.648000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.648000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.663000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.663000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.679000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.694000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.694000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.710000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.726000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.726000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.741000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.741000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.757000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.773000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.773000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.788000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.804000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.804000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.819000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.835000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.851000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.866000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.882000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.898000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.898000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.913000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.929000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.944000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.944000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.960000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.976000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.991000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 6.991000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.007000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.023000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.023000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.038000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.054000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.054000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.069000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.085000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.085000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.101000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.116000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.132000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.132000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.148000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.163000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.163000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.179000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.194000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.194000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.210000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.226000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.226000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.241000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.257000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.257000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.273000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.290000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.293000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.309000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.309000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.324000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.340000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.340000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.356000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.371000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.371000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.387000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.402000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.402000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.418000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.434000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.434000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.449000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.465000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.465000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.481000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.496000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.496000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.512000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.527000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.527000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.543000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.559000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.559000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.574000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.574000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.590000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.606000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.606000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.621000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.637000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.637000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.652000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.668000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.668000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.684000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.699000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.699000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.715000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.731000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.731000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.746000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.746000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.762000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.777000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.777000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.793000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.809000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.809000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.824000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.840000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.840000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.856000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.856000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.871000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.887000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.887000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.902000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.918000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.918000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.934000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.949000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.949000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.965000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.981000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.981000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.996000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 7.996000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.012000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.027000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.027000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.043000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.059000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.059000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.074000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.090000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.090000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.106000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.121000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.121000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.137000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.137000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.152000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.168000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.168000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.184000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.199000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.199000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.215000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.231000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.231000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.246000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.246000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.263000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.279000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.279000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.294000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.310000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.310000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.326000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.326000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.341000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.357000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.357000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.373000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.388000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.388000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.404000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.419000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.419000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.435000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.435000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.451000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.466000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.466000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.482000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.498000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.498000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.520000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.520000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.535000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.535000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.551000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.566000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.566000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.582000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.598000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.598000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.613000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.629000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.629000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.645000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.660000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.660000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.676000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.691000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.691000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.707000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.723000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.723000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.738000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.754000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.754000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.770000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.785000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.785000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.801000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.816000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.816000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.832000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.848000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.848000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.863000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.879000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.879000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.895000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.910000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.910000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.926000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.941000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.941000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.957000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.957000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.973000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 8.988000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.004000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.004000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.020000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.035000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.035000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.051000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.066000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.066000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.082000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.098000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.098000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.113000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.129000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.129000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.145000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.160000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.160000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.176000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.191000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.191000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.207000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.224000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.224000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.240000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.240000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.255000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.271000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.271000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.287000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.302000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.302000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.318000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.333000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.333000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.349000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.349000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.365000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.380000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.380000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.396000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.412000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.412000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.427000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.443000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.443000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.458000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.474000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.474000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.490000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.505000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.505000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.521000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.537000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.537000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.552000 seconds | |
Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 | |
1.62323284] | |
Used the cpu | |
Looping 1000 times took 9.568000 seconds
Result is [ 1.23178029  1.61879337  1.52278066 ...,  2.20771813  2.29967761
  1.62323284]
Used the cpu
[... the same four-line block repeats for dozens of further runs; only the timing changes, climbing steadily from 9.568000 to 10.437000 seconds, and every run reports "Used the cpu" ...]
Looping 1000 times took 10.437000 seconds
Result is [ 1.23178029  1.61879337  1.52278066 ...,  2.20771813  2.29967761
  1.62323284]
Used the cpu
Process finished with exit code 0
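
For context, this output has the shape produced by the standard device-check script from the Theano documentation: build an elementwise exp graph, time 1000 calls of the compiled function, and report whether the graph ran on the CPU or the GPU. The actual contents of cpu_gpu_test.py are not shown in this log, so the following is a minimal sketch along the documented lines, not the exact script that was run:

    from theano import function, config, shared, tensor
    import numpy
    import time

    vlen = 10 * 30 * 768  # roughly 10 x #cores x #threads per core
    iters = 1000

    rng = numpy.random.RandomState(22)
    x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
    f = function([], tensor.exp(x))  # compiled for whatever device Theano selected

    t0 = time.time()
    for i in range(iters):
        r = f()                      # run the compiled function 1000 times
    t1 = time.time()

    print("Looping %d times took %f seconds" % (iters, t1 - t0))
    print("Result is %s" % (r,))
    # With the old gpu backend, ops placed on the GPU have 'Gpu' in their
    # type name; a plain tensor.Elemwise node means the graph stayed on the CPU.
    if numpy.any([isinstance(node.op, tensor.Elemwise) and
                  ('Gpu' not in type(node.op).__name__)
                  for node in f.maker.fgraph.toposort()]):
        print('Used the cpu')
    else:
        print('Used the gpu')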
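
Every run above prints "Used the cpu", meaning Theano compiled the graph for the CPU even though a GPU flag was set. With the 2017-era backend the device is normally selected through the THEANO_FLAGS environment variable or a .theanorc file; assuming the old device=gpu backend, the usual settings look like this (a sketch, not taken from this log):

    set THEANO_FLAGS=device=gpu,floatX=float32

or, equivalently, in .theanorc:

    [global]
    device = gpu
    floatX = float32

If CUDA cannot be initialized with these settings, Theano warns and falls back to the CPU, which would produce exactly the repeated "Used the cpu" lines recorded here.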