Created
February 16, 2017 12:19
-
-
Save oak-tree/ccec4bf5ec0931c29a11629e1a0f9d46 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| 1 #include <Python.h> | |
| 2 #include <iostream> | |
| 3 #include "theano_mod_helper.h" | |
| 4 #include "cuda_ndarray.cuh" | |
| 5 ////////////////////// | |
| 6 //// Support Code | |
| 7 ////////////////////// | |
| 8 | |
| 9 | |
| 10 namespace { | |
| 11 struct __struct_compiled_op_f4ef5cd450ad67f267508aba71bc9604 { | |
| 12 PyObject* __ERROR; | |
| 13 | |
| 14 PyObject* storage_V3; | |
| 15 PyObject* storage_V1; | |
| 16 | |
| 17 | |
| 18 __struct_compiled_op_f4ef5cd450ad67f267508aba71bc9604() { | |
| 19 // This is only somewhat safe because we: | |
| 20 // 1) Are not a virtual class | |
| 21 // 2) Do not use any virtual classes in the members | |
| 22 // 3) Deal with mostly POD and pointers | |
| 23 | |
| 24 // If this changes, we would have to revise this, but for | |
| 25 // now I am tired of chasing segfaults because | |
| 26 // initialization code had an error and some pointer has | |
| 27 // a junk value. | |
| 28 memset(this, 0, sizeof(*this)); | |
| 29 } | |
| 30 ~__struct_compiled_op_f4ef5cd450ad67f267508aba71bc9604(void) { | |
| 31 cleanup(); | |
| 32 } | |
| 33 | |
| 34 int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V1) { | |
| 35 Py_XINCREF(storage_V3); | |
| 36 Py_XINCREF(storage_V1); | |
| 37 this->storage_V3 = storage_V3; | |
| 38 this->storage_V1 = storage_V1; | |
| 39 | |
| 40 | |
| 41 | |
| 42 | |
| 43 this->__ERROR = __ERROR; | |
| 44 return 0; | |
| 45 } | |
| 46 void cleanup(void) { | |
| 47 __label_1: | |
| 48 | |
| 49 double __DUMMY_1; | |
| 50 __label_3: | |
| 51 | |
| 52 double __DUMMY_3; | |
| 53 __label_6: | |
| 54 | |
| 55 double __DUMMY_6; | |
| 56 | |
| 57 Py_XDECREF(this->storage_V3); | |
| 58 Py_XDECREF(this->storage_V1); | |
| 59 } | |
// run(): executes this GpuElemwise (RoundHalfToEven) node once.
//
// Steps, in order:
//   1. Fetch item 0 of storage_V1 (the output) and of storage_V3 (the input).
//      Every CudaNdarray found must have nd == 2, size 1 on dimension 1 and a
//      zero stride on dimension 1. None is accepted for V1 (V1 becomes NULL)
//      but is an error for V3.
//   2. Check V3's shape against dims[], make the output alias the input
//      (V1 = V3) and call the elemwise kernel launcher.
//   3. If nothing failed, store V1 (or None when V1 is NULL) back into
//      storage_V1[0].
//   4. If something failed, move the pending Python exception into slots
//      0..2 of the __ERROR list.
//
// Returns __failure: 0 when no step failed, 2 for a V1 extraction failure,
// 4 for a V3 extraction failure, 5 for a failure in the elemwise step.
| 60 int run(void) { | |
| 61 int __failure = 0; | |
| 62 | |
| 63 PyObject* py_V1; | |
| 64 CudaNdarray * V1; | |
| 65 PyObject* py_V3; | |
| 66 CudaNdarray * V3; | |
| 67 { | |
| 68 | |
// --- Extract V1: take a local reference on item 0 of storage_V1.
| 69 py_V1 = PyList_GET_ITEM(storage_V1, 0); | |
| 70 {Py_XINCREF(py_V1);} | |
| 71 | |
| 72 if (py_V1 == Py_None) | |
| 73 { | |
| 74 V1 = NULL; | |
| 75 } | |
| 76 else | |
| 77 { | |
| 78 | |
| 79 assert(py_V1->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
| 80 // and one ref from the local scope. | |
| 81 | |
| 82 if (CudaNdarray_Check(py_V1)) | |
| 83 { | |
| 84 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
| 85 V1 = (CudaNdarray*)py_V1; | |
| 86 //std::cerr << "c_extract " << V1 << '\n'; | |
| 87 | |
| 88 | |
| 89 if (V1->nd != 2) | |
| 90 { | |
| 91 PyErr_Format(PyExc_RuntimeError, | |
| 92 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
| 93 V1->nd); | |
| 94 V1 = NULL; | |
| 95 { | |
| 96 __failure = 2; | |
| 97 if (!PyErr_Occurred()) { | |
| 98 PyErr_SetString(PyExc_RuntimeError, | |
| 99 "Unexpected error in an Op's C code. " | |
| 100 "No Python exception was set."); | |
| 101 } | |
| 102 goto __label_2;}; | |
| 103 } | |
| 104 //std::cerr << "c_extract " << V1 << " nd check passed\n"; | |
| 105 | |
| 106 | |
| 107 if (CudaNdarray_HOST_DIMS(V1)[1] != 1) | |
| 108 { | |
| 109 PyErr_Format(PyExc_RuntimeError, | |
| 110 "c_extract: Some CudaNdarray has dim %i on broadcastable dimension %i", | |
| 111 CudaNdarray_HOST_DIMS(V1)[1], 1); | |
| 112 V1 = NULL; | |
| 113 { | |
| 114 __failure = 2; | |
| 115 if (!PyErr_Occurred()) { | |
| 116 PyErr_SetString(PyExc_RuntimeError, | |
| 117 "Unexpected error in an Op's C code. " | |
| 118 "No Python exception was set."); | |
| 119 } | |
| 120 goto __label_2;}; | |
| 121 } | |
| 122 //std::cerr << "c_extract " << V1 << "dim check 1 passed\n"; | |
| 123 //std::cerr << "c_extract " << V1 << "checking bcast 1 <" << V1->str<< ">\n"; | |
| 124 //std::cerr << "c_extract " << V1->str[1] << "\n"; | |
| 125 if (CudaNdarray_HOST_STRIDES(V1)[1]) | |
| 126 { | |
| 127 //std::cerr << "c_extract bad stride detected...\n"; | |
| 128 PyErr_Format(PyExc_RuntimeError, | |
| 129 "c_extract: Some CudaNdarray has a nonzero stride %i on a broadcastable dimension %i", | |
| 130 CudaNdarray_HOST_STRIDES(V1)[1], 1); | |
| 131 V1 = NULL; | |
| 132 { | |
| 133 __failure = 2; | |
| 134 if (!PyErr_Occurred()) { | |
| 135 PyErr_SetString(PyExc_RuntimeError, | |
| 136 "Unexpected error in an Op's C code. " | |
| 137 "No Python exception was set."); | |
| 138 } | |
| 139 goto __label_2;}; | |
| 140 } | |
| 141 //std::cerr << "c_extract " << V1 << "bcast check 1 passed\n"; | |
| 142 | |
| 143 | |
| 144 assert(V1); | |
| 145 Py_INCREF(py_V1); | |
| 146 } | |
// NOTE(review): this branch looks unreachable, since None is already handled
// by the outer `py_V1 == Py_None` check above.
| 147 else if (py_V1 == Py_None) | |
| 148 { | |
| 149 PyErr_SetString(PyExc_TypeError, | |
| 150 "expected a CudaNdarray, not None"); | |
| 151 V1 = NULL; | |
| 152 { | |
| 153 __failure = 2; | |
| 154 if (!PyErr_Occurred()) { | |
| 155 PyErr_SetString(PyExc_RuntimeError, | |
| 156 "Unexpected error in an Op's C code. " | |
| 157 "No Python exception was set."); | |
| 158 } | |
| 159 goto __label_2;}; | |
| 160 } | |
| 161 else | |
| 162 { | |
| 163 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
| 164 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
| 165 V1 = NULL; | |
| 166 { | |
| 167 __failure = 2; | |
| 168 if (!PyErr_Occurred()) { | |
| 169 PyErr_SetString(PyExc_RuntimeError, | |
| 170 "Unexpected error in an Op's C code. " | |
| 171 "No Python exception was set."); | |
| 172 } | |
| 173 goto __label_2;}; | |
| 174 } | |
| 175 //std::cerr << "c_extract done " << V1 << '\n'; | |
| 176 | |
| 177 | |
| 178 } | |
| 179 | |
| 180 { | |
| 181 | |
// --- Extract V3: same checks as for V1, except that None is rejected here.
| 182 py_V3 = PyList_GET_ITEM(storage_V3, 0); | |
| 183 {Py_XINCREF(py_V3);} | |
| 184 | |
| 185 assert(py_V3->ob_refcnt >= 2); // There should be at least one ref from the container object, | |
| 186 // and one ref from the local scope. | |
| 187 | |
| 188 if (CudaNdarray_Check(py_V3)) | |
| 189 { | |
| 190 //fprintf(stderr, "c_extract CNDA object w refcnt %p %i\n", py_V3, (py_V3->ob_refcnt)); | |
| 191 V3 = (CudaNdarray*)py_V3; | |
| 192 //std::cerr << "c_extract " << V3 << '\n'; | |
| 193 | |
| 194 | |
| 195 if (V3->nd != 2) | |
| 196 { | |
| 197 PyErr_Format(PyExc_RuntimeError, | |
| 198 "c_extract: Some CudaNdarray has rank %i, it was supposed to have rank 2", | |
| 199 V3->nd); | |
| 200 V3 = NULL; | |
| 201 { | |
| 202 __failure = 4; | |
| 203 if (!PyErr_Occurred()) { | |
| 204 PyErr_SetString(PyExc_RuntimeError, | |
| 205 "Unexpected error in an Op's C code. " | |
| 206 "No Python exception was set."); | |
| 207 } | |
| 208 goto __label_4;}; | |
| 209 } | |
| 210 //std::cerr << "c_extract " << V3 << " nd check passed\n"; | |
| 211 | |
| 212 | |
| 213 if (CudaNdarray_HOST_DIMS(V3)[1] != 1) | |
| 214 { | |
| 215 PyErr_Format(PyExc_RuntimeError, | |
| 216 "c_extract: Some CudaNdarray has dim %i on broadcastable dimension %i", | |
| 217 CudaNdarray_HOST_DIMS(V3)[1], 1); | |
| 218 V3 = NULL; | |
| 219 { | |
| 220 __failure = 4; | |
| 221 if (!PyErr_Occurred()) { | |
| 222 PyErr_SetString(PyExc_RuntimeError, | |
| 223 "Unexpected error in an Op's C code. " | |
| 224 "No Python exception was set."); | |
| 225 } | |
| 226 goto __label_4;}; | |
| 227 } | |
| 228 //std::cerr << "c_extract " << V3 << "dim check 1 passed\n"; | |
| 229 //std::cerr << "c_extract " << V3 << "checking bcast 1 <" << V3->str<< ">\n"; | |
| 230 //std::cerr << "c_extract " << V3->str[1] << "\n"; | |
| 231 if (CudaNdarray_HOST_STRIDES(V3)[1]) | |
| 232 { | |
| 233 //std::cerr << "c_extract bad stride detected...\n"; | |
| 234 PyErr_Format(PyExc_RuntimeError, | |
| 235 "c_extract: Some CudaNdarray has a nonzero stride %i on a broadcastable dimension %i", | |
| 236 CudaNdarray_HOST_STRIDES(V3)[1], 1); | |
| 237 V3 = NULL; | |
| 238 { | |
| 239 __failure = 4; | |
| 240 if (!PyErr_Occurred()) { | |
| 241 PyErr_SetString(PyExc_RuntimeError, | |
| 242 "Unexpected error in an Op's C code. " | |
| 243 "No Python exception was set."); | |
| 244 } | |
| 245 goto __label_4;}; | |
| 246 } | |
| 247 //std::cerr << "c_extract " << V3 << "bcast check 1 passed\n"; | |
| 248 | |
| 249 | |
| 250 assert(V3); | |
| 251 Py_INCREF(py_V3); | |
| 252 } | |
| 253 else if (py_V3 == Py_None) | |
| 254 { | |
| 255 PyErr_SetString(PyExc_TypeError, | |
| 256 "expected a CudaNdarray, not None"); | |
| 257 V3 = NULL; | |
| 258 { | |
| 259 __failure = 4; | |
| 260 if (!PyErr_Occurred()) { | |
| 261 PyErr_SetString(PyExc_RuntimeError, | |
| 262 "Unexpected error in an Op's C code. " | |
| 263 "No Python exception was set."); | |
| 264 } | |
| 265 goto __label_4;}; | |
| 266 } | |
| 267 else | |
| 268 { | |
| 269 //fprintf(stderr, "FAILING c_extract CNDA object w refcnt %p %i\n", py_V3, (py_V3->ob_refcnt)); | |
| 270 PyErr_SetString(PyExc_TypeError, "Argument not a CudaNdarray"); | |
| 271 V3 = NULL; | |
| 272 { | |
| 273 __failure = 4; | |
| 274 if (!PyErr_Occurred()) { | |
| 275 PyErr_SetString(PyExc_RuntimeError, | |
| 276 "Unexpected error in an Op's C code. " | |
| 277 "No Python exception was set."); | |
| 278 } | |
| 279 goto __label_4;}; | |
| 280 } | |
| 281 //std::cerr << "c_extract done " << V3 << '\n'; | |
| 282 | |
| 283 | |
| 284 { | |
| 285 // Op class GpuElemwise | |
| 286 | |
| 287 //std::cerr << "C_CODE RoundHalfToEven START\n"; | |
| 288 //standard elemwise size checks | |
| 289 | |
| 290 | |
// dims[] starts at {1,1}; an entry still equal to 1 is overwritten with V3's
// size on that axis in the loop below.
| 291 int dims[2] = {1,1}; | |
| 292 | |
| 293 | |
| 294 int broadcasts_V3[2] = {0, 1}; | |
| 295 | |
| 296 | |
| 297 //std::cerr << "C_CODE RoundHalfToEven checking input V3\n"; | |
| 298 if (2 != V3->nd) | |
| 299 { | |
| 300 PyErr_Format(PyExc_TypeError, | |
| 301 "need 2 dims, not %i", V3->nd); | |
| 302 { | |
| 303 __failure = 5; | |
| 304 if (!PyErr_Occurred()) { | |
| 305 PyErr_SetString(PyExc_RuntimeError, | |
| 306 "Unexpected error in an Op's C code. " | |
| 307 "No Python exception was set."); | |
| 308 } | |
| 309 goto __label_5;}; | |
| 310 } | |
| 311 for (int i = 0; i< 2; ++i) | |
| 312 { | |
| 313 dims[i] = (dims[i] == 1) ? CudaNdarray_HOST_DIMS(V3)[i] : dims[i]; | |
| 314 if ((!(broadcasts_V3[i] && | |
| 315 CudaNdarray_HOST_DIMS(V3)[i] == 1)) && | |
| 316 (dims[i] != CudaNdarray_HOST_DIMS(V3)[i])) | |
| 317 { | |
| 318 //std::cerr << "C_CODE RoundHalfToEven checking input V3 failed\n"; | |
| 319 PyErr_Format(PyExc_ValueError, | |
| 320 "GpuElemwise. Input dimension mis-match. Input" | |
| 321 " 0 (indices start at 0) has shape[%i] == %i" | |
| 322 ", but the output's size on that axis is %i.", | |
| 323 i, | |
| 324 CudaNdarray_HOST_DIMS(V3)[i], | |
| 325 dims[i] | |
| 326 ); | |
| 327 { | |
| 328 __failure = 5; | |
| 329 if (!PyErr_Occurred()) { | |
| 330 PyErr_SetString(PyExc_RuntimeError, | |
| 331 "Unexpected error in an Op's C code. " | |
| 332 "No Python exception was set."); | |
| 333 } | |
| 334 goto __label_5;}; | |
| 335 } | |
| 336 } | |
| 337 | |
| 338 | |
// In-place op: drop whatever V1 referenced and make it alias V3, taking a
// new reference for the alias.
| 339 Py_XDECREF(V1); | |
| 340 V1 = V3; | |
| 341 Py_INCREF(V1); | |
| 342 for (int i = 0; (i< 2) && (V1); ++i) { | |
| 343 if (dims[i] != CudaNdarray_HOST_DIMS(V1)[i]) | |
| 344 { | |
| 345 PyErr_Format(PyExc_ValueError, | |
| 346 "GpuElemwise. Output dimension mis-match. Output" | |
| 347 " 0 (indices start at 0), working inplace" | |
| 348 " on input 0, has shape[%i] == %i" | |
| 349 ", but the output's size on that axis is %i.", | |
| 350 i, | |
| 351 CudaNdarray_HOST_DIMS(V1)[i], | |
| 352 dims[i] | |
| 353 ); | |
| 354 Py_DECREF(V1); | |
| 355 V1 = NULL; | |
| 356 { | |
| 357 __failure = 5; | |
| 358 if (!PyErr_Occurred()) { | |
| 359 PyErr_SetString(PyExc_RuntimeError, | |
| 360 "Unexpected error in an Op's C code. " | |
| 361 "No Python exception was set."); | |
| 362 } | |
| 363 goto __label_5;}; | |
| 364 } | |
| 365 } | |
| 366 //std::cerr << "ELEMWISE NEW V1 nd" << V1->nd << "\n"; | |
| 367 //std::cerr << "ELEMWISE NEW V1 data" << V1->devdata << "\n"; | |
| 368 | |
| 369 | |
| 370 { | |
| 371 //new block so that failure gotos don't skip over variable initialization | |
| 372 //std::cerr << "calling callkernel\n"; | |
// NOTE(review): the callkernel_node_* function called here is not defined
// anywhere in this listing; the nvcc output that follows the listing reports
// it as an undefined identifier. A nonzero return is treated as failure.
| 373 if (callkernel_node_f4ef5cd450ad67f267508aba71bc9604_0(1, 0, dims | |
| 374 | |
| 375 | |
| 376 , CudaNdarray_DEV_DATA(V3), CudaNdarray_HOST_STRIDES(V3) | |
| 377 | |
| 378 | |
| 379 , CudaNdarray_DEV_DATA(V1), CudaNdarray_HOST_STRIDES(V1) | |
| 380 | |
| 381 | |
| 382 )) | |
| 383 { | |
| 384 // error | |
| 385 | |
| 386 | |
| 387 Py_DECREF(V1); | |
| 388 V1 = NULL; | |
| 389 | |
| 390 | |
| 391 { | |
| 392 __failure = 5; | |
| 393 if (!PyErr_Occurred()) { | |
| 394 PyErr_SetString(PyExc_RuntimeError, | |
| 395 "Unexpected error in an Op's C code. " | |
| 396 "No Python exception was set."); | |
| 397 } | |
| 398 goto __label_5;}; | |
| 399 } | |
| 400 else // no error | |
| 401 { | |
| 402 } | |
| 403 } | |
| 404 //std::cerr << "C_CODE RoundHalfToEven END\n"; | |
| 405 | |
| 406 __label_5: | |
| 407 | |
| 408 double __DUMMY_5; | |
| 409 | |
| 410 } | |
// Reached on success and on V3/elemwise failures: release the references
// taken while extracting V3.
| 411 __label_4: | |
| 412 | |
| 413 //std::cerr << "cleanup " << py_V3 << " " << V3 << "\n"; | |
| 414 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V3, (py_V3->ob_refcnt)); | |
| 415 if (V3) | |
| 416 { | |
| 417 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V3, (V3->ob_refcnt)); | |
| 418 Py_XDECREF(V3); | |
| 419 } | |
| 420 //std::cerr << "cleanup done" << py_V3 << "\n"; | |
| 421 | |
| 422 {Py_XDECREF(py_V3);} | |
| 423 | |
| 424 double __DUMMY_4; | |
| 425 | |
| 426 } | |
// Reached on every path: on success write V1 (or None) back to
// storage_V1[0], then release the local references on V1 and py_V1.
| 427 __label_2: | |
| 428 | |
| 429 if (!__failure) { | |
| 430 | |
| 431 //std::cerr << "sync\n"; | |
| 432 if (NULL == V1) { | |
| 433 // failure: sync None to storage | |
| 434 Py_XDECREF(py_V1); | |
| 435 py_V1 = Py_None; | |
| 436 Py_INCREF(py_V1); | |
| 437 } | |
| 438 else | |
| 439 { | |
| 440 if (py_V1 != (PyObject*)V1) | |
| 441 { | |
| 442 Py_XDECREF(py_V1); | |
| 443 py_V1 = (PyObject*)V1; | |
| 444 Py_INCREF(py_V1); | |
| 445 } | |
| 446 assert(py_V1->ob_refcnt); | |
| 447 } | |
| 448 | |
| 449 PyObject* old = PyList_GET_ITEM(storage_V1, 0); | |
| 450 {Py_XINCREF(py_V1);} | |
| 451 PyList_SET_ITEM(storage_V1, 0, py_V1); | |
| 452 {Py_XDECREF(old);} | |
| 453 } | |
| 454 | |
| 455 //std::cerr << "cleanup " << py_V1 << " " << V1 << "\n"; | |
| 456 //fprintf(stderr, "c_cleanup CNDA py_object w refcnt %p %i\n", py_V1, (py_V1->ob_refcnt)); | |
| 457 if (V1) | |
| 458 { | |
| 459 //fprintf(stderr, "c_cleanup CNDA cn_object w refcnt %p %i\n", V1, (V1->ob_refcnt)); | |
| 460 Py_XDECREF(V1); | |
| 461 } | |
| 462 //std::cerr << "cleanup done" << py_V1 << "\n"; | |
| 463 | |
| 464 {Py_XDECREF(py_V1);} | |
| 465 | |
| 466 double __DUMMY_2; | |
| 467 | |
| 468 } | |
| 469 | |
| 470 | |
| 471 if (__failure) { | |
| 472 // When there is a failure, this code puts the exception | |
| 473 // in __ERROR. | |
| 474 PyObject* err_type = NULL; | |
| 475 PyObject* err_msg = NULL; | |
| 476 PyObject* err_traceback = NULL; | |
| 477 PyErr_Fetch(&err_type, &err_msg, &err_traceback); | |
| 478 if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);} | |
| 479 if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);} | |
| 480 if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);} | |
| 481 PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0); | |
| 482 PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1); | |
| 483 PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2); | |
| 484 PyList_SET_ITEM(__ERROR, 0, err_type); | |
| 485 PyList_SET_ITEM(__ERROR, 1, err_msg); | |
| 486 PyList_SET_ITEM(__ERROR, 2, err_traceback); | |
| 487 {Py_XDECREF(old_err_type);} | |
| 488 {Py_XDECREF(old_err_msg);} | |
| 489 {Py_XDECREF(old_err_traceback);} | |
| 490 } | |
| 491 // The failure code is returned to index what code block failed. | |
| 492 return __failure; | |
| 493 | |
| 494 } | |
| 495 }; | |
| 496 } | |
| 497 | |
| 498 | |
| 499 static int __struct_compiled_op_f4ef5cd450ad67f267508aba71bc9604_executor(__struct_compiled_op_f4ef5cd450ad67f267508aba71bc9604* self) { | |
| 500 return self->run(); | |
| 501 } | |
| 502 | |
| 503 static void __struct_compiled_op_f4ef5cd450ad67f267508aba71bc9604_destructor(void* executor, void* self) { | |
| 504 delete ((__struct_compiled_op_f4ef5cd450ad67f267508aba71bc9604*)self); | |
| 505 } | |
| 506 | |
| 507 ////////////////////// | |
| 508 //// Functions | |
| 509 ////////////////////// | |
| 510 static PyObject * instantiate(PyObject * self, PyObject *argtuple) { | |
| 511 assert(PyTuple_Check(argtuple)); | |
| 512 if (3 != PyTuple_Size(argtuple)){ | |
| 513 PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 3, got %i", (int)PyTuple_Size(argtuple)); | |
| 514 return NULL; | |
| 515 } | |
| 516 __struct_compiled_op_f4ef5cd450ad67f267508aba71bc9604* struct_ptr = new __struct_compiled_op_f4ef5cd450ad67f267508aba71bc9604(); | |
| 517 if (struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2) ) != 0) { | |
| 518 delete struct_ptr; | |
| 519 return NULL; | |
| 520 } | |
| 521 PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_f4ef5cd450ad67f267508aba71bc9604_executor), struct_ptr, __struct_compiled_op_f4ef5cd450ad67f267508aba71bc9604_destructor); | |
| 522 return thunk; } | |
| 523 | |
| 524 ////////////////////// | |
| 525 //// Module init | |
| 526 ////////////////////// | |
| 527 static PyMethodDef MyMethods[] = { | |
| 528 {"instantiate", instantiate, METH_VARARGS, "undocumented"} , | |
| 529 {NULL, NULL, 0, NULL} | |
| 530 }; | |
| 531 PyMODINIT_FUNC initf4ef5cd450ad67f267508aba71bc9604(void){ | |
| 532 (void) Py_InitModule("f4ef5cd450ad67f267508aba71bc9604", MyMethods); | |
| 533 } | |
| 534 | |
| =============================== | |
| mod.cu(373): error: identifier "callkernel_node_f4ef5cd450ad67f267508aba71bc9604_0" is undefined | |
| 1 error detected in the compilation of "/tmp/tmpxft_00010e67_00000000-9_mod.cpp1.ii". | |
| ['nvcc', '-shared', '-O3', '--maxrregcount=32', '-arch=sm_37', '-m64', '-Xcompiler', '-fno-math-errno,-Wno-unused-label,-Wno-unused-variable,-Wno-write-strings,-DCUDA_NDARRAY_CUH=c72d035fdf91890f3b36710688069b2e,-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION,-fPIC,-fvisibility=hidden', '-Xlinker', '-rpath,/home/oak/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-2.7.12-64/cuda_ndarray', '-I/home/oak/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-2.7.12-64/cuda_ndarray', '-I/usr/local/cuda-8.0/include', '-I/home/oak/venv2/local/lib/python2.7/site-packages/numpy/core/include', '-I/usr/include/python2.7', '-I/home/oak/venv2/local/lib/python2.7/site-packages/theano/gof', '-I/home/oak/venv2/local/lib/python2.7/site-packages/theano/sandbox/cuda', '-o', '/home/oak/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-2.7.12-64/tmpi0RVsA/f4ef5cd450ad67f267508aba71bc9604.so', 'mod.cu', '-L/home/oak/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-2.7.12-64/cuda_ndarray', '-L/usr/lib', '-lcudart', '-lcublas', '-lcuda_ndarray', '-lpython2.7'] | |
| Traceback (most recent call last): | |
| File "train.py", line 25, in <module> | |
| model.train(callbacks.get_callbacks(weights_path), gen, gen) | |
| File "/home/oak/git//vsearch/trainer/model/resnet50_siamese_graph.py", line 131, in train | |
| validation_data=valid_gen, callbacks=actions, verbose=verbose) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/keras/engine/training.py", line 1455, in fit_generator | |
| self._make_train_function() | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/keras/engine/training.py", line 768, in _make_train_function | |
| **self._function_kwargs) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/keras/backend/theano_backend.py", line 969, in function | |
| return Function(inputs, outputs, updates=updates, **kwargs) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/keras/backend/theano_backend.py", line 955, in __init__ | |
| **kwargs) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/compile/function.py", line 320, in function | |
| output_keys=output_keys) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/compile/pfunc.py", line 479, in pfunc | |
| output_keys=output_keys) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/compile/function_module.py", line 1777, in orig_function | |
| defaults) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/compile/function_module.py", line 1641, in create | |
| input_storage=input_storage_lists, storage_map=storage_map) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/gof/link.py", line 690, in make_thunk | |
| storage_map=storage_map)[:3] | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/gof/vm.py", line 1003, in make_all | |
| no_recycling)) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/sandbox/cuda/__init__.py", line 256, in make_thunk | |
| compute_map, no_recycling) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/gof/op.py", line 970, in make_thunk | |
| no_recycling) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/gof/op.py", line 879, in make_c_thunk | |
| output_storage=node_output_storage) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/gof/cc.py", line 1200, in make_thunk | |
| keep_lock=keep_lock) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/gof/cc.py", line 1143, in __compile__ | |
| keep_lock=keep_lock) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/gof/cc.py", line 1595, in cthunk_factory | |
| key=key, lnk=self, keep_lock=keep_lock) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/gof/cmodule.py", line 1142, in module_from_key | |
| module = lnk.compile_cmodule(location) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/gof/cc.py", line 1506, in compile_cmodule | |
| preargs=preargs) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/sandbox/cuda/nvcc_compiler.py", line 399, in compile_str | |
| 'for cmd', ' '.join(cmd)) | |
| Exception: ('The following error happened while compiling the node', GpuElemwise{RoundHalfToEven}[(0, 0)](GpuElemwise{Composite{sqrt(clip(i0, i1, i2))}}[(0, 0)].0), '\n', 'nvcc return status', 2, 'for cmd', 'nvcc -shared -O3 --maxrregcount=32 -arch=sm_37 -m64 -Xcompiler -fno-math-errno,-Wno-unused-label,-Wno-unused-variable,-Wno-write-strings,-DCUDA_NDARRAY_CUH=c72d035fdf91890f3b36710688069b2e,-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION,-fPIC,-fvisibility=hidden -Xlinker -rpath,/home/oak/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-2.7.12-64/cuda_ndarray -I/home/oak/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-2.7.12-64/cuda_ndarray -I/usr/local/cuda-8.0/include -I/home/oak/venv2/local/lib/python2.7/site-packages/numpy/core/include -I/usr/include/python2.7 -I/home/oak/venv2/local/lib/python2.7/site-packages/theano/gof -I/home/oak/venv2/local/lib/python2.7/site-packages/theano/sandbox/cuda -o /home/oak/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-2.7.12-64/tmpi0RVsA/f4ef5cd450ad67f267508aba71bc9604.so mod.cu -L/home/oak/.theano/compiledir_Linux-4.4--generic-x86_64-with-Ubuntu-16.04-xenial-x86_64-2.7.12-64/cuda_ndarray -L/usr/lib -lcudart -lcublas -lcuda_ndarray -lpython2.7', '[GpuElemwise{RoundHalfToEven}[(0, 0)](<CudaNdarrayType(float32, col)>)]') | |
| (venv2)$ python train.py | |
| Using Theano backend. | |
| Using gpu device 0: Tesla K80 (CNMeM is enabled with initial size: 95.0% of memory, cuDNN 5105) | |
| /home/oak/venv2/local/lib/python2.7/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5. | |
| warnings.warn(warn) | |
| Traceback (most recent call last): | |
| File "train.py", line 7, in <module> | |
| model = t_model.TModel() | |
| File "/home/oak/git/vsearch/trainer/model/resnet50_siamese_graph.py", line 93, in __init__ | |
| out_a_base = example_network(input_a) | |
| File "/home/oak/git/vsearch/trainer/model/resnet50_siamese_graph.py", line 46, in example_network | |
| digit_input = Input(shape=(input_dim)) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/keras/engine/topology.py", line 1194, in Input | |
| batch_shape = (None,) + tuple(shape) | |
| File "/home/oak/venv2/local/lib/python2.7/site-packages/theano/tensor/var.py", line 553, in __iter__ | |
| raise TypeError(('TensorType does not support iteration. ' | |
| TypeError: TensorType does not support iteration. Maybe you are using builtin.sum instead of theano.tensor.sum? (Maybe .max?) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment