-
-
Save liuyu81/3473376 to your computer and use it in GitHub Desktop.
#include "Python.h" // should be before any standard headers | |
#include <errno.h> | |
#include <pthread.h> | |
static void* | |
non_python_thread(void *python_callback) { | |
//XXX without PyEval_InitThreads() it produces: | |
// Fatal Python error: PyEval_SaveThread: NULL tstate | |
//XXX with PyEval_InitThreads() it deadlocks!! | |
//XXX without PyGILState_*() (with/without PyEval_InitThreads()) it produces: | |
// Exception KeyError: KeyError(139794956588800,) | |
// in <module 'threading' from '/usr/lib/python2.7/threading.pyc'> ignored | |
PyGILState_STATE state = PyGILState_Ensure(); | |
// print something | |
PyObject* stdout = PySys_GetObject("stdout"); | |
if (stdout == NULL) { | |
PyErr_SetString(PyExc_RuntimeError, "no stdout"); | |
goto done; | |
} | |
Py_INCREF(stdout); | |
if (PyFile_WriteString("non-python thread\n", stdout) != 0) { | |
// a write error | |
Py_DECREF(stdout); | |
goto done; | |
} | |
else { | |
#if PY_MAJOR_VERSION < 3 | |
PyFile_SoftSpace(stdout, 0); // softspace = False after a newline | |
#endif | |
Py_DECREF(stdout); | |
} | |
// call python callback | |
PyObject* ignored = PyObject_CallFunctionObjArgs(python_callback, NULL); | |
Py_XDECREF(ignored); // `ignored` may be NULL | |
// fall through | |
done: | |
PyGILState_Release(state); | |
return NULL; //NOTE: it doesn't propagate exceptions | |
} | |
/*
 * spawn_non_python_thread(callback) -> None
 *
 * Starts one pthread running non_python_thread(callback) and joins it.
 *
 * args: a tuple holding exactly one object, the callback.
 * Returns a new reference to None on success.  Returns NULL with OSError set
 * (errno taken from the pthread return code) if pthread_create() or
 * pthread_join() fails.
 *
 * The GIL is released only while the pthread calls run: the new thread
 * acquires the GIL through PyGILState_Ensure(), so joining it while still
 * holding the GIL would deadlock.  PyEval_InitThreads() is not called here;
 * the module initializer takes care of it.
 */
static PyObject *
spawn_non_python_thread(PyObject *self, PyObject *args) {
    PyObject *python_callback = NULL;
    PyObject *ret = NULL;
    pthread_t tid;
    int create_err = 0;
    int join_err = 0;

    (void)self;  /* unused */

    if (!PyArg_ParseTuple(args, "O:spawn_non_python_thread", &python_callback))
        return NULL;  /* propagate exception */
    Py_INCREF(python_callback);  /* hold on to it until the thread is finished */

    /* No Python API may be called between these two macros. */
    Py_BEGIN_ALLOW_THREADS
    create_err = pthread_create(&tid, NULL, non_python_thread, python_callback);
    if (create_err == 0)
        join_err = pthread_join(tid, NULL);
    Py_END_ALLOW_THREADS

    if (create_err != 0) {
        /* The thread never ran, so our reference can be dropped. */
        Py_DECREF(python_callback);
        errno = create_err;  /* pthread functions return the code, not errno */
        return PyErr_SetFromErrno(PyExc_OSError);
    }
    if (join_err != 0) {
        /* The thread may still be running and using the callback: keep
           (leak) the reference rather than risk a use-after-free. */
        errno = join_err;
        return PyErr_SetFromErrno(PyExc_OSError);
    }

    /* Only this thread's error indicator is visible here; exceptions raised
       inside non_python_thread are not propagated. */
    if (PyErr_Occurred() == NULL) {
        ret = Py_None;
        Py_INCREF(ret);
    }
    Py_DECREF(python_callback);
    return ret;
}
static PyMethodDef | |
module_functions[] = { | |
{ "spawn_non_python_thread", spawn_non_python_thread, | |
METH_VARARGS, "func docstring" }, | |
{ NULL } | |
}; | |
/*
 * Python 2 / Python 3 module-initialisation shims, after
 * http://python3porting.com/cextensions.html
 *
 *   MOD_INIT(name)            signature of the module init function
 *   MOD_DEF(ob, name, doc, m) creates the module object and stores it in `ob`
 *   MOD_ERROR_VAL             value returned from init on failure
 *   MOD_SUCCESS_VAL(val)      value returned from init on success
 */
#if PY_MAJOR_VERSION < 3
  /* Python 2: the init function returns void, so both values expand to nothing. */
  #define MOD_ERROR_VAL
  #define MOD_SUCCESS_VAL(val)
  #define MOD_INIT(name) void init##name(void)
  #define MOD_DEF(ob, name, doc, methods) \
          ob = Py_InitModule3(name, methods, doc);
#else
  /* Python 3: the init function returns the module object, or NULL on error. */
  #define MOD_ERROR_VAL NULL
  #define MOD_SUCCESS_VAL(val) val
  #define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void)
  #define MOD_DEF(ob, name, doc, methods) \
          static struct PyModuleDef moduledef = { \
            PyModuleDef_HEAD_INIT, name, doc, -1, methods, }; \
          ob = PyModule_Create(&moduledef);
#endif
MOD_INIT(c_extension_module) | |
{ | |
PyObject *m = NULL; | |
MOD_DEF(m, "c_extension_module", "module docstring", module_functions) | |
if (m == NULL) | |
return MOD_ERROR_VAL; | |
// The decref will not destroy the imported module because another reference | |
// reference is held by sys.module until the interpreter exits. But it seems | |
// PY3K is planning to add module unload support. I have no idea what will | |
// happen by that time. | |
// -- by LIU Yu <[email protected]> | |
PyObject * mod = PyImport_ImportModule("threading"); | |
Py_XDECREF(mod); | |
PyEval_InitThreads(); | |
return MOD_SUCCESS_VAL(m); | |
} |
# Reproduction script: starts one Python thread, then asks the C extension to
# run `python_callback` from a thread that was not created by Python.
import c_extension_module


def python_callback():
    """Print a marker line; passed to spawn_non_python_thread() below."""
    print("python callback called")


def python_thread():
    """Print a start and an end marker; used as the threading.Thread target."""
    print("python thread started")
    print("python thread ended")


# `threading` might implicitly call PyEval_InitThreads()
import threading
# The thread is started here and is not joined anywhere in this script.
threading.Thread(target=python_thread).start()

# Original author's expectation: there should be no GIL yet
# (we have only one main thread).
c_extension_module.spawn_non_python_thread(python_callback)

# If no KeyError shows up in the threading module, enable the following
# time.sleep() to reproduce it:
####import time; time.sleep(1e-3)

# NOTE: if there is no new thread in python_callback, or if there are
# multiple Python threads before calling the extension module, then
# there is no error.
print("exit main thread")
I tracked the KeyError issue into the standard threading module. And I am pretty sure there exists an implementation fault.
Here is roughly the cause.
The threading module internally maintains an _active[] dictionary of all threads (both Python threads and OS threads are tracked), e.g. so that module state can be cleaned up correctly when os.fork() is called.
The problem is that 1) when the threading module is NOT imported by the MainThread, and 2) when the MainThread exits so soon that it is never rescheduled (i.e. put to sleep and woken up again), the threading module will NOT be aware of the MainThread until the interpreter exits.
However, when the interpreter exits, each active thread deletes its own identity from this _active[] dictionary -- including the MainThread. The KeyError is in fact a complaint that the MainThread's identity is missing at the moment the MainThread tries to remove itself from the _active[] dictionary.
I came up with another workaround to get rid of the KeyError problem. We can import the threading module, i.e. PyImport_ImportModule("threading"), in the initializer of c_extension_module. This forces the MainThread to be correctly registered by the threading module.
We think the same. :) I had already tried `import threading` and `Py_BEGIN_ALLOW_THREADS` before seeing your comment.
Yes, `import threading` fixes the KeyError that originated here. But imagine you don't see the definition of `python_callback()`. You don't know whether it uses threads or not (the same goes for `c_extension_module`). Why would you add `import threading` if none of your code uses threading? Either `python_callback()` and `c_extension_module` are incorrect, or it is a bug in Python.
The docs say:
It seems to be a documentation bug in Python, because you can't call `PyGILState_Ensure()` "regardless of the current state of Python, or of the global interpreter lock". In this case, though, it is natural to release the GIL using `Py_BEGIN_ALLOW_THREADS`.
`ceval.h` seems to have an incorrect comment:
Looking at the definition of `PyEval_InitThreads()`, it should be safe to call it from a thread that holds the GIL, or if there is no GIL yet (then the current thread will be the main thread).
Btw, I can't reproduce the KeyError using OpenMP threads, though there is a rare `[...].AttributeError'>: 'NoneType' object has no attribute 'write'` exception at interpreter shutdown on Python 2.x.