diff --git a/CHANGES.rst b/CHANGES.rst index b7feca5..8ea891d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,7 +5,42 @@ 3.3.3 (unreleased) ================== -- Nothing changed yet. +- Fix multiple crash paths during interpreter shutdown on Python < 3.11 + (observed with uWSGI worker recycling). Three root causes were + identified and fixed: + + 1. ``clear_deleteme_list()`` used a ``PythonAllocator``-backed vector + copy (``PyMem_Malloc``), which could SIGSEGV during early + ``Py_FinalizeEx`` when Python's allocator pools are partially torn + down. Replaced with ``std::swap`` (zero-allocation, + constant-time) and switched the ``deleteme`` vector to + ``std::allocator`` (system ``malloc``). + + 2. ``ThreadState`` objects were allocated via ``PyObject_Malloc``, + placing them in ``pymalloc`` pools that can be disrupted during + finalization. Switched to ``std::malloc`` / ``std::free`` so + ``ThreadState`` memory remains valid throughout ``Py_FinalizeEx``. + + 3. ``_Py_IsFinalizing()`` is only set *after* ``call_py_exitfuncs`` + and ``_PyGC_CollectIfEnabled`` complete inside ``Py_FinalizeEx``, + so code in atexit handlers or ``__del__`` methods could still call + ``greenlet.getcurrent()`` when type objects had already been + invalidated, crashing in ``PyType_IsSubtype``. An atexit handler + is now registered at module init (LIFO = runs first) that sets a + shutdown flag checked by ``getcurrent()``, + ``PyGreenlet_GetCurrent()``, and ``clear_deleteme_list()``. + + Additionally, ``clear_deleteme_list()`` now preserves any pending + Python exception around its cleanup loop, fixing a latent bug where + an unrelated exception (e.g. one set by ``throw()``) could be + swallowed by ``PyErr_WriteUnraisable`` / ``PyErr_Clear`` inside the + loop. + + This is distinct from the dealloc crash fixed in 3.3.2 + (`PR #495 + `_). See `PR #499 + `_ by Nicolas + Bouvrette. 3.3.2 (2026-02-20) diff --git a/src/greenlet/CObjects.cpp b/src/greenlet/CObjects.cpp index c135995..4f7a966 100644 --- a/src/greenlet/CObjects.cpp +++ b/src/greenlet/CObjects.cpp @@ -29,6 +29,11 @@ extern "C" { static PyGreenlet* PyGreenlet_GetCurrent(void) { +#if !GREENLET_PY311 + if (g_greenlet_shutting_down || Py_IsFinalizing()) { + return nullptr; + } +#endif return GET_THREAD_STATE().state().get_current().relinquish_ownership(); } diff --git a/src/greenlet/PyGreenlet.cpp b/src/greenlet/PyGreenlet.cpp index a7a4474..269dfbf 100644 --- a/src/greenlet/PyGreenlet.cpp +++ b/src/greenlet/PyGreenlet.cpp @@ -56,14 +56,12 @@ green_new(PyTypeObject* type, PyObject* UNUSED(args), PyObject* UNUSED(kwds)) PyGreenlet* o = (PyGreenlet*)PyBaseObject_Type.tp_new(type, mod_globs->empty_tuple, mod_globs->empty_dict); if (o) { - // Recall: borrowing or getting the current greenlet - // causes the "deleteme list" to get cleared. So constructing a greenlet - // can do things like cause other greenlets to get finalized. - UserGreenlet* c = new UserGreenlet(o, GET_THREAD_STATE().state().borrow_current()); + // This looks like a memory leak, but isn't. Constructing the + // C++ object assigns it to the pimpl pointer of the Python + // object (o); we'll need that later. + UserGreenlet* c = new UserGreenlet(o, + GET_THREAD_STATE().state().borrow_current()); assert(Py_REFCNT(o) == 1); - // Also: This looks like a memory leak, but isn't. Constructing the - // C++ object assigns it to the pimpl pointer of the Python object (o); - // we'll need that later. assert(c == o->pimpl); } return o; @@ -204,7 +202,7 @@ _green_dealloc_kill_started_non_main_greenlet(BorrowedGreenlet self) // See: https://github.com/python-greenlet/greenlet/issues/411 // https://github.com/python-greenlet/greenlet/issues/351 #if !GREENLET_PY311 - if (_Py_IsFinalizing()) { + if (Py_IsFinalizing()) { self->murder_in_place(); return 1; } diff --git a/src/greenlet/PyModule.cpp b/src/greenlet/PyModule.cpp index a999dc9..51af7f7 100644 --- a/src/greenlet/PyModule.cpp +++ b/src/greenlet/PyModule.cpp @@ -17,6 +17,30 @@ using greenlet::ThreadState; # pragma clang diagnostic ignored "-Wunused-variable" #endif +// On Python < 3.11, _Py_IsFinalizing() is only set AFTER +// call_py_exitfuncs and _PyGC_CollectIfEnabled finish inside +// Py_FinalizeEx. Code running in atexit handlers or __del__ +// methods can still call greenlet.getcurrent(), but by that +// time type objects may have been invalidated, causing +// SIGSEGV in PyType_IsSubtype. This flag is set by an atexit +// handler registered at module init (LIFO = runs first). +#if !GREENLET_PY311 +int g_greenlet_shutting_down = 0; + +static PyObject* +_greenlet_atexit_callback(PyObject* UNUSED(self), PyObject* UNUSED(args)) +{ + g_greenlet_shutting_down = 1; + Py_RETURN_NONE; +} + +static PyMethodDef _greenlet_atexit_method = { + "_greenlet_cleanup", _greenlet_atexit_callback, + METH_NOARGS, NULL +}; +#endif + + PyDoc_STRVAR(mod_getcurrent_doc, "getcurrent() -> greenlet\n" "\n" @@ -26,6 +50,11 @@ PyDoc_STRVAR(mod_getcurrent_doc, static PyObject* mod_getcurrent(PyObject* UNUSED(module)) { +#if !GREENLET_PY311 + if (g_greenlet_shutting_down || Py_IsFinalizing()) { + Py_RETURN_NONE; + } +#endif return GET_THREAD_STATE().state().get_current().relinquish_ownership_o(); } diff --git a/src/greenlet/TThreadState.hpp b/src/greenlet/TThreadState.hpp index cf13816..6821127 100644 --- a/src/greenlet/TThreadState.hpp +++ b/src/greenlet/TThreadState.hpp @@ -1,6 +1,7 @@ #ifndef GREENLET_THREAD_STATE_HPP #define GREENLET_THREAD_STATE_HPP +#include #include #include #include @@ -23,6 +24,13 @@ using greenlet::refs::CreatedModule; using greenlet::refs::PyErrPieces; using greenlet::refs::NewReference; +// Defined in PyModule.cpp; set by an atexit handler to signal +// that the interpreter is shutting down. Only needed on +// Python < 3.11 where _Py_IsFinalizing() is set too late. +#if !GREENLET_PY311 +extern int g_greenlet_shutting_down; +#endif + namespace greenlet { /** * Thread-local state of greenlets. @@ -100,7 +108,13 @@ class ThreadState { /* Strong reference to the trace function, if any. */ OwnedObject tracefunc; - typedef std::vector > deleteme_t; + // Use std::allocator (malloc/free) instead of PythonAllocator + // (PyMem_Malloc) for the deleteme list. During Py_FinalizeEx on + // Python < 3.11, the PyObject_Malloc pool that holds ThreadState + // can be disrupted, corrupting any PythonAllocator-backed + // containers. Using std::allocator makes this vector independent + // of Python's allocator lifecycle. + typedef std::vector deleteme_t; /* A vector of raw PyGreenlet pointers representing things that need deleted when this thread is running. The vector owns the references, but you need to manually INCREF/DECREF as you use @@ -120,7 +134,6 @@ class ThreadState { static std::clock_t _clocks_used_doing_gc; #endif static ImmortalString get_referrers_name; - static PythonAllocator allocator; G_NO_COPIES_OF_CLS(ThreadState); @@ -146,15 +159,21 @@ class ThreadState { public: - static void* operator new(size_t UNUSED(count)) + // Allocate ThreadState with malloc/free rather than Python's object + // allocator. ThreadState outlives many Python objects and must + // remain valid throughout Py_FinalizeEx. On Python < 3.11, + // PyObject_Malloc pools can be disrupted during early finalization, + // corrupting any C++ objects stored in them. + static void* operator new(size_t count) { - return ThreadState::allocator.allocate(1); + void* p = std::malloc(count); + if (!p) throw std::bad_alloc(); + return p; } static void operator delete(void* ptr) { - return ThreadState::allocator.deallocate(static_cast(ptr), - 1); + std::free(ptr); } static void init() @@ -283,11 +302,36 @@ class ThreadState { inline void clear_deleteme_list(const bool murder=false) { if (!this->deleteme.empty()) { - // It's possible we could add items to this list while - // running Python code if there's a thread switch, so we - // need to defensively copy it before that can happen. - deleteme_t copy = this->deleteme; - this->deleteme.clear(); // in case things come back on the list + // Move the list contents out with swap — a constant-time + // pointer exchange that never allocates. The previous code + // used a copy (deleteme_t copy = this->deleteme) which + // allocated through PythonAllocator / PyMem_Malloc; that + // could SIGSEGV during early Py_FinalizeEx on Python < 3.11 + // when the allocator is partially torn down. + deleteme_t copy; + std::swap(copy, this->deleteme); + + // During Py_FinalizeEx cleanup, the GC or atexit handlers + // may have already collected objects in this list, leaving + // dangling pointers. Attempting Py_DECREF on freed memory + // causes a SIGSEGV. On Python < 3.11, + // g_greenlet_shutting_down covers the early stages + // (before Py_IsFinalizing() is set). +#if !GREENLET_PY311 + if (g_greenlet_shutting_down || Py_IsFinalizing()) { + return; + } +#else + if (Py_IsFinalizing()) { + return; + } +#endif + + // Preserve any pending exception so that cleanup-triggered + // errors don't accidentally swallow an unrelated exception + // (e.g. one set by throw() before a switch). + PyErrPieces incoming_err; + for(deleteme_t::iterator it = copy.begin(), end = copy.end(); it != end; ++it ) { @@ -310,6 +354,8 @@ class ThreadState { PyErr_Clear(); } } + + incoming_err.PyErrRestore(); } } @@ -393,7 +439,7 @@ class ThreadState { // Python 3.11+ restructured interpreter finalization so that // these APIs remain safe during shutdown. #if !GREENLET_PY311 - if (_Py_IsFinalizing()) { + if (Py_IsFinalizing()) { this->tracefunc.CLEAR(); if (this->current_greenlet) { this->current_greenlet->murder_in_place(); @@ -527,7 +573,6 @@ class ThreadState { }; ImmortalString ThreadState::get_referrers_name(nullptr); -PythonAllocator ThreadState::allocator; #ifdef Py_GIL_DISABLED std::atomic ThreadState::_clocks_used_doing_gc(0); #else diff --git a/src/greenlet/TThreadStateDestroy.cpp b/src/greenlet/TThreadStateDestroy.cpp index ae0b9ae..c8b1f80 100644 --- a/src/greenlet/TThreadStateDestroy.cpp +++ b/src/greenlet/TThreadStateDestroy.cpp @@ -183,11 +183,7 @@ struct ThreadState_DestroyNoGIL // segfault if we happen to get context switched, and maybe we should // just always implement our own AddPendingCall, but I'd like to see if // this works first -#if GREENLET_PY313 if (Py_IsFinalizing()) { -#else - if (_Py_IsFinalizing()) { -#endif #ifdef GREENLET_DEBUG // No need to log in the general case. Yes, we'll leak, // but we're shutting down so it should be ok. diff --git a/src/greenlet/greenlet.cpp b/src/greenlet/greenlet.cpp index 7722bd0..f492e24 100644 --- a/src/greenlet/greenlet.cpp +++ b/src/greenlet/greenlet.cpp @@ -294,6 +294,41 @@ greenlet_internal_mod_init() noexcept #ifdef Py_GIL_DISABLED PyUnstable_Module_SetGIL(m.borrow(), Py_MOD_GIL_NOT_USED); #endif + +#if !GREENLET_PY311 + // Register an atexit handler that sets g_greenlet_shutting_down. + // Python's atexit is LIFO: registered last = called first. By + // registering here (at import time, after most other libraries), + // our handler runs before their cleanup code, which may try to + // call greenlet.getcurrent() on objects whose type has been + // invalidated. _Py_IsFinalizing() alone is insufficient + // because it is only set AFTER call_py_exitfuncs completes. + { + PyObject* atexit_mod = PyImport_ImportModule("atexit"); + if (atexit_mod) { + PyObject* register_fn = PyObject_GetAttrString(atexit_mod, "register"); + if (register_fn) { + extern PyMethodDef _greenlet_atexit_method; + PyObject* callback = PyCFunction_New(&_greenlet_atexit_method, NULL); + if (callback) { + PyObject* args = PyTuple_Pack(1, callback); + if (args) { + PyObject* result = PyObject_Call(register_fn, args, NULL); + Py_XDECREF(result); + Py_DECREF(args); + } + Py_DECREF(callback); + } + Py_DECREF(register_fn); + } + Py_DECREF(atexit_mod); + } + // Non-fatal: if atexit registration fails, we still have + // the _Py_IsFinalizing() fallback. + PyErr_Clear(); + } +#endif + return m.borrow(); // But really it's the main reference. } catch (const LockInitError& e) { diff --git a/src/greenlet/greenlet_cpython_compat.hpp b/src/greenlet/greenlet_cpython_compat.hpp index f46d977..b4775c0 100644 --- a/src/greenlet/greenlet_cpython_compat.hpp +++ b/src/greenlet/greenlet_cpython_compat.hpp @@ -153,4 +153,12 @@ static inline void PyThreadState_LeaveTracing(PyThreadState *tstate) # define Py_C_RECURSION_LIMIT C_RECURSION_LIMIT #endif +// Py_IsFinalizing() became a public API in Python 3.13. +// Map it to the private _Py_IsFinalizing() on older versions so all +// call sites can use the standard name. Remove this once greenlet +// drops support for Python < 3.13. +#if !GREENLET_PY313 +# define Py_IsFinalizing() _Py_IsFinalizing() +#endif + #endif /* GREENLET_CPYTHON_COMPAT_H */ diff --git a/src/greenlet/tests/test_interpreter_shutdown.py b/src/greenlet/tests/test_interpreter_shutdown.py index 37afc52..a39122a 100644 --- a/src/greenlet/tests/test_interpreter_shutdown.py +++ b/src/greenlet/tests/test_interpreter_shutdown.py @@ -315,6 +315,221 @@ def worker(): self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") self.assertIn("OK: greenlet with active exception at shutdown", stdout) + # ----------------------------------------------------------------- + # getcurrent() / greenlet construction / gettrace() / settrace() + # during finalization + # + # clear_deleteme_list() now uses std::swap (zero-allocation) instead + # of copying the vector, and preserves any pending exception around + # its cleanup loop. This prevents crashes during early Py_FinalizeEx + # on Python < 3.11 and avoids swallowing unrelated exceptions. + # These tests verify no crash occurs on any Python version. + # ----------------------------------------------------------------- + + def test_getcurrent_during_atexit_no_crash(self): + """ + Calling greenlet.getcurrent() inside an atexit handler must not + crash on any Python version. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def call_getcurrent_at_exit(): + try: + g = greenlet.getcurrent() + print(f"OK: getcurrent returned {g!r}") + except Exception as e: + print(f"OK: getcurrent raised {type(e).__name__}: {e}") + + atexit.register(call_getcurrent_at_exit) + print("OK: atexit registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: atexit registered", stdout) + self.assertIn("OK:", stdout.split('\n')[-2] if stdout.strip() else "") + + def test_gettrace_during_atexit_no_crash(self): + """ + Calling greenlet.gettrace() during atexit must not crash. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def check_at_exit(): + try: + result = greenlet.gettrace() + print(f"OK: gettrace returned {result!r}") + except Exception as e: + print(f"OK: gettrace raised {type(e).__name__}: {e}") + + atexit.register(check_at_exit) + print("OK: registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: registered", stdout) + + def test_settrace_during_atexit_no_crash(self): + """ + Calling greenlet.settrace() during atexit must not crash. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def check_at_exit(): + try: + greenlet.settrace(lambda *args: None) + print("OK: settrace succeeded") + except Exception as e: + print(f"OK: settrace raised {type(e).__name__}: {e}") + + atexit.register(check_at_exit) + print("OK: registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: registered", stdout) + + def test_getcurrent_with_active_greenlets_during_atexit(self): + """ + Calling getcurrent() during atexit when active greenlets exist. + This is the exact scenario triggered by uWSGI worker recycling. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def worker(): + greenlet.getcurrent().parent.switch("ready") + + greenlets = [] + for i in range(5): + g = greenlet.greenlet(worker) + result = g.switch() + greenlets.append(g) + + def check_at_exit(): + try: + g = greenlet.getcurrent() + print(f"OK: getcurrent returned {g!r}") + except Exception as e: + print(f"OK: getcurrent raised {type(e).__name__}: {e}") + + atexit.register(check_at_exit) + print(f"OK: {len(greenlets)} active greenlets, atexit registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: 5 active greenlets, atexit registered", stdout) + + def test_greenlet_construction_during_atexit_no_crash(self): + """ + Constructing a new greenlet during atexit must not crash. + greenlet.__init__ calls borrow_current() which triggers + clear_deleteme_list() — the same path that crashes in + mod_getcurrent on Python < 3.11 during early Py_FinalizeEx. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def create_greenlets_at_exit(): + try: + def noop(): + pass + g = greenlet.greenlet(noop) + print(f"OK: created greenlet {g!r}") + except Exception as e: + print(f"OK: construction raised {type(e).__name__}: {e}") + + atexit.register(create_greenlets_at_exit) + print("OK: atexit registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: atexit registered", stdout) + + def test_greenlet_construction_with_active_greenlets_during_atexit(self): + """ + Constructing new greenlets during atexit when other active + greenlets already exist (maximizes the chance of a non-empty + deleteme list). + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + + def worker(): + greenlet.getcurrent().parent.switch("ready") + + greenlets = [] + for i in range(10): + g = greenlet.greenlet(worker) + g.switch() + greenlets.append(g) + + def create_at_exit(): + try: + new_greenlets = [] + for i in range(5): + g = greenlet.greenlet(lambda: None) + new_greenlets.append(g) + print(f"OK: created {len(new_greenlets)} greenlets at exit") + except Exception as e: + print(f"OK: raised {type(e).__name__}: {e}") + + atexit.register(create_at_exit) + print(f"OK: {len(greenlets)} active greenlets, atexit registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: 10 active greenlets, atexit registered", stdout) + + def test_greenlet_construction_with_cross_thread_deleteme_during_atexit(self): + """ + Create greenlets in a worker thread, transfer them to the main + thread, then drop them — populating the deleteme list. Then + construct a new greenlet during atexit. On Python < 3.11 + clear_deleteme_list() could previously crash if the + PythonAllocator vector copy failed during early Py_FinalizeEx; + using std::swap eliminates that allocation. + """ + rc, stdout, stderr = self._run_shutdown_script("""\ + import atexit + import greenlet + import threading + + cross_thread_refs = [] + + def thread_worker(): + # Create greenlets in this thread + def gl_body(): + greenlet.getcurrent().parent.switch("ready") + for _ in range(20): + g = greenlet.greenlet(gl_body) + g.switch() + cross_thread_refs.append(g) + + t = threading.Thread(target=thread_worker) + t.start() + t.join() + + # Dropping these references in the main thread + # causes them to be added to the main thread's + # deleteme list (deferred cross-thread dealloc). + cross_thread_refs.clear() + + def create_at_exit(): + try: + g = greenlet.greenlet(lambda: None) + print(f"OK: created greenlet at exit {g!r}") + except Exception as e: + print(f"OK: raised {type(e).__name__}: {e}") + + atexit.register(create_at_exit) + print("OK: cross-thread setup done, atexit registered") + """) + self.assertEqual(rc, 0, f"Process crashed (rc={rc}):\n{stdout}{stderr}") + self.assertIn("OK: cross-thread setup done, atexit registered", stdout) + if __name__ == '__main__': unittest.main() diff --git a/src/greenlet/tests/test_leaks.py b/src/greenlet/tests/test_leaks.py index 6d7fed7..38a9efc 100644 --- a/src/greenlet/tests/test_leaks.py +++ b/src/greenlet/tests/test_leaks.py @@ -445,7 +445,15 @@ def __call__(self): self.wait_for_pending_cleanups() uss_after = self.get_process_uss() - self.assertLessEqual(uss_after, uss_before, "after attempts %d" % (count,)) + # On Windows, USS can fluctuate by tens of KB between + # measurements due to working set trimming, page table + # updates, etc. Allow a small tolerance so OS-level noise + # doesn't cause false failures. Real leaks produce MBs of + # growth (each iteration creates 20k greenlets), so 512 KB + # is well below the detection threshold for genuine issues. + tolerance = 512 * 1024 if WIN else 0 + self.assertLessEqual(uss_after, uss_before + tolerance, + "after attempts %d" % (count,)) @ignores_leakcheck # Because we're just trying to track raw memory, not objects, and running