diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py
index 4c16bbd4cb0acf..14bc05773d7935 100644
--- a/Lib/test/test_capi/test_misc.py
+++ b/Lib/test/test_capi/test_misc.py
@@ -2665,6 +2665,40 @@ class Test_testinternalcapi(unittest.TestCase):
     locals().update(get_test_funcs(_testinternalcapi,
                                    exclude_prefix='test_lock_'))
 
+    @support.skip_emscripten_stack_overflow()
+    @support.skip_wasi_stack_overflow()
+    def test_datastack_caches_multiple_chunks(self):
+        # The script below recurses deeply enough that its frames need
+        # roughly a dozen data stack chunks (assuming 16 KiB chunks), then
+        # unwinds and checks, via get_datastack_cache_stats(), that at
+        # least two chunks are cached and that the cached chunks add up
+        # to no more than 8 * 16 KiB.
+        code = textwrap.dedent("""
+            import struct
+            import sys
+            import _testinternalcapi
+
+            def recurse(n):
+                _a=_b=_c=_d=_e=_f=_g=_h=_i=_j=None
+                _k=_l=_m=_n=_o=_p=_q=_r=_s=_t=None
+                if n:
+                    recurse(n - 1)
+
+            words_per_chunk = 16 * 1024 // struct.calcsize("P")
+            frame_words = _testinternalcapi.get_co_framesize(recurse.__code__)
+            depth = max(64, 12 * words_per_chunk // frame_words + 32)
+            sys.setrecursionlimit(depth + 100)
+
+            recurse(depth)
+
+            count, total_size = _testinternalcapi.get_datastack_cache_stats()
+            assert count >= 2, (count, total_size, frame_words, depth)
+            assert total_size <= 8 * 16 * 1024, (
+                count, total_size, frame_words, depth
+            )
+        """)
+        assert_python_ok("-c", code)
+
 
 @threading_helper.requires_working_threading()
 class Test_PyLock(unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-28-12-30-00.gh-issue-142183.bounded-datastack-cache.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-28-12-30-00.gh-issue-142183.bounded-datastack-cache.rst
new file mode 100644
index 00000000000000..75a2255610610f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-28-12-30-00.gh-issue-142183.bounded-datastack-cache.rst
@@ -0,0 +1,3 @@
+Cache multiple popped Python stack chunks per thread, within a small fixed
+memory budget, to avoid allocator thrashing when repeatedly crossing more than
+one stack chunk boundary.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 619f9f50574429..bf252cc0b41766 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1337,6 +1337,32 @@ get_co_framesize(PyObject *self, PyObject *arg)
     return PyLong_FromLong(code->co_framesize);
 }
 
+// Return a (count, total_size) tuple for the calling thread's cached data
+// stack chunks: how many chunks are linked from
+// tstate->datastack_cached_chunk, and the sum of their 'size' fields.
+static PyObject *
+get_datastack_cache_stats(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+    Py_ssize_t count = 0;
+    size_t total_size = 0;
+    for (_PyStackChunk *chunk = tstate->datastack_cached_chunk;
+         chunk != NULL;
+         chunk = chunk->previous)
+    {
+        count++;
+        total_size += chunk->size;
+    }
+
+    PyObject *size = PyLong_FromSize_t(total_size);
+    if (size == NULL) {
+        return NULL;
+    }
+    // "N" hands the reference to 'size' over to the new tuple.
+    PyObject *res = Py_BuildValue("nN", count, size);
+    return res;
+}
+
 static PyObject *
 get_co_localskinds(PyObject *self, PyObject *arg)
 {
@@ -2938,6 +2964,7 @@ static PyMethodDef module_functions[] = {
     {"iframe_getlasti", iframe_getlasti, METH_O, NULL},
     {"code_returns_only_none", code_returns_only_none, METH_O, NULL},
     {"get_co_framesize", get_co_framesize, METH_O, NULL},
+    {"get_datastack_cache_stats", get_datastack_cache_stats, METH_NOARGS, NULL},
     {"get_co_localskinds", get_co_localskinds, METH_O, NULL},
     {"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts),
      METH_VARARGS | METH_KEYWORDS, NULL},
diff --git a/Python/pystate.c b/Python/pystate.c
index 2df24597e65785..73ac56d9f9200e 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -1726,10 +1726,12 @@ clear_datastack(PyThreadState *tstate)
         _PyObject_VirtualFree(chunk, chunk->size);
         chunk = prev;
     }
-    if (tstate->datastack_cached_chunk != NULL) {
-        _PyObject_VirtualFree(tstate->datastack_cached_chunk,
-                              tstate->datastack_cached_chunk->size);
-        tstate->datastack_cached_chunk = NULL;
+    chunk = tstate->datastack_cached_chunk;
+    tstate->datastack_cached_chunk = NULL;
+    while (chunk != NULL) {
+        _PyStackChunk *prev = chunk->previous;
+        _PyObject_VirtualFree(chunk, chunk->size);
+        chunk = prev;
     }
 }
 
@@ -3077,6 +3079,71 @@ _PyInterpreterState_HasFeature(PyInterpreterState *interp, unsigned long feature
 
 
 #define MINIMUM_OVERHEAD 1000
+// Budget, in bytes of chunk->size, that cache_or_free_datastack_chunk()
+// enforces on the per-thread list of cached data stack chunks.
+#define DATA_STACK_CACHE_MAX_SIZE (8 * _PY_DATA_STACK_CHUNK_SIZE)
+
+/* Unlink and return the smallest cached chunk whose size is at least
+ * allocate_size, or return NULL if no cached chunk is large enough.
+ * Among equally sized candidates the one nearest the list head wins.
+ * The returned chunk has 'previous' cleared and 'top' reset to 0. */
+static _PyStackChunk *
+pop_cached_datastack_chunk(PyThreadState *tstate, int allocate_size)
+{
+    _PyStackChunk **best_link = NULL;
+    size_t best_size = (size_t)-1;
+
+    // 'link' addresses the pointer that refers to the current chunk, so the
+    // chosen chunk can be unlinked below without tracking its predecessor.
+    for (_PyStackChunk **link = &tstate->datastack_cached_chunk;
+         *link != NULL;
+         link = &(*link)->previous)
+    {
+        _PyStackChunk *chunk = *link;
+        if ((size_t)allocate_size <= chunk->size && chunk->size < best_size) {
+            best_link = link;
+            best_size = chunk->size;
+        }
+    }
+    if (best_link == NULL) {
+        return NULL;
+    }
+
+    _PyStackChunk *chunk = *best_link;
+    *best_link = chunk->previous;
+    chunk->previous = NULL;
+    chunk->top = 0;
+    return chunk;
+}
+
+/* Take a detached chunk (chunk->previous must be NULL) and either push it
+ * onto the front of the thread's cached-chunk list with 'top' reset to 0,
+ * or free it when its size plus the sizes of the chunks already cached
+ * exceeds DATA_STACK_CACHE_MAX_SIZE.
+ *
+ * The limit is only tested while walking the existing list, so a chunk is
+ * always kept when the list is empty, whatever its size. */
+static void
+cache_or_free_datastack_chunk(PyThreadState *tstate, _PyStackChunk *chunk)
+{
+    assert(chunk->previous == NULL);
+
+    size_t cached_size = chunk->size;
+    for (_PyStackChunk *cached = tstate->datastack_cached_chunk;
+         cached != NULL;
+         cached = cached->previous)
+    {
+        cached_size += cached->size;
+        if (cached_size > DATA_STACK_CACHE_MAX_SIZE) {
+            _PyObject_VirtualFree(chunk, chunk->size);
+            return;
+        }
+    }
+
+    chunk->top = 0;
+    chunk->previous = tstate->datastack_cached_chunk;
+    tstate->datastack_cached_chunk = chunk;
+}
 
 static PyObject **
 push_chunk(PyThreadState *tstate, int size)
@@ -3086,13 +3153,9 @@ push_chunk(PyThreadState *tstate, int size)
         allocate_size *= 2;
     }
     _PyStackChunk *new;
-    if (tstate->datastack_cached_chunk != NULL
-        && (size_t)allocate_size <= tstate->datastack_cached_chunk->size)
-    {
-        new = tstate->datastack_cached_chunk;
-        tstate->datastack_cached_chunk = NULL;
+    new = pop_cached_datastack_chunk(tstate, allocate_size);
+    if (new != NULL) {
         new->previous = tstate->datastack_chunk;
-        new->top = 0;
     }
     else {
         new = allocate_chunk(allocate_size, tstate->datastack_chunk);
@@ -3134,17 +3197,13 @@ _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame)
     if (base == &tstate->datastack_chunk->data[0]) {
         _PyStackChunk *chunk = tstate->datastack_chunk;
         _PyStackChunk *previous = chunk->previous;
-        _PyStackChunk *cached = tstate->datastack_cached_chunk;
         // push_chunk ensures that the root chunk is never popped:
         assert(previous);
         tstate->datastack_top = &previous->data[previous->top];
         tstate->datastack_chunk = previous;
         tstate->datastack_limit = (PyObject **)(((char *)previous) + previous->size);
         chunk->previous = NULL;
-        if (cached != NULL) {
-            _PyObject_VirtualFree(cached, cached->size);
-        }
-        tstate->datastack_cached_chunk = chunk;
+        cache_or_free_datastack_chunk(tstate, chunk);
     }
     else {
         assert(tstate->datastack_top);