Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions Lib/test/test_capi/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2665,6 +2665,35 @@ class Test_testinternalcapi(unittest.TestCase):
locals().update(get_test_funcs(_testinternalcapi,
exclude_prefix='test_lock_'))

# Check that the per-thread data-stack chunk cache can hold more than one
# chunk after a deep recursion unwinds, and that what it holds stays within
# 8 * 16 KiB in total.
#
# The script handed to assert_python_ok via "-c":
#   * sizes the recursion from recurse.__code__'s co_framesize so that the
#     frames cover about a dozen 16 KiB chunks (presumably co_framesize is
#     in pointer-sized words, matching words_per_chunk -- confirm),
#   * raises the recursion limit to fit that depth, recurses and returns,
#   * then reads (count, total_size) from
#     _testinternalcapi.get_datastack_cache_stats() and asserts on both.
# The assertion messages carry the measured values to ease debugging.
@support.skip_emscripten_stack_overflow()
@support.skip_wasi_stack_overflow()
def test_datastack_caches_multiple_chunks(self):
code = textwrap.dedent("""
import struct
import sys
import _testinternalcapi

def recurse(n):
_a=_b=_c=_d=_e=_f=_g=_h=_i=_j=None
_k=_l=_m=_n=_o=_p=_q=_r=_s=_t=None
if n:
recurse(n - 1)

words_per_chunk = 16 * 1024 // struct.calcsize("P")
frame_words = _testinternalcapi.get_co_framesize(recurse.__code__)
depth = max(64, 12 * words_per_chunk // frame_words + 32)
sys.setrecursionlimit(depth + 100)

recurse(depth)

count, total_size = _testinternalcapi.get_datastack_cache_stats()
assert count >= 2, (count, total_size, frame_words, depth)
assert total_size <= 8 * 16 * 1024, (
count, total_size, frame_words, depth
)
""")
assert_python_ok("-c", code)


@threading_helper.requires_working_threading()
class Test_PyLock(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Cache multiple popped Python stack chunks per thread, within a small fixed
memory budget, to avoid allocator thrashing when repeatedly crossing more than
one stack chunk boundary.
23 changes: 23 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1337,6 +1337,28 @@ get_co_framesize(PyObject *self, PyObject *arg)
return PyLong_FromLong(code->co_framesize);
}

/* Report the calling thread's cached data-stack chunks.
 *
 * Walks the list headed by tstate->datastack_cached_chunk (linked through
 * `previous`) and returns the pair (count, total_size): the number of chunks
 * on that list and the sum of their `size` fields.
 *
 * Returns NULL if either result object cannot be created.
 */
static PyObject *
get_datastack_cache_stats(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    PyThreadState *ts = _PyThreadState_GET();
    Py_ssize_t n_chunks = 0;
    size_t n_bytes = 0;

    _PyStackChunk *cur = ts->datastack_cached_chunk;
    while (cur != NULL) {
        n_chunks += 1;
        n_bytes += cur->size;
        cur = cur->previous;
    }

    PyObject *bytes_obj = PyLong_FromSize_t(n_bytes);
    if (bytes_obj == NULL) {
        return NULL;
    }
    // The "N" format unit hands our reference to bytes_obj to the result.
    return Py_BuildValue("nN", n_chunks, bytes_obj);
}

static PyObject *
get_co_localskinds(PyObject *self, PyObject *arg)
{
Expand Down Expand Up @@ -2938,6 +2960,7 @@ static PyMethodDef module_functions[] = {
{"iframe_getlasti", iframe_getlasti, METH_O, NULL},
{"code_returns_only_none", code_returns_only_none, METH_O, NULL},
{"get_co_framesize", get_co_framesize, METH_O, NULL},
{"get_datastack_cache_stats", get_datastack_cache_stats, METH_NOARGS, NULL},
{"get_co_localskinds", get_co_localskinds, METH_O, NULL},
{"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts),
METH_VARARGS | METH_KEYWORDS, NULL},
Expand Down
74 changes: 59 additions & 15 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1726,10 +1726,12 @@ clear_datastack(PyThreadState *tstate)
_PyObject_VirtualFree(chunk, chunk->size);
chunk = prev;
}
if (tstate->datastack_cached_chunk != NULL) {
_PyObject_VirtualFree(tstate->datastack_cached_chunk,
tstate->datastack_cached_chunk->size);
tstate->datastack_cached_chunk = NULL;
chunk = tstate->datastack_cached_chunk;
tstate->datastack_cached_chunk = NULL;
while (chunk != NULL) {
_PyStackChunk *prev = chunk->previous;
_PyObject_VirtualFree(chunk, chunk->size);
chunk = prev;
}
}

Expand Down Expand Up @@ -3077,6 +3079,56 @@ _PyInterpreterState_HasFeature(PyInterpreterState *interp, unsigned long feature


#define MINIMUM_OVERHEAD 1000
// Budget, in bytes, for the combined size of the chunks on a thread's
// cached data-stack chunk list; cache_or_free_datastack_chunk() compares
// against it when deciding whether to keep a popped chunk.
#define DATA_STACK_CACHE_MAX_SIZE (8 * _PY_DATA_STACK_CHUNK_SIZE)

/* Remove and return the tightest-fitting cached chunk for a request.
 *
 * Scans the thread's cached-chunk list for chunks whose `size` is at least
 * allocate_size and selects the smallest one (the earliest on the list when
 * several share that size).  The selected chunk is unlinked and returned
 * with `previous` cleared and `top` reset to 0.
 *
 * Returns NULL, leaving the list untouched, when no cached chunk is large
 * enough.
 */
static _PyStackChunk *
pop_cached_datastack_chunk(PyThreadState *tstate, int allocate_size)
{
    const size_t wanted = (size_t)allocate_size;
    size_t smallest_fit = (size_t)-1;
    _PyStackChunk **winner = NULL;

    // Iterate over the *links* rather than the chunks so that the winner can
    // be spliced out afterwards without a second walk.
    for (_PyStackChunk **slot = &tstate->datastack_cached_chunk;
         *slot != NULL;
         slot = &(*slot)->previous)
    {
        size_t candidate_size = (*slot)->size;
        if (candidate_size < wanted || candidate_size >= smallest_fit) {
            continue;
        }
        winner = slot;
        smallest_fit = candidate_size;
    }

    if (winner == NULL) {
        return NULL;
    }

    _PyStackChunk *picked = *winner;
    *winner = picked->previous;
    picked->top = 0;
    picked->previous = NULL;
    return picked;
}

/* Dispose of a chunk that has been detached from the data stack: either put
 * it at the head of the thread's cached-chunk list or release it.
 *
 * The caller must have cleared chunk->previous.  The chunk's size is added
 * to the sizes of the chunks already cached, one at a time; as soon as the
 * running total exceeds DATA_STACK_CACHE_MAX_SIZE the chunk is freed with
 * _PyObject_VirtualFree() and nothing is cached.
 *
 * Note that the budget is only tested while walking existing entries, so
 * when the cache is empty the chunk is kept whatever its size.
 */
static void
cache_or_free_datastack_chunk(PyThreadState *tstate, _PyStackChunk *chunk)
{
    assert(chunk->previous == NULL);

    _PyStackChunk *head = tstate->datastack_cached_chunk;
    size_t running_total = chunk->size;

    _PyStackChunk *entry = head;
    while (entry != NULL) {
        running_total += entry->size;
        if (running_total > DATA_STACK_CACHE_MAX_SIZE) {
            // Over budget: drop the incoming chunk, keep the cache as is.
            _PyObject_VirtualFree(chunk, chunk->size);
            return;
        }
        entry = entry->previous;
    }

    // Within budget: the chunk becomes the new head of the cache list.
    chunk->previous = head;
    chunk->top = 0;
    tstate->datastack_cached_chunk = chunk;
}

static PyObject **
push_chunk(PyThreadState *tstate, int size)
Expand All @@ -3086,13 +3138,9 @@ push_chunk(PyThreadState *tstate, int size)
allocate_size *= 2;
}
_PyStackChunk *new;
if (tstate->datastack_cached_chunk != NULL
&& (size_t)allocate_size <= tstate->datastack_cached_chunk->size)
{
new = tstate->datastack_cached_chunk;
tstate->datastack_cached_chunk = NULL;
new = pop_cached_datastack_chunk(tstate, allocate_size);
if (new != NULL) {
new->previous = tstate->datastack_chunk;
new->top = 0;
}
else {
new = allocate_chunk(allocate_size, tstate->datastack_chunk);
Expand Down Expand Up @@ -3134,17 +3182,13 @@ _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame)
if (base == &tstate->datastack_chunk->data[0]) {
_PyStackChunk *chunk = tstate->datastack_chunk;
_PyStackChunk *previous = chunk->previous;
_PyStackChunk *cached = tstate->datastack_cached_chunk;
// push_chunk ensures that the root chunk is never popped:
assert(previous);
tstate->datastack_top = &previous->data[previous->top];
tstate->datastack_chunk = previous;
tstate->datastack_limit = (PyObject **)(((char *)previous) + previous->size);
chunk->previous = NULL;
if (cached != NULL) {
_PyObject_VirtualFree(cached, cached->size);
}
tstate->datastack_cached_chunk = chunk;
cache_or_free_datastack_chunk(tstate, chunk);
}
else {
assert(tstate->datastack_top);
Expand Down
Loading