Skip to content

Commit cfaca85

Browse files
committed
gh-142183: Change data stack to use a resizable array
1 parent 44f1b98 commit cfaca85

File tree

8 files changed

+152
-129
lines changed

8 files changed

+152
-129
lines changed

Include/cpython/pystate.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,14 @@ typedef struct _err_stackitem {
5555
} _PyErr_StackItem;
5656

5757
typedef struct _stack_chunk {
58-
struct _stack_chunk *previous;
5958
size_t size;
60-
size_t top;
61-
PyObject * data[1]; /* Variable sized */
59+
struct _stack_chunk *previous;
60+
PyObject *data[1]; /* Variable sized */
6261
} _PyStackChunk;
6362

64-
/* Minimum size of data stack chunk */
65-
#define _PY_DATA_STACK_CHUNK_SIZE (16*1024)
63+
#define _PY_STACK_CHUNK_MIN_SIZE 4096
64+
#define _PY_STACK_CHUNK_OVERHEADS (offsetof(_PyStackChunk, data))
65+
6666
struct _ts {
6767
/* See Python/ceval.c for comments explaining most fields */
6868

@@ -195,10 +195,9 @@ struct _ts {
195195
/* Unique thread state id. */
196196
uint64_t id;
197197

198-
_PyStackChunk *datastack_chunk;
199-
PyObject **datastack_top;
200-
PyObject **datastack_limit;
201-
_PyStackChunk *datastack_cached_chunk;
198+
_PyStackChunk *stack_chunk_list;
199+
PyObject **stack_top;
200+
PyObject **stack_limit;
202201
/* XXX signal handlers should also be here */
203202

204203
/* The following fields are here to avoid allocation during init.

Include/internal/pycore_debug_offsets.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ typedef struct _Py_DebugOffsets {
106106
uint64_t last_profiled_frame;
107107
uint64_t thread_id;
108108
uint64_t native_thread_id;
109-
uint64_t datastack_chunk;
109+
uint64_t stack_chunk_list;
110110
uint64_t status;
111111
uint64_t holds_gil;
112112
uint64_t gil_requested;
@@ -287,7 +287,7 @@ typedef struct _Py_DebugOffsets {
287287
.last_profiled_frame = offsetof(PyThreadState, last_profiled_frame), \
288288
.thread_id = offsetof(PyThreadState, thread_id), \
289289
.native_thread_id = offsetof(PyThreadState, native_thread_id), \
290-
.datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
290+
.stack_chunk_list = offsetof(PyThreadState, stack_chunk_list), \
291291
.status = offsetof(PyThreadState, _status), \
292292
.holds_gil = offsetof(PyThreadState, holds_gil), \
293293
.gil_requested = offsetof(PyThreadState, gil_requested), \

Include/internal/pycore_interpframe.h

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -330,12 +330,12 @@ static inline bool
330330
_PyThreadState_HasStackSpace(PyThreadState *tstate, int size)
331331
{
332332
assert(
333-
(tstate->datastack_top == NULL && tstate->datastack_limit == NULL)
333+
(tstate->stack_top == NULL && tstate->stack_limit == NULL)
334334
||
335-
(tstate->datastack_top != NULL && tstate->datastack_limit != NULL)
335+
(tstate->stack_top != NULL && tstate->stack_limit != NULL)
336336
);
337-
return tstate->datastack_top != NULL &&
338-
size < tstate->datastack_limit - tstate->datastack_top;
337+
return tstate->stack_top != NULL &&
338+
size < tstate->stack_limit - tstate->stack_top;
339339
}
340340

341341
extern _PyInterpreterFrame *
@@ -352,9 +352,9 @@ _PyFrame_PushUnchecked(PyThreadState *tstate, _PyStackRef func, int null_locals_
352352
CALL_STAT_INC(frames_pushed);
353353
PyFunctionObject *func_obj = (PyFunctionObject *)PyStackRef_AsPyObjectBorrow(func);
354354
PyCodeObject *code = (PyCodeObject *)func_obj->func_code;
355-
_PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->datastack_top;
356-
tstate->datastack_top += code->co_framesize;
357-
assert(tstate->datastack_top < tstate->datastack_limit);
355+
_PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->stack_top;
356+
tstate->stack_top += code->co_framesize;
357+
assert(tstate->stack_top < tstate->stack_limit);
358358
_PyFrame_Initialize(tstate, new_frame, func, NULL, code, null_locals_from,
359359
previous);
360360
return new_frame;
@@ -366,9 +366,9 @@ static inline _PyInterpreterFrame *
366366
_PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int stackdepth, _PyInterpreterFrame * previous)
367367
{
368368
CALL_STAT_INC(frames_pushed);
369-
_PyInterpreterFrame *frame = (_PyInterpreterFrame *)tstate->datastack_top;
370-
tstate->datastack_top += code->co_framesize;
371-
assert(tstate->datastack_top < tstate->datastack_limit);
369+
_PyInterpreterFrame *frame = (_PyInterpreterFrame *)tstate->stack_top;
370+
tstate->stack_top += code->co_framesize;
371+
assert(tstate->stack_top < tstate->stack_limit);
372372
frame->previous = previous;
373373
frame->f_funcobj = PyStackRef_None;
374374
frame->f_executable = PyStackRef_FromPyObjectNew(code);
@@ -404,6 +404,54 @@ PyAPI_FUNC(_PyInterpreterFrame *)
404404
_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, _PyStackRef func,
405405
PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs, _PyInterpreterFrame *previous);
406406

407+
static inline bool
408+
ptr_in_chunk(const char *ptr, const _PyStackChunk *chunk)
409+
{
410+
assert(chunk != NULL);
411+
const char *start = (char *)&chunk->data[0];
412+
const intptr_t offset = ptr - start;
413+
const intptr_t usable_size = (intptr_t)(chunk->size - _PY_STACK_CHUNK_OVERHEADS);
414+
return offset >= 0 && offset < usable_size && start + offset == ptr;
415+
}
416+
417+
static inline uintptr_t
418+
get_offset_in_chunk(const char *ptr, const _PyStackChunk *chunk)
419+
{
420+
assert(chunk != NULL);
421+
assert(chunk->data != NULL);
422+
assert(ptr_in_chunk(ptr, chunk));
423+
424+
return ptr - (char *)chunk;
425+
}
426+
427+
static inline uintptr_t
428+
get_offset_in_chunk_list(char *base, _PyStackChunk *stack_chunk_list)
429+
{
430+
assert(stack_chunk_list != NULL);
431+
assert(base != NULL);
432+
_PyStackChunk *chunk = stack_chunk_list;
433+
do {
434+
if (ptr_in_chunk(base, chunk)) {
435+
return get_offset_in_chunk(base, chunk);
436+
}
437+
chunk = chunk->previous;
438+
} while (chunk);
439+
assert(false); // did not find correct chunk
440+
Py_UNREACHABLE();
441+
}
442+
443+
static inline void *
444+
_Py_ensure_frame_in_current_stack_chunk(PyThreadState *tstate, char *frame)
445+
{
446+
assert(tstate != NULL);
447+
assert(frame != NULL);
448+
if (ptr_in_chunk(frame, tstate->stack_chunk_list)) {
449+
return frame;
450+
}
451+
uintptr_t offset = get_offset_in_chunk_list(frame, tstate->stack_chunk_list->previous);
452+
return ((char *)tstate->stack_chunk_list) + offset;
453+
}
454+
407455
#ifdef __cplusplus
408456
}
409457
#endif

Include/internal/pycore_runtime_init.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ extern PyTypeObject _PyExc_MemoryError;
179179
._whence = _PyThreadState_WHENCE_NOTSET, \
180180
.py_recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \
181181
.context_ver = 1, \
182+
.stack_chunk_list = NULL, \
183+
.stack_limit = NULL, \
184+
.stack_top = NULL, \
182185
}
183186

184187

Modules/_remote_debugging/debug_offsets_validation.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ validate_fixed_field(
252252
#define PY_REMOTE_DEBUG_THREAD_STATE_FIELDS(APPLY, buffer_size) \
253253
APPLY(thread_state, native_thread_id, sizeof(unsigned long), _Alignof(long), buffer_size); \
254254
APPLY(thread_state, interp, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size); \
255-
APPLY(thread_state, datastack_chunk, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size); \
255+
APPLY(thread_state, stack_chunk_list, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size); \
256256
APPLY(thread_state, status, FIELD_SIZE(PyThreadState, _status), _Alignof(unsigned int), buffer_size); \
257257
APPLY(thread_state, holds_gil, sizeof(int), _Alignof(int), buffer_size); \
258258
APPLY(thread_state, gil_requested, sizeof(int), _Alignof(int), buffer_size); \

Modules/_remote_debugging/frames.c

Lines changed: 13 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ process_single_stack_chunk(
2727
StackChunkInfo *chunk_info
2828
) {
2929
// Start with default size assumption
30-
size_t current_size = _PY_DATA_STACK_CHUNK_SIZE;
30+
size_t current_size = _PY_STACK_CHUNK_MIN_SIZE;
3131

3232
char *this_chunk = PyMem_RawMalloc(current_size);
3333
if (!this_chunk) {
@@ -87,45 +87,32 @@ copy_stack_chunks(RemoteUnwinderObject *unwinder,
8787
size_t count = 0;
8888
size_t max_chunks = 16;
8989

90-
if (read_ptr(unwinder, tstate_addr + (uintptr_t)unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) {
90+
if (read_ptr(unwinder, tstate_addr + (uintptr_t)unwinder->debug_offsets.thread_state.stack_chunk_list, &chunk_addr)) {
9191
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read initial stack chunk address");
9292
return -1;
9393
}
9494

95+
if (!chunk_addr) {
96+
out_chunks->chunks = NULL;
97+
out_chunks->count = 0;
98+
return 0;
99+
}
100+
95101
chunks = PyMem_RawMalloc(max_chunks * sizeof(StackChunkInfo));
96102
if (!chunks) {
97103
PyErr_NoMemory();
98104
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunks array");
99105
return -1;
100106
}
101107

102-
const size_t MAX_STACK_CHUNKS = 4096;
103-
while (chunk_addr != 0 && count < MAX_STACK_CHUNKS) {
104-
// Grow array if needed
105-
if (count >= max_chunks) {
106-
max_chunks *= 2;
107-
StackChunkInfo *new_chunks = PyMem_RawRealloc(chunks, max_chunks * sizeof(StackChunkInfo));
108-
if (!new_chunks) {
109-
PyErr_NoMemory();
110-
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to grow stack chunks array");
111-
goto error;
112-
}
113-
chunks = new_chunks;
114-
}
115-
116-
// Process this chunk
117-
if (process_single_stack_chunk(unwinder, chunk_addr, &chunks[count]) < 0) {
118-
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process stack chunk");
119-
goto error;
120-
}
121-
122-
// Get next chunk address and increment count
123-
chunk_addr = GET_MEMBER(uintptr_t, chunks[count].local_copy, offsetof(_PyStackChunk, previous));
124-
count++;
108+
// Process this chunk
109+
if (process_single_stack_chunk(unwinder, chunk_addr, &chunks[count]) < 0) {
110+
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process stack chunk");
111+
goto error;
125112
}
126113

127114
out_chunks->chunks = chunks;
128-
out_chunks->count = count;
115+
out_chunks->count = 1;
129116
return 0;
130117

131118
error:

Python/ceval.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1925,8 +1925,13 @@ clear_thread_frame(PyThreadState *tstate, _PyInterpreterFrame * frame)
19251925
assert(frame->owner == FRAME_OWNED_BY_THREAD);
19261926
// Make sure that this is, indeed, the top frame. We can't check this in
19271927
// _PyThreadState_PopFrame, since f_code is already cleared at that point:
1928-
assert((PyObject **)frame + _PyFrame_GetCode(frame)->co_framesize ==
1929-
tstate->datastack_top);
1928+
assert(
1929+
_Py_ensure_frame_in_current_stack_chunk( // the frame might be in a previous stack chunk
1930+
tstate,
1931+
(char *)((PyObject **)frame + _PyFrame_GetCode(frame)->co_framesize)
1932+
)
1933+
== tstate->stack_top
1934+
);
19301935
assert(frame->frame_obj == NULL || frame->frame_obj->f_frame == frame);
19311936
_PyFrame_ClearExceptCode(frame);
19321937
PyStackRef_CLEAR(frame->f_executable);

0 commit comments

Comments
 (0)