Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,14 @@ typedef struct _err_stackitem {
} _PyErr_StackItem;

typedef struct _stack_chunk {
struct _stack_chunk *previous;
size_t size;
size_t top;
PyObject * data[1]; /* Variable sized */
struct _stack_chunk *previous;
PyObject *data[1]; /* Variable sized */
} _PyStackChunk;

/* Minimum size of data stack chunk */
#define _PY_DATA_STACK_CHUNK_SIZE (16*1024)
#define _PY_STACK_CHUNK_MIN_SIZE 4096
/* Size of the _PyStackChunk header, i.e. the byte offset of the `data`
   array from the start of the chunk. */
#define _PY_STACK_CHUNK_OVERHEADS (offsetof(_PyStackChunk, data))

struct _ts {
/* See Python/ceval.c for comments explaining most fields */

Expand Down Expand Up @@ -195,10 +195,9 @@ struct _ts {
/* Unique thread state id. */
uint64_t id;

_PyStackChunk *datastack_chunk;
PyObject **datastack_top;
PyObject **datastack_limit;
_PyStackChunk *datastack_cached_chunk;
_PyStackChunk *stack_chunk_list;
PyObject **stack_top;
PyObject **stack_limit;
/* XXX signal handlers should also be here */

/* The following fields are here to avoid allocation during init.
Expand Down
4 changes: 2 additions & 2 deletions Include/internal/pycore_debug_offsets.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ typedef struct _Py_DebugOffsets {
uint64_t last_profiled_frame;
uint64_t thread_id;
uint64_t native_thread_id;
uint64_t datastack_chunk;
uint64_t stack_chunk_list;
uint64_t status;
uint64_t holds_gil;
uint64_t gil_requested;
Expand Down Expand Up @@ -287,7 +287,7 @@ typedef struct _Py_DebugOffsets {
.last_profiled_frame = offsetof(PyThreadState, last_profiled_frame), \
.thread_id = offsetof(PyThreadState, thread_id), \
.native_thread_id = offsetof(PyThreadState, native_thread_id), \
.datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
.stack_chunk_list = offsetof(PyThreadState, stack_chunk_list), \
.status = offsetof(PyThreadState, _status), \
.holds_gil = offsetof(PyThreadState, holds_gil), \
.gil_requested = offsetof(PyThreadState, gil_requested), \
Expand Down
68 changes: 58 additions & 10 deletions Include/internal/pycore_interpframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,12 +330,12 @@ static inline bool
_PyThreadState_HasStackSpace(PyThreadState *tstate, int size)
{
assert(
(tstate->datastack_top == NULL && tstate->datastack_limit == NULL)
(tstate->stack_top == NULL && tstate->stack_limit == NULL)
||
(tstate->datastack_top != NULL && tstate->datastack_limit != NULL)
(tstate->stack_top != NULL && tstate->stack_limit != NULL)
);
return tstate->datastack_top != NULL &&
size < tstate->datastack_limit - tstate->datastack_top;
return tstate->stack_top != NULL &&
size < tstate->stack_limit - tstate->stack_top;
}

extern _PyInterpreterFrame *
Expand All @@ -352,9 +352,9 @@ _PyFrame_PushUnchecked(PyThreadState *tstate, _PyStackRef func, int null_locals_
CALL_STAT_INC(frames_pushed);
PyFunctionObject *func_obj = (PyFunctionObject *)PyStackRef_AsPyObjectBorrow(func);
PyCodeObject *code = (PyCodeObject *)func_obj->func_code;
_PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->datastack_top;
tstate->datastack_top += code->co_framesize;
assert(tstate->datastack_top < tstate->datastack_limit);
_PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->stack_top;
tstate->stack_top += code->co_framesize;
assert(tstate->stack_top < tstate->stack_limit);
_PyFrame_Initialize(tstate, new_frame, func, NULL, code, null_locals_from,
previous);
return new_frame;
Expand All @@ -366,9 +366,9 @@ static inline _PyInterpreterFrame *
_PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int stackdepth, _PyInterpreterFrame * previous)
{
CALL_STAT_INC(frames_pushed);
_PyInterpreterFrame *frame = (_PyInterpreterFrame *)tstate->datastack_top;
tstate->datastack_top += code->co_framesize;
assert(tstate->datastack_top < tstate->datastack_limit);
_PyInterpreterFrame *frame = (_PyInterpreterFrame *)tstate->stack_top;
tstate->stack_top += code->co_framesize;
assert(tstate->stack_top < tstate->stack_limit);
frame->previous = previous;
frame->f_funcobj = PyStackRef_None;
frame->f_executable = PyStackRef_FromPyObjectNew(code);
Expand Down Expand Up @@ -404,6 +404,54 @@ PyAPI_FUNC(_PyInterpreterFrame *)
_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, _PyStackRef func,
PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs, _PyInterpreterFrame *previous);

/* Return true if `ptr` points into the data area of `chunk`.
 *
 * The data area starts at chunk->data and its length in bytes is
 * chunk->size minus the header overhead (_PY_STACK_CHUNK_OVERHEADS).
 * `ptr` may be an arbitrary address; it does not have to point into
 * `chunk` (that is precisely what this function tests).
 */
static inline bool
ptr_in_chunk(const char *ptr, const _PyStackChunk *chunk)
{
    assert(chunk != NULL);
    /* A chunk with no room beyond its header cannot contain any pointer. */
    if (chunk->size <= _PY_STACK_CHUNK_OVERHEADS) {
        return false;
    }
    /* Compare the addresses as integers: subtracting pointers that do not
       belong to the same object is undefined behaviour, and callers pass
       pointers that may live in a different chunk. */
    const uintptr_t start = (uintptr_t)&chunk->data[0];
    const uintptr_t addr = (uintptr_t)ptr;
    const size_t usable_size = chunk->size - _PY_STACK_CHUNK_OVERHEADS;
    return addr >= start && addr - start < usable_size;
}

/* Return the byte offset of `ptr` measured from the start of `chunk`
 * (the chunk header included, not from chunk->data).
 *
 * `ptr` must point into the data area of `chunk`; this is checked with an
 * assertion only.
 */
static inline uintptr_t
get_offset_in_chunk(const char *ptr, const _PyStackChunk *chunk)
{
    assert(chunk != NULL);
    /* No NULL check on chunk->data: it is an array member, so its address
       can never be NULL and such a check only triggers compiler warnings. */
    assert(ptr_in_chunk(ptr, chunk));

    /* Both pointers refer to the same chunk here, so the subtraction is
       well defined and non-negative. */
    return (uintptr_t)(ptr - (const char *)chunk);
}

/* Walk the chunk list starting at `stack_chunk_list` and following the
 * `previous` links, and return the offset of `base` (as computed by
 * get_offset_in_chunk) within the first chunk that contains it.
 *
 * `base` is expected to lie in one of the chunks; reaching the end of the
 * list without a match is treated as unreachable.
 */
static inline uintptr_t
get_offset_in_chunk_list(char *base, _PyStackChunk *stack_chunk_list)
{
    assert(stack_chunk_list != NULL);
    assert(base != NULL);

    _PyStackChunk *candidate = stack_chunk_list;
    for (;;) {
        if (ptr_in_chunk(base, candidate)) {
            return get_offset_in_chunk(base, candidate);
        }
        candidate = candidate->previous;
        if (candidate == NULL) {
            break;
        }
    }

    assert(false); // did not find correct chunk
    Py_UNREACHABLE();
}

/* Map `frame` to an address inside the head chunk of the thread's chunk
 * list (tstate->stack_chunk_list).
 *
 * If `frame` already points into the head chunk it is returned unchanged.
 * Otherwise it is looked up in the older chunks (via `previous`) and the
 * address at the same offset from the start of the head chunk is returned.
 * NOTE(review): this presumably relies on the head chunk holding the same
 * contents at the same offsets as the older chunk -- confirm against the
 * code that grows the stack.
 */
static inline void *
_Py_ensure_frame_in_current_stack_chunk(PyThreadState *tstate, char *frame)
{
    assert(tstate != NULL);
    assert(frame != NULL);

    _PyStackChunk *head = tstate->stack_chunk_list;
    if (!ptr_in_chunk(frame, head)) {
        /* Not in the head chunk: locate it in an older chunk and rebase. */
        const uintptr_t offset = get_offset_in_chunk_list(frame, head->previous);
        return (char *)head + offset;
    }
    return frame;
}

#ifdef __cplusplus
}
#endif
Expand Down
3 changes: 3 additions & 0 deletions Include/internal/pycore_runtime_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ extern PyTypeObject _PyExc_MemoryError;
._whence = _PyThreadState_WHENCE_NOTSET, \
.py_recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \
.context_ver = 1, \
.stack_chunk_list = NULL, \
.stack_limit = NULL, \
.stack_top = NULL, \
}


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Change the resizing implementation of the Python stack to use a resizable array instead of a chain of arrays, to avoid degenerate performance cases and allow further JIT optimizations.
2 changes: 1 addition & 1 deletion Modules/_remote_debugging/debug_offsets_validation.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ validate_fixed_field(
#define PY_REMOTE_DEBUG_THREAD_STATE_FIELDS(APPLY, buffer_size) \
APPLY(thread_state, native_thread_id, sizeof(unsigned long), _Alignof(long), buffer_size); \
APPLY(thread_state, interp, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size); \
APPLY(thread_state, datastack_chunk, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size); \
APPLY(thread_state, stack_chunk_list, sizeof(uintptr_t), _Alignof(uintptr_t), buffer_size); \
APPLY(thread_state, status, FIELD_SIZE(PyThreadState, _status), _Alignof(unsigned int), buffer_size); \
APPLY(thread_state, holds_gil, sizeof(int), _Alignof(int), buffer_size); \
APPLY(thread_state, gil_requested, sizeof(int), _Alignof(int), buffer_size); \
Expand Down
39 changes: 13 additions & 26 deletions Modules/_remote_debugging/frames.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ process_single_stack_chunk(
StackChunkInfo *chunk_info
) {
// Start with default size assumption
size_t current_size = _PY_DATA_STACK_CHUNK_SIZE;
size_t current_size = _PY_STACK_CHUNK_MIN_SIZE;

char *this_chunk = PyMem_RawMalloc(current_size);
if (!this_chunk) {
Expand Down Expand Up @@ -87,45 +87,32 @@ copy_stack_chunks(RemoteUnwinderObject *unwinder,
size_t count = 0;
size_t max_chunks = 16;

if (read_ptr(unwinder, tstate_addr + (uintptr_t)unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) {
if (read_ptr(unwinder, tstate_addr + (uintptr_t)unwinder->debug_offsets.thread_state.stack_chunk_list, &chunk_addr)) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read initial stack chunk address");
return -1;
}

if (!chunk_addr) {
out_chunks->chunks = NULL;
out_chunks->count = 0;
return 0;
}

chunks = PyMem_RawMalloc(max_chunks * sizeof(StackChunkInfo));
if (!chunks) {
PyErr_NoMemory();
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunks array");
return -1;
}

const size_t MAX_STACK_CHUNKS = 4096;
while (chunk_addr != 0 && count < MAX_STACK_CHUNKS) {
// Grow array if needed
if (count >= max_chunks) {
max_chunks *= 2;
StackChunkInfo *new_chunks = PyMem_RawRealloc(chunks, max_chunks * sizeof(StackChunkInfo));
if (!new_chunks) {
PyErr_NoMemory();
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to grow stack chunks array");
goto error;
}
chunks = new_chunks;
}

// Process this chunk
if (process_single_stack_chunk(unwinder, chunk_addr, &chunks[count]) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process stack chunk");
goto error;
}

// Get next chunk address and increment count
chunk_addr = GET_MEMBER(uintptr_t, chunks[count].local_copy, offsetof(_PyStackChunk, previous));
count++;
// Process this chunk
if (process_single_stack_chunk(unwinder, chunk_addr, &chunks[count]) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process stack chunk");
goto error;
}

out_chunks->chunks = chunks;
out_chunks->count = count;
out_chunks->count = 1;
return 0;

error:
Expand Down
9 changes: 7 additions & 2 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1925,8 +1925,13 @@ clear_thread_frame(PyThreadState *tstate, _PyInterpreterFrame * frame)
assert(frame->owner == FRAME_OWNED_BY_THREAD);
// Make sure that this is, indeed, the top frame. We can't check this in
// _PyThreadState_PopFrame, since f_code is already cleared at that point:
assert((PyObject **)frame + _PyFrame_GetCode(frame)->co_framesize ==
tstate->datastack_top);
assert(
_Py_ensure_frame_in_current_stack_chunk( // the frame might be in a previous stack chunk
tstate,
(char *)((PyObject **)frame + _PyFrame_GetCode(frame)->co_framesize)
)
== tstate->stack_top
);
assert(frame->frame_obj == NULL || frame->frame_obj->f_frame == frame);
_PyFrame_ClearExceptCode(frame);
PyStackRef_CLEAR(frame->f_executable);
Expand Down
Loading
Loading