From 7686abe063eb7fbf85f33437b9256e0d9e0f4a4f Mon Sep 17 00:00:00 2001 From: Eric Froemling Date: Thu, 30 Apr 2026 06:27:57 -0700 Subject: [PATCH 1/7] gh-149085: Add `max_threads` keyword to `faulthandler.dump_traceback()` (GH-149106) Add a keyword-only `max_threads` argument to `dump_traceback()` and `dump_traceback_later()`, defaulting to 100 to preserve existing behavior. Allows server processes with many worker threads to dump beyond the historical 100-thread cap (previously a hardcoded `MAX_NTHREADS = 100` in `Python/traceback.c`). The cap matters in practice: tstates are prepended to the PyInterpreterState linked list, so the dump walks newest-first. With more than 100 threads alive, the main thread (oldest, at the tail) is silently elided from watchdog dumps -- exactly the thread that's usually wanted. The hardcoded value is moved to a new internal macro `_Py_TRACEBACK_MAX_NTHREADS` in `pycore_traceback.h` so the in-tree fatal-signal callers all reference one source of truth. --- Doc/library/faulthandler.rst | 31 ++- Doc/whatsnew/3.15.rst | 9 + Include/internal/pycore_faulthandler.h | 3 + .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + Include/internal/pycore_traceback.h | 3 +- .../internal/pycore_unicodeobject_generated.h | 4 + Lib/test/test_faulthandler.py | 110 +++++++++++ ...-04-28-16-30-48.gh-issue-149085.5aNgBD.rst | 3 + Modules/clinic/faulthandler.c.h | 185 +++++++++++++----- Modules/faulthandler.c | 58 ++++-- Python/pylifecycle.c | 2 +- Python/traceback.c | 13 +- 14 files changed, 349 insertions(+), 75 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-04-28-16-30-48.gh-issue-149085.5aNgBD.rst diff --git a/Doc/library/faulthandler.rst b/Doc/library/faulthandler.rst index 677966a8b2eaab..529e97bae6df8e 100644 --- a/Doc/library/faulthandler.rst +++ b/Doc/library/faulthandler.rst @@ -31,7 +31,8 @@ tracebacks: * Each string is limited to 500 
characters. * Only the filename, the function name and the line number are displayed. (no source code) -* It is limited to 100 frames and 100 threads. +* It is limited to 100 frames per thread, and 100 threads + (configurable via *max_threads*). * The order is reversed: the most recent call is shown first. By default, the Python traceback is written to :data:`sys.stderr`. To see @@ -55,16 +56,20 @@ at Python startup. Dumping the traceback --------------------- -.. function:: dump_traceback(file=sys.stderr, all_threads=True) +.. function:: dump_traceback(file=sys.stderr, all_threads=True, *, max_threads=100) Dump the tracebacks of all threads into *file*. If *all_threads* is - ``False``, dump only the current thread. + ``False``, dump only the current thread. *max_threads* caps the number + of threads dumped. .. seealso:: :func:`traceback.print_tb`, which can be used to print a traceback object. .. versionchanged:: 3.5 Added support for passing file descriptor to this function. + .. versionchanged:: next + Added the *max_threads* keyword argument. + Dumping the C stack ------------------- @@ -100,7 +105,7 @@ instead of the stack, even if the operating system supports dumping stacks. Fault handler state ------------------- -.. function:: enable(file=sys.stderr, all_threads=True, c_stack=True) +.. function:: enable(file=sys.stderr, all_threads=True, c_stack=True, *, max_threads=100) Enable the fault handler: install handlers for the :const:`~signal.SIGSEGV`, :const:`~signal.SIGFPE`, :const:`~signal.SIGABRT`, :const:`~signal.SIGBUS` @@ -116,6 +121,8 @@ Fault handler state traceback, unless the system does not support it. See :func:`dump_c_stack` for more information on compatibility. + *max_threads* caps the number of threads dumped when a fatal signal fires. + .. versionchanged:: 3.5 Added support for passing file descriptor to this function. @@ -133,6 +140,9 @@ Fault handler state .. versionchanged:: 3.14 The dump now displays the C stack trace if *c_stack* is true. 
+ .. versionchanged:: next + Added the *max_threads* keyword argument. + .. function:: disable() Disable the fault handler: uninstall the signal handlers installed by @@ -146,7 +156,7 @@ Fault handler state Dumping the tracebacks after a timeout -------------------------------------- -.. function:: dump_traceback_later(timeout, repeat=False, file=sys.stderr, exit=False) +.. function:: dump_traceback_later(timeout, repeat=False, file=sys.stderr, exit=False, *, max_threads=100) Dump the tracebacks of all threads, after a timeout of *timeout* seconds, or every *timeout* seconds if *repeat* is ``True``. If *exit* is ``True``, call @@ -154,7 +164,7 @@ Dumping the tracebacks after a timeout :c:func:`!_exit` exits the process immediately, which means it doesn't do any cleanup like flushing file buffers.) If the function is called twice, the new call replaces previous parameters and resets the timeout. The timer has a - sub-second resolution. + sub-second resolution. *max_threads* caps the number of threads dumped. The *file* must be kept open until the traceback is dumped or :func:`cancel_dump_traceback_later` is called: see :ref:`issue with file @@ -168,6 +178,9 @@ Dumping the tracebacks after a timeout .. versionchanged:: 3.7 This function is now always available. + .. versionchanged:: next + Added the *max_threads* keyword argument. + .. function:: cancel_dump_traceback_later() Cancel the last call to :func:`dump_traceback_later`. @@ -176,11 +189,12 @@ Dumping the tracebacks after a timeout Dumping the traceback on a user signal -------------------------------------- -.. function:: register(signum, file=sys.stderr, all_threads=True, chain=False) +.. function:: register(signum, file=sys.stderr, all_threads=True, chain=False, *, max_threads=100) Register a user signal: install a handler for the *signum* signal to dump the traceback of all threads, or of the current thread if *all_threads* is ``False``, into *file*. Call the previous handler if chain is ``True``. 
+ *max_threads* caps the number of threads dumped. The *file* must be kept open until the signal is unregistered by :func:`unregister`: see :ref:`issue with file descriptors `. @@ -190,6 +204,9 @@ Dumping the traceback on a user signal .. versionchanged:: 3.5 Added support for passing file descriptor to this function. + .. versionchanged:: next + Added the *max_threads* keyword argument. + .. function:: unregister(signum) Unregister a user signal: uninstall the handler of the *signum* signal diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 56b2553a401920..59b9688c18e1ee 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -905,6 +905,15 @@ difflib (Contributed by Jiahao Li in :gh:`134580`.) +faulthandler +------------ + +* Added the *max_threads* parameter in :func:`faulthandler.enable`, + :func:`faulthandler.dump_traceback`, :func:`faulthandler.dump_traceback_later`, + and :func:`faulthandler.register`. + (Contributed by Eric Froemling in :gh:`149085`.) + + functools --------- diff --git a/Include/internal/pycore_faulthandler.h b/Include/internal/pycore_faulthandler.h index 78cd657e6ae5ae..9ddd70d39ed0d5 100644 --- a/Include/internal/pycore_faulthandler.h +++ b/Include/internal/pycore_faulthandler.h @@ -42,6 +42,7 @@ struct faulthandler_user_signal { int chain; _Py_sighandler_t previous; PyInterpreterState *interp; + Py_ssize_t max_threads; }; #endif /* FAULTHANDLER_USER */ @@ -57,6 +58,7 @@ struct _faulthandler_runtime_state { void *exc_handler; #endif int c_stack; + Py_ssize_t max_threads; } fatal_error; struct { @@ -68,6 +70,7 @@ struct _faulthandler_runtime_state { int exit; char *header; size_t header_len; + Py_ssize_t max_threads; /* The main thread always holds this lock. It is only released when faulthandler_thread() is interrupted before this thread exits, or at Python exit. 
*/ diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 4fd42185d8a4a1..4d6d5ce9c5ea26 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1897,6 +1897,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mask)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(match)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(max_length)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(max_threads)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxdigits)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxevents)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxlen)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index f2d43c22069b92..20dcf81ccf15fa 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -620,6 +620,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(mask) STRUCT_FOR_ID(match) STRUCT_FOR_ID(max_length) + STRUCT_FOR_ID(max_threads) STRUCT_FOR_ID(maxdigits) STRUCT_FOR_ID(maxevents) STRUCT_FOR_ID(maxlen) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 6ee64a461d8568..1ce91dc51ea0b7 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1895,6 +1895,7 @@ extern "C" { INIT_ID(mask), \ INIT_ID(match), \ INIT_ID(max_length), \ + INIT_ID(max_threads), \ INIT_ID(maxdigits), \ INIT_ID(maxevents), \ INIT_ID(maxlen), \ diff --git a/Include/internal/pycore_traceback.h b/Include/internal/pycore_traceback.h index 6b5e24979d5321..fbf6bc2c41f51d 100644 --- a/Include/internal/pycore_traceback.h +++ b/Include/internal/pycore_traceback.h @@ -61,7 +61,8 @@ extern void _Py_DumpTraceback( extern const char* 
_Py_DumpTracebackThreads( int fd, PyInterpreterState *interp, - PyThreadState *current_tstate); + PyThreadState *current_tstate, + Py_ssize_t max_threads); /* Write a Unicode object into the file descriptor fd. Encode the string to ASCII using the backslashreplace error handler. diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index bcb117e1091674..c7c23494845e01 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2260,6 +2260,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(max_threads); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(maxdigits); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/test/test_faulthandler.py b/Lib/test/test_faulthandler.py index 5e88ae47775dbd..11df59f2346f31 100644 --- a/Lib/test/test_faulthandler.py +++ b/Lib/test/test_faulthandler.py @@ -719,6 +719,76 @@ def test_dump_traceback_later_fd(self): def test_dump_traceback_later_twice(self): self.check_dump_traceback_later(loops=2) + def test_dump_traceback_max_threads(self): + # max_threads caps the dump and writes "...\n" when truncated. + # Spawn N worker threads, dump with cap < N, and verify the + # marker is present and exactly CAP thread headers are written. 
+ code = dedent(""" + import faulthandler + import sys + import threading + + NTHREADS = 6 + CAP = 3 + + ready = threading.Barrier(NTHREADS + 1) + stop = threading.Event() + + def worker(): + ready.wait() + stop.wait() + + threads = [threading.Thread(target=worker) for _ in range(NTHREADS)] + for t in threads: + t.start() + ready.wait() + try: + faulthandler.dump_traceback(file=sys.stderr, max_threads=CAP) + finally: + stop.set() + for t in threads: + t.join() + """).strip() + proc = script_helper.assert_python_ok('-c', code) + output = proc.err + # Truncation marker is written on its own line when the cap is hit. + self.assertIn(b"\n...\n", output) + # Cap of 3 means exactly 3 thread headers in the dump. + self.assertEqual(output.count(b"Thread 0x"), 3) + + @skip_segfault_on_android + @unittest.skipIf(support.Py_GIL_DISABLED, + "fatal-signal handler only dumps the current thread " + "when the GIL is disabled") + def test_enable_max_threads(self): + # enable(max_threads=N) caps the thread dump produced when a + # fatal signal fires. + code = dedent(""" + import faulthandler + import threading + + NTHREADS = 6 + CAP = 3 + + ready = threading.Barrier(NTHREADS + 1) + stop = threading.Event() + + def worker(): + ready.wait() + stop.wait() + + for _ in range(NTHREADS): + threading.Thread(target=worker, daemon=True).start() + ready.wait() + faulthandler.enable(max_threads=CAP) + faulthandler._sigsegv() + """).strip() + output, exitcode = self.get_output(code) + output = '\n'.join(output) + # Cap of 3 means the dump is truncated with "..." on its own line. 
+ self.assertIn("\n...\n", output) + self.assertNotEqual(exitcode, 0) + @unittest.skipIf(not hasattr(faulthandler, "register"), "need faulthandler.register") def check_register(self, filename=False, all_threads=False, @@ -825,6 +895,46 @@ def test_register_threads(self): def test_register_chain(self): self.check_register(chain=True) + @unittest.skipIf(not hasattr(faulthandler, "register"), + "need faulthandler.register") + def test_register_max_threads(self): + # register(max_threads=N) caps the thread dump produced when + # the registered signal fires. + code = dedent(""" + import faulthandler + import signal + import threading + + NTHREADS = 6 + CAP = 3 + + ready = threading.Barrier(NTHREADS + 1) + stop = threading.Event() + + def worker(): + ready.wait() + stop.wait() + + threads = [threading.Thread(target=worker) for _ in range(NTHREADS)] + for t in threads: + t.start() + ready.wait() + try: + faulthandler.register(signal.SIGUSR1, all_threads=True, + max_threads=CAP) + signal.raise_signal(signal.SIGUSR1) + finally: + stop.set() + for t in threads: + t.join() + """).strip() + proc = script_helper.assert_python_ok('-c', code) + output = proc.err + # Cap of 3 means the dump is truncated with "..." on its own line. + self.assertIn(b"\n...\n", output) + # Cap of 3 means exactly 3 thread headers in the dump. + self.assertEqual(output.count(b"Thread 0x"), 3) + @contextmanager def check_stderr_none(self): stderr = sys.stderr diff --git a/Misc/NEWS.d/next/Library/2026-04-28-16-30-48.gh-issue-149085.5aNgBD.rst b/Misc/NEWS.d/next/Library/2026-04-28-16-30-48.gh-issue-149085.5aNgBD.rst new file mode 100644 index 00000000000000..a5b92287bd0ef8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-28-16-30-48.gh-issue-149085.5aNgBD.rst @@ -0,0 +1,3 @@ +Add a *max_threads* keyword argument to :func:`faulthandler.dump_traceback`, +:func:`faulthandler.dump_traceback_later`, :func:`faulthandler.enable`, and +:func:`faulthandler.register`. 
diff --git a/Modules/clinic/faulthandler.c.h b/Modules/clinic/faulthandler.c.h index de8280ce26b9ce..e06cfdcfba2993 100644 --- a/Modules/clinic/faulthandler.c.h +++ b/Modules/clinic/faulthandler.c.h @@ -6,23 +6,26 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_long.h" // _PyLong_UnsignedInt_Converter() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(faulthandler_dump_traceback_py__doc__, -"dump_traceback($module, /, file=sys.stderr, all_threads=True)\n" +"dump_traceback($module, /, file=sys.stderr, all_threads=True, *,\n" +" max_threads=100)\n" "--\n" "\n" "Dump the traceback of the current thread into file.\n" "\n" -"Dump the traceback of all threads if all_threads is true."); +"Dump the traceback of all threads if all_threads is true. max_threads\n" +"caps the number of threads dumped."); #define FAULTHANDLER_DUMP_TRACEBACK_PY_METHODDEF \ {"dump_traceback", _PyCFunction_CAST(faulthandler_dump_traceback_py), METH_FASTCALL|METH_KEYWORDS, faulthandler_dump_traceback_py__doc__}, static PyObject * faulthandler_dump_traceback_py_impl(PyObject *module, PyObject *file, - int all_threads); + int all_threads, Py_ssize_t max_threads); static PyObject * faulthandler_dump_traceback_py(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -30,7 +33,7 @@ faulthandler_dump_traceback_py(PyObject *module, PyObject *const *args, Py_ssize PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -39,7 +42,7 @@ faulthandler_dump_traceback_py(PyObject *module, PyObject *const *args, Py_ssize } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(file), &_Py_ID(all_threads), }, + .ob_item = { &_Py_ID(file), 
&_Py_ID(all_threads), &_Py_ID(max_threads), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -48,17 +51,18 @@ faulthandler_dump_traceback_py(PyObject *module, PyObject *const *args, Py_ssize # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"file", "all_threads", NULL}; + static const char * const _keywords[] = {"file", "all_threads", "max_threads", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "dump_traceback", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; PyObject *file = NULL; int all_threads = 1; + Py_ssize_t max_threads = 100; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -74,12 +78,33 @@ faulthandler_dump_traceback_py(PyObject *module, PyObject *const *args, Py_ssize goto skip_optional_pos; } } - all_threads = PyObject_IsTrue(args[1]); - if (all_threads < 0) { - goto exit; + if (args[1]) { + all_threads = PyObject_IsTrue(args[1]); + if (all_threads < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } } skip_optional_pos: - return_value = faulthandler_dump_traceback_py_impl(module, file, all_threads); + if (!noptargs) { + goto skip_optional_kwonly; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[2]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + max_threads = ival; + } +skip_optional_kwonly: + return_value = faulthandler_dump_traceback_py_impl(module, file, all_threads, max_threads); exit: return return_value; @@ -149,7 +174,8 @@ faulthandler_dump_c_stack_py(PyObject *module, PyObject *const *args, Py_ssize_t } PyDoc_STRVAR(faulthandler_py_enable__doc__, -"enable($module, /, file=sys.stderr, all_threads=True, c_stack=True)\n" 
+"enable($module, /, file=sys.stderr, all_threads=True, c_stack=True, *,\n" +" max_threads=100)\n" "--\n" "\n" "Enable the fault handler."); @@ -159,7 +185,8 @@ PyDoc_STRVAR(faulthandler_py_enable__doc__, static PyObject * faulthandler_py_enable_impl(PyObject *module, PyObject *file, - int all_threads, int c_stack); + int all_threads, int c_stack, + Py_ssize_t max_threads); static PyObject * faulthandler_py_enable(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -167,7 +194,7 @@ faulthandler_py_enable(PyObject *module, PyObject *const *args, Py_ssize_t nargs PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -176,7 +203,7 @@ faulthandler_py_enable(PyObject *module, PyObject *const *args, Py_ssize_t nargs } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(file), &_Py_ID(all_threads), &_Py_ID(c_stack), }, + .ob_item = { &_Py_ID(file), &_Py_ID(all_threads), &_Py_ID(c_stack), &_Py_ID(max_threads), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -185,18 +212,19 @@ faulthandler_py_enable(PyObject *module, PyObject *const *args, Py_ssize_t nargs # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"file", "all_threads", "c_stack", NULL}; + static const char * const _keywords[] = {"file", "all_threads", "c_stack", "max_threads", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "enable", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 0; PyObject *file = NULL; int all_threads = 1; int c_stack = 1; + Py_ssize_t max_threads = 100; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -221,12 +249,33 @@ faulthandler_py_enable(PyObject *module, PyObject *const *args, Py_ssize_t nargs goto skip_optional_pos; } } - c_stack = PyObject_IsTrue(args[2]); - if (c_stack < 0) { - goto exit; + if (args[2]) { + c_stack = PyObject_IsTrue(args[2]); + if (c_stack < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } } skip_optional_pos: - return_value = faulthandler_py_enable_impl(module, file, all_threads, c_stack); + if (!noptargs) { + goto skip_optional_kwonly; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[3]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + max_threads = ival; + } +skip_optional_kwonly: + return_value = faulthandler_py_enable_impl(module, file, all_threads, c_stack, max_threads); exit: return return_value; @@ -280,13 +329,14 @@ faulthandler_is_enabled(PyObject *module, PyObject *Py_UNUSED(ignored)) PyDoc_STRVAR(faulthandler_dump_traceback_later__doc__, "dump_traceback_later($module, /, timeout, repeat=False,\n" -" file=sys.stderr, exit=False)\n" +" file=sys.stderr, exit=False, *, max_threads=100)\n" "--\n" "\n" "Dump the traceback of all threads in timeout seconds.\n" "\n" "If repeat is true, the tracebacks of all threads are dumped every timeout\n" -"seconds. If exit is true, call _exit(1) which is not safe."); +"seconds. If exit is true, call _exit(1) which is not safe. 
max_threads\n" +"caps the number of threads dumped."); #define FAULTHANDLER_DUMP_TRACEBACK_LATER_METHODDEF \ {"dump_traceback_later", _PyCFunction_CAST(faulthandler_dump_traceback_later), METH_FASTCALL|METH_KEYWORDS, faulthandler_dump_traceback_later__doc__}, @@ -294,7 +344,8 @@ PyDoc_STRVAR(faulthandler_dump_traceback_later__doc__, static PyObject * faulthandler_dump_traceback_later_impl(PyObject *module, PyObject *timeout_obj, int repeat, - PyObject *file, int exit); + PyObject *file, int exit, + Py_ssize_t max_threads); static PyObject * faulthandler_dump_traceback_later(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -302,7 +353,7 @@ faulthandler_dump_traceback_later(PyObject *module, PyObject *const *args, Py_ss PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 4 + #define NUM_KEYWORDS 5 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -311,7 +362,7 @@ faulthandler_dump_traceback_later(PyObject *module, PyObject *const *args, Py_ss } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(timeout), &_Py_ID(repeat), &_Py_ID(file), &_Py_ID(exit), }, + .ob_item = { &_Py_ID(timeout), &_Py_ID(repeat), &_Py_ID(file), &_Py_ID(exit), &_Py_ID(max_threads), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -320,19 +371,20 @@ faulthandler_dump_traceback_later(PyObject *module, PyObject *const *args, Py_ss # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"timeout", "repeat", "file", "exit", NULL}; + static const char * const _keywords[] = {"timeout", "repeat", "file", "exit", "max_threads", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "dump_traceback_later", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; PyObject *timeout_obj; int repeat = 0; PyObject *file = NULL; int exit = 0; + Py_ssize_t max_threads = 100; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 4, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -358,12 +410,33 @@ faulthandler_dump_traceback_later(PyObject *module, PyObject *const *args, Py_ss goto skip_optional_pos; } } - exit = PyObject_IsTrue(args[3]); - if (exit < 0) { - goto exit; + if (args[3]) { + exit = PyObject_IsTrue(args[3]); + if (exit < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } } skip_optional_pos: - return_value = faulthandler_dump_traceback_later_impl(module, timeout_obj, repeat, file, exit); + if (!noptargs) { + goto skip_optional_kwonly; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[4]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + max_threads = ival; + } +skip_optional_kwonly: + return_value = faulthandler_dump_traceback_later_impl(module, timeout_obj, repeat, file, exit, max_threads); exit: return return_value; @@ -391,20 +464,22 @@ faulthandler_cancel_dump_traceback_later_py(PyObject *module, PyObject *Py_UNUSE PyDoc_STRVAR(faulthandler_register_py__doc__, "register($module, /, signum, file=sys.stderr, all_threads=True,\n" -" chain=False)\n" +" chain=False, *, max_threads=100)\n" "--\n" "\n" "Register a handler for the signal \'signum\'.\n" "\n" "Dump the traceback of the current thread, or of all threads if\n" -"all_threads is True, into file."); +"all_threads is True, into file. 
max_threads caps the number of threads\n" +"dumped."); #define FAULTHANDLER_REGISTER_PY_METHODDEF \ {"register", _PyCFunction_CAST(faulthandler_register_py), METH_FASTCALL|METH_KEYWORDS, faulthandler_register_py__doc__}, static PyObject * faulthandler_register_py_impl(PyObject *module, int signum, PyObject *file, - int all_threads, int chain); + int all_threads, int chain, + Py_ssize_t max_threads); static PyObject * faulthandler_register_py(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -412,7 +487,7 @@ faulthandler_register_py(PyObject *module, PyObject *const *args, Py_ssize_t nar PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 4 + #define NUM_KEYWORDS 5 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -421,7 +496,7 @@ faulthandler_register_py(PyObject *module, PyObject *const *args, Py_ssize_t nar } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(signum), &_Py_ID(file), &_Py_ID(all_threads), &_Py_ID(chain), }, + .ob_item = { &_Py_ID(signum), &_Py_ID(file), &_Py_ID(all_threads), &_Py_ID(chain), &_Py_ID(max_threads), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -430,19 +505,20 @@ faulthandler_register_py(PyObject *module, PyObject *const *args, Py_ssize_t nar # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"signum", "file", "all_threads", "chain", NULL}; + static const char * const _keywords[] = {"signum", "file", "all_threads", "chain", "max_threads", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "register", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; int signum; PyObject *file = NULL; int all_threads = 1; int chain = 0; + Py_ssize_t max_threads = 100; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 4, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -471,12 +547,33 @@ faulthandler_register_py(PyObject *module, PyObject *const *args, Py_ssize_t nar goto skip_optional_pos; } } - chain = PyObject_IsTrue(args[3]); - if (chain < 0) { - goto exit; + if (args[3]) { + chain = PyObject_IsTrue(args[3]); + if (chain < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } } skip_optional_pos: - return_value = faulthandler_register_py_impl(module, signum, file, all_threads, chain); + if (!noptargs) { + goto skip_optional_kwonly; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[4]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + max_threads = ival; + } +skip_optional_kwonly: + return_value = faulthandler_register_py_impl(module, signum, file, all_threads, chain, max_threads); exit: return return_value; @@ -685,4 +782,4 @@ faulthandler__raise_exception(PyObject *module, PyObject *const *args, Py_ssize_ #ifndef FAULTHANDLER__RAISE_EXCEPTION_METHODDEF #define FAULTHANDLER__RAISE_EXCEPTION_METHODDEF #endif /* !defined(FAULTHANDLER__RAISE_EXCEPTION_METHODDEF) */ -/*[clinic end generated code: output=31bf0149d0d02ccf input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2452d767c85130a6 input=a9049054013a1b77]*/ diff --git a/Modules/faulthandler.c b/Modules/faulthandler.c index bc7731c2588dc0..923f6f5b56d32b 100644 --- a/Modules/faulthandler.c +++ b/Modules/faulthandler.c @@ -185,7 +185,8 @@ get_thread_state(void) static void faulthandler_dump_traceback(int fd, int all_threads, - PyInterpreterState *interp) + PyInterpreterState *interp, + Py_ssize_t max_threads) { static volatile int reentrant = 0; @@ -205,7 +206,7 @@ 
faulthandler_dump_traceback(int fd, int all_threads, PyThreadState *tstate = PyGILState_GetThisThreadState(); if (all_threads == 1) { - (void)_Py_DumpTracebackThreads(fd, NULL, tstate); + (void)_Py_DumpTracebackThreads(fd, NULL, tstate, max_threads); } else { if (all_threads == FT_IGNORE_ALL_THREADS) { @@ -243,16 +244,19 @@ faulthandler.dump_traceback as faulthandler_dump_traceback_py file: object(py_default="sys.stderr") = NULL all_threads: bool = True + * + max_threads: Py_ssize_t = 100 Dump the traceback of the current thread into file. -Dump the traceback of all threads if all_threads is true. +Dump the traceback of all threads if all_threads is true. max_threads +caps the number of threads dumped. [clinic start generated code]*/ static PyObject * faulthandler_dump_traceback_py_impl(PyObject *module, PyObject *file, - int all_threads) -/*[clinic end generated code: output=34efece0ca18314f input=b832ec55e27a7898]*/ + int all_threads, Py_ssize_t max_threads) +/*[clinic end generated code: output=ee1bbc2668e56e77 input=38630eb40e641de6]*/ { PyThreadState *tstate; const char *errmsg; @@ -273,7 +277,7 @@ faulthandler_dump_traceback_py_impl(PyObject *module, PyObject *file, /* gh-128400: Accessing other thread states while they're running * isn't safe if those threads are running. 
*/ _PyEval_StopTheWorld(interp); - errmsg = _Py_DumpTracebackThreads(fd, NULL, tstate); + errmsg = _Py_DumpTracebackThreads(fd, NULL, tstate, max_threads); _PyEval_StartTheWorld(interp); if (errmsg != NULL) { PyErr_SetString(PyExc_RuntimeError, errmsg); @@ -409,7 +413,8 @@ faulthandler_fatal_error(int signum) } faulthandler_dump_traceback(fd, deduce_all_threads(), - fatal_error.interp); + fatal_error.interp, + fatal_error.max_threads); faulthandler_dump_c_stack(fd); _Py_DumpExtensionModules(fd, fatal_error.interp); @@ -485,7 +490,8 @@ faulthandler_exc_handler(struct _EXCEPTION_POINTERS *exc_info) } faulthandler_dump_traceback(fd, deduce_all_threads(), - fatal_error.interp); + fatal_error.interp, + fatal_error.max_threads); faulthandler_dump_c_stack(fd); /* call the next exception handler */ @@ -590,14 +596,17 @@ faulthandler.enable as faulthandler_py_enable file: object(py_default="sys.stderr") = NULL all_threads: bool = True c_stack: bool = True + * + max_threads: Py_ssize_t = 100 Enable the fault handler. 
[clinic start generated code]*/ static PyObject * faulthandler_py_enable_impl(PyObject *module, PyObject *file, - int all_threads, int c_stack) -/*[clinic end generated code: output=580d89b5eb62f1cb input=77277746a88b25ca]*/ + int all_threads, int c_stack, + Py_ssize_t max_threads) +/*[clinic end generated code: output=7ee655332317c47a input=e64759714f27b466]*/ { int fd; PyThreadState *tstate; @@ -617,6 +626,7 @@ faulthandler_py_enable_impl(PyObject *module, PyObject *file, fatal_error.all_threads = all_threads; fatal_error.interp = PyThreadState_GetInterpreter(tstate); fatal_error.c_stack = c_stack; + fatal_error.max_threads = max_threads; if (faulthandler_enable() < 0) { return NULL; @@ -703,7 +713,8 @@ faulthandler_thread(void *unused) (void)_Py_write_noraise(thread.fd, thread.header, (int)thread.header_len); - errmsg = _Py_DumpTracebackThreads(thread.fd, thread.interp, NULL); + errmsg = _Py_DumpTracebackThreads(thread.fd, thread.interp, NULL, + thread.max_threads); ok = (errmsg == NULL); if (thread.exit) @@ -777,18 +788,22 @@ faulthandler.dump_traceback_later repeat: bool = False file: object(py_default="sys.stderr") = NULL exit: bool = False + * + max_threads: Py_ssize_t = 100 Dump the traceback of all threads in timeout seconds. If repeat is true, the tracebacks of all threads are dumped every timeout -seconds. If exit is true, call _exit(1) which is not safe. +seconds. If exit is true, call _exit(1) which is not safe. max_threads +caps the number of threads dumped. 
[clinic start generated code]*/ static PyObject * faulthandler_dump_traceback_later_impl(PyObject *module, PyObject *timeout_obj, int repeat, - PyObject *file, int exit) -/*[clinic end generated code: output=a24d80d694d25ba2 input=fd005625ecc2ba9a]*/ + PyObject *file, int exit, + Py_ssize_t max_threads) +/*[clinic end generated code: output=543a0f3807113394 input=6836555ee157ddb4]*/ { PyTime_t timeout, timeout_us; int fd; @@ -861,6 +876,7 @@ faulthandler_dump_traceback_later_impl(PyObject *module, thread.exit = exit; thread.header = header; thread.header_len = header_len; + thread.max_threads = max_threads; /* Arm these locks to serve as events when released */ PyThread_acquire_lock(thread.running, 1); @@ -945,7 +961,8 @@ faulthandler_user(int signum) if (!user->enabled) return; - faulthandler_dump_traceback(user->fd, user->all_threads, user->interp); + faulthandler_dump_traceback(user->fd, user->all_threads, user->interp, + user->max_threads); #ifdef HAVE_SIGACTION if (user->chain) { @@ -995,17 +1012,21 @@ faulthandler.register as faulthandler_register_py file: object(py_default="sys.stderr") = NULL all_threads: bool = True chain: bool = False + * + max_threads: Py_ssize_t = 100 Register a handler for the signal 'signum'. Dump the traceback of the current thread, or of all threads if -all_threads is True, into file. +all_threads is True, into file. max_threads caps the number of threads +dumped. 
[clinic start generated code]*/ static PyObject * faulthandler_register_py_impl(PyObject *module, int signum, PyObject *file, - int all_threads, int chain) -/*[clinic end generated code: output=1f770cee150a56cd input=ae9de829e850907b]*/ + int all_threads, int chain, + Py_ssize_t max_threads) +/*[clinic end generated code: output=d63a5b4f388dee5f input=c75096a20de502fe]*/ { int fd; user_signal_t *user; @@ -1056,6 +1077,7 @@ faulthandler_register_py_impl(PyObject *module, int signum, PyObject *file, user->all_threads = all_threads; user->chain = chain; user->interp = PyThreadState_GetInterpreter(tstate); + user->max_threads = max_threads; user->enabled = 1; Py_RETURN_NONE; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 0a88e32bb6b65e..57ce519c3c10ef 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -3342,7 +3342,7 @@ _Py_FatalError_DumpTracebacks(int fd, PyInterpreterState *interp, /* display the current Python stack */ #ifndef Py_GIL_DISABLED - _Py_DumpTracebackThreads(fd, interp, tstate); + _Py_DumpTracebackThreads(fd, interp, tstate, 0); #else _Py_DumpTraceback(fd, tstate); #endif diff --git a/Python/traceback.c b/Python/traceback.c index 1e8c9c879f9aac..f0e0df7101bc21 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -55,7 +55,7 @@ #define MAX_STRING_LENGTH 500 #define MAX_FRAME_DEPTH 100 -#define MAX_NTHREADS 100 +#define DEFAULT_MAX_NTHREADS 100 /* Function from Parser/tokenizer/file_tokenizer.c */ extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *); @@ -1265,8 +1265,13 @@ write_thread_id(int fd, PyThreadState *tstate, int is_current) handlers if signals were received. 
*/ const char* _Py_NO_SANITIZE_THREAD _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp, - PyThreadState *current_tstate) + PyThreadState *current_tstate, + Py_ssize_t max_threads) { + if (max_threads == 0) { + max_threads = DEFAULT_MAX_NTHREADS; + } + if (current_tstate == NULL) { /* _Py_DumpTracebackThreads() is called from signal handlers by faulthandler. @@ -1310,13 +1315,13 @@ _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp, return "unable to get the thread head state"; /* Dump the traceback of each thread */ - unsigned int nthreads = 0; + Py_ssize_t nthreads = 0; _Py_BEGIN_SUPPRESS_IPH do { if (nthreads != 0) PUTS(fd, "\n"); - if (nthreads >= MAX_NTHREADS) { + if (nthreads >= max_threads) { PUTS(fd, "...\n"); break; } From 9d41e2a534aab460dd656ef251adaed5d2d64b93 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 30 Apr 2026 16:33:13 +0200 Subject: [PATCH 2/7] gh-111264: Add a note about untrusted input to tomllib docs (GH-146209) Co-authored-by: Stan Ulbrych --- Doc/library/tomllib.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/library/tomllib.rst b/Doc/library/tomllib.rst index 2bac968c2bea68..55610784362eb8 100644 --- a/Doc/library/tomllib.rst +++ b/Doc/library/tomllib.rst @@ -19,6 +19,12 @@ support writing TOML. Added TOML 1.1.0 support. See the :ref:`What's New ` for details. +.. warning:: + + Be cautious when parsing data from untrusted sources. + A malicious TOML string may cause the decoder to consume considerable + CPU and memory resources. + Limiting the size of data to be parsed is recommended. .. seealso:: From f0e90d78eb115da99e4ce0d1425593f3b71fd975 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 30 Apr 2026 16:57:29 +0200 Subject: [PATCH 3/7] gh-148829: Move sentinelobject.h to Include/cpython/ (#149186) This C API is not part of the limited C API, so move it to the CPython C API. 
--- Include/Python.h | 2 +- Include/{ => cpython}/sentinelobject.h | 4 ++-- Makefile.pre.in | 2 +- PCbuild/pythoncore.vcxproj | 2 +- PCbuild/pythoncore.vcxproj.filters | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) rename Include/{ => cpython}/sentinelobject.h (93%) diff --git a/Include/Python.h b/Include/Python.h index 1272e2464f91d1..d5e38b8b0201ee 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -117,7 +117,7 @@ __pragma(warning(disable: 4201)) #include "cpython/genobject.h" #include "descrobject.h" #include "genericaliasobject.h" -#include "sentinelobject.h" +#include "cpython/sentinelobject.h" #include "warnings.h" #include "weakrefobject.h" #include "structseq.h" diff --git a/Include/sentinelobject.h b/Include/cpython/sentinelobject.h similarity index 93% rename from Include/sentinelobject.h rename to Include/cpython/sentinelobject.h index 9d8577767b7485..0b6ff0f17e6f8c 100644 --- a/Include/sentinelobject.h +++ b/Include/cpython/sentinelobject.h @@ -1,12 +1,12 @@ /* Sentinel object interface */ +#ifndef Py_LIMITED_API #ifndef Py_SENTINELOBJECT_H #define Py_SENTINELOBJECT_H #ifdef __cplusplus extern "C" { #endif -#ifndef Py_LIMITED_API PyAPI_DATA(PyTypeObject) PySentinel_Type; #define PySentinel_Check(op) Py_IS_TYPE((op), &PySentinel_Type) @@ -14,9 +14,9 @@ PyAPI_DATA(PyTypeObject) PySentinel_Type; PyAPI_FUNC(PyObject *) PySentinel_New( const char *name, const char *module_name); -#endif #ifdef __cplusplus } #endif #endif /* !Py_SENTINELOBJECT_H */ +#endif /* !Py_LIMITED_API */ diff --git a/Makefile.pre.in b/Makefile.pre.in index 2ce53c6a816212..0edf55d991a05e 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1241,7 +1241,6 @@ PYTHON_HEADERS= \ $(srcdir)/Include/pytypedefs.h \ $(srcdir)/Include/rangeobject.h \ $(srcdir)/Include/refcount.h \ - $(srcdir)/Include/sentinelobject.h \ $(srcdir)/Include/setobject.h \ $(srcdir)/Include/sliceobject.h \ $(srcdir)/Include/structmember.h \ @@ -1309,6 +1308,7 @@ PYTHON_HEADERS= \ 
$(srcdir)/Include/cpython/pystats.h \ $(srcdir)/Include/cpython/pythonrun.h \ $(srcdir)/Include/cpython/pythread.h \ + $(srcdir)/Include/cpython/sentinelobject.h \ $(srcdir)/Include/cpython/setobject.h \ $(srcdir)/Include/cpython/sliceobject.h \ $(srcdir)/Include/cpython/structseq.h \ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index fb9217fee8bd73..fae4a90b4536fc 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -195,6 +195,7 @@ + @@ -384,7 +385,6 @@ - diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 1e1d085cd75511..04b6641ae30e7f 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -222,9 +222,6 @@ Include - - Include - Include @@ -522,6 +519,9 @@ Include\cpython + + Include + Include\cpython From cc5f8b5434c8b2a2d7858ef9a8d182344bc781b8 Mon Sep 17 00:00:00 2001 From: John Comeau Date: Thu, 30 Apr 2026 08:00:48 -0700 Subject: [PATCH 4/7] gh-113471: Add custom default Content-Type to http.server (#113475) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Serhiy Storchaka Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: donBarbos Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> --- Doc/library/http.server.rst | 20 +++++++++++++++++++ Doc/whatsnew/3.15.rst | 6 ++++++ Lib/http/server.py | 12 +++++++++-- Lib/test/test_httpservers.py | 11 ++++++++++ ...-12-25-19-14-07.gh-issue-113471.ZQMpbI.rst | 2 ++ 5 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-12-25-19-14-07.gh-issue-113471.ZQMpbI.rst diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 33ecaae5c87b01..5f325df55705bd 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -390,6 +390,14 @@ 
instantiation, of which this module provides three different variants: This will be ``"SimpleHTTP/" + __version__``, where ``__version__`` is defined at the module level. + .. attribute:: default_content_type + + Specifies the Content-Type header value sent when the MIME type + cannot be guessed from the file extension of the requested URL. + By default, it is set to ``'application/octet-stream'``. + + .. versionadded:: next + .. attribute:: extensions_map A dictionary mapping suffixes into MIME types, contains custom overrides @@ -528,6 +536,18 @@ The following options are accepted: .. versionadded:: 3.11 +.. option:: --content-type + + Specifies the default Content-Type HTTP header used when the MIME type + cannot be guessed from the URL's file extension. By default, the server + uses ``'application/octet-stream'``: + + .. code-block:: bash + + python -m http.server --content-type text/html + + .. versionadded:: next + .. option:: --tls-cert Specifies a TLS certificate chain for HTTPS connections: diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 59b9688c18e1ee..90d24bf96afeb4 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -964,6 +964,12 @@ http.server `. (Contributed by Hugo van Kemenade in :gh:`146292`.) +* Added :attr:`~http.server.SimpleHTTPRequestHandler.default_content_type` + and the :option:`--content-type ` command-line + option to allow customizing the default ``Content-Type`` header + for files with unknown extensions. + (Contributed by John Comeau and Hugo van Kemenade in :gh:`113471`.) 
+ inspect ------- diff --git a/Lib/http/server.py b/Lib/http/server.py index 568d3bb38deb6c..27ab37303a085c 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -727,6 +727,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): """ server_version = "SimpleHTTP" + default_content_type = "application/octet-stream" index_pages = ("index.html", "index.htm") extensions_map = _encodings_map_default = { '.gz': 'application/gzip', @@ -974,7 +975,7 @@ def guess_type(self, path): guess, _ = mimetypes.guess_file_type(path) if guess: return guess - return 'application/octet-stream' + return self.default_content_type nobody = None @@ -1010,9 +1011,10 @@ def _get_best_family(*address): return family, sockaddr -def test(HandlerClass=BaseHTTPRequestHandler, +def test(HandlerClass=SimpleHTTPRequestHandler, ServerClass=ThreadingHTTPServer, protocol="HTTP/1.0", port=8000, bind=None, + content_type=SimpleHTTPRequestHandler.default_content_type, tls_cert=None, tls_key=None, tls_password=None): """Test the HTTP request handler class. 
@@ -1021,6 +1023,7 @@ def test(HandlerClass=BaseHTTPRequestHandler, """ ServerClass.address_family, addr = _get_best_family(bind, port) HandlerClass.protocol_version = protocol + HandlerClass.default_content_type = content_type if tls_cert: server = ServerClass(addr, HandlerClass, certfile=tls_cert, @@ -1060,6 +1063,10 @@ def _main(args=None): default='HTTP/1.0', help='conform to this HTTP version ' '(default: %(default)s)') + parser.add_argument('--content-type', + default=SimpleHTTPRequestHandler.default_content_type, + help='default content type for unknown extensions ' + '(default: %(default)s)') parser.add_argument('--tls-cert', metavar='PATH', help='path to the TLS certificate chain file') parser.add_argument('--tls-key', metavar='PATH', @@ -1112,6 +1119,7 @@ class HTTPSDualStackServer(DualStackServerMixin, ThreadingHTTPSServer): port=args.port, bind=args.bind, protocol=args.protocol, + content_type=args.content_type, tls_cert=args.tls_cert, tls_key=args.tls_key, tls_password=tls_key_password, diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index d78b94e3a373d4..1f7a5a42fdaeb7 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -1379,6 +1379,7 @@ class CommandLineTestCase(unittest.TestCase): 'protocol': default_protocol, 'port': default_port, 'bind': default_bind, + 'content_type': 'application/octet-stream', 'tls_cert': None, 'tls_key': None, 'tls_password': None, @@ -1447,6 +1448,16 @@ def test_protocol_flag(self, mock_func): mock_func.assert_called_once_with(**call_args) mock_func.reset_mock() + @mock.patch('http.server.test') + def test_content_type_flag(self, mock_func): + content_types = ['text/html', 'text/plain', 'application/json'] + for content_type in content_types: + with self.subTest(content_type=content_type): + self.invoke_httpd('--content-type', content_type) + call_args = self.args | dict(content_type=content_type) + mock_func.assert_called_once_with(**call_args) + mock_func.reset_mock() + 
@unittest.skipIf(ssl is None, "requires ssl") @mock.patch('http.server.test') def test_tls_cert_and_key_flags(self, mock_func): diff --git a/Misc/NEWS.d/next/Library/2023-12-25-19-14-07.gh-issue-113471.ZQMpbI.rst b/Misc/NEWS.d/next/Library/2023-12-25-19-14-07.gh-issue-113471.ZQMpbI.rst new file mode 100644 index 00000000000000..99ba9bd1820fc1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-25-19-14-07.gh-issue-113471.ZQMpbI.rst @@ -0,0 +1,2 @@ +Allow :mod:`http.server` to set a default content-type when serving +files with an unknown or missing extension. From bf424816d0643a4c4566f2e33dc4e1beb5ec875c Mon Sep 17 00:00:00 2001 From: sobolevn Date: Thu, 30 Apr 2026 19:18:56 +0300 Subject: [PATCH 5/7] gh-149083: Use `sentinel` in `functools.rst` docs (#149176) --- Doc/library/functools.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 265610db3caabd..7da59cba5170b3 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -468,7 +468,7 @@ The :mod:`!functools` module defines the following functions: Roughly equivalent to:: - initial_missing = object() + initial_missing = sentinel('initial_missing') def reduce(function, iterable, /, initial=initial_missing): it = iter(iterable) From a60520da3cb0c571a361b9d0b8d998e78f38b836 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 30 Apr 2026 18:25:32 +0200 Subject: [PATCH 6/7] gh-147991: Speed up tomllib import time (GH-147992) - Use lazy import for regular expressions. 
- Use frozendict for string escapes Co-authored-by: Taneli Hukkinen Co-authored-by: Petr Viktorin --- Lib/test/test_tomllib/test_misc.py | 19 +++++++++++++++++++ Lib/tomllib/_parser.py | 11 +++++++++-- ...-04-02-05-06-34.gh-issue-147991.2ANtR5.rst | 2 ++ 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-04-02-05-06-34.gh-issue-147991.2ANtR5.rst diff --git a/Lib/test/test_tomllib/test_misc.py b/Lib/test/test_tomllib/test_misc.py index 118fde24d88521..abd0842d10b254 100644 --- a/Lib/test/test_tomllib/test_misc.py +++ b/Lib/test/test_tomllib/test_misc.py @@ -9,8 +9,10 @@ from pathlib import Path import sys import tempfile +import textwrap import unittest from test import support +from test.support.script_helper import assert_python_ok from . import tomllib @@ -124,3 +126,20 @@ def test_types_import(self): never imported by tests. """ importlib.import_module(f"{tomllib.__name__}._types") + + def test_lazy_import(self): + # Test the TOML file can be parsed without importing regular + # expressions (tomllib._re) + code = textwrap.dedent(""" + import sys, tomllib, textwrap + document = textwrap.dedent(''' + [metadata] + key = "text" + array = ["array", "of", "text"] + booleans = [true, false] + ''') + tomllib.loads(document) + print("lazy import?", 'tomllib._re' not in sys.modules) + """) + proc = assert_python_ok("-c", code) + self.assertIn(b"lazy import? True", proc.out) diff --git a/Lib/tomllib/_parser.py b/Lib/tomllib/_parser.py index b59d0f7d54bdc3..8aa01301dcea32 100644 --- a/Lib/tomllib/_parser.py +++ b/Lib/tomllib/_parser.py @@ -4,7 +4,11 @@ from __future__ import annotations -from types import MappingProxyType +# Defer loading regular expressions until we actually need them in +# parse_value(). 
+__lazy_modules__ = ["tomllib._re"] + +import sys from ._re import ( RE_DATETIME, @@ -15,6 +19,9 @@ match_to_number, ) +if sys.version_info < (3, 15): + from types import MappingProxyType as frozendict + TYPE_CHECKING = False if TYPE_CHECKING: from collections.abc import Iterable @@ -42,7 +49,7 @@ KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | frozenset("\"'") HEXDIGIT_CHARS: Final = frozenset("abcdef" "ABCDEF" "0123456789") -BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType( +BASIC_STR_ESCAPE_REPLACEMENTS: Final = frozendict( { "\\b": "\u0008", # backspace "\\t": "\u0009", # tab diff --git a/Misc/NEWS.d/next/Library/2026-04-02-05-06-34.gh-issue-147991.2ANtR5.rst b/Misc/NEWS.d/next/Library/2026-04-02-05-06-34.gh-issue-147991.2ANtR5.rst new file mode 100644 index 00000000000000..581c52926c3565 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-02-05-06-34.gh-issue-147991.2ANtR5.rst @@ -0,0 +1,2 @@ +Improve :mod:`tomllib` import time (up to 10x faster). Patch by Victor +Stinner. From b413bc7a1f0946f734d9660239b4e2e8ddc48522 Mon Sep 17 00:00:00 2001 From: elenril Date: Thu, 30 Apr 2026 18:24:23 +0100 Subject: [PATCH 7/7] bpo-39100: _header_value_parser: do not treat a Group as invalid-mailbox (#24872) When an address in an address-list has garbage at the end, the code will currently: 1. change the mailbox in the last parsed address into invalid-mailbox by overriding its token_type; 2. wrap the trailing garbage into another invalid-mailbox and append it to the last parsed address. However, that does not take into account that an address may also contain a Group instead of a single mailbox. In that case, overwriting token_type leads to undesirable results, e.g. parsing an email with the following 'To' header: unlisted-recipients:; (no To-header on input) raises an AttributeError from trying to treat the Group as a Mailbox. Moreover it is questionable whether the previously parsed mailbox should be treated as invalid in addition to the trailing garbage. 
Address both of the above by wrapping the trailing garbage in a new Address with a single invalid-mailbox, and append it to the AddressList directly. Changes the results of the test_get_address_list_mailboxes_invalid_addresses test, where the address list is now parsed into 4 mailboxes instead of 3 (all but the first one are invalid). --- Lib/email/_header_value_parser.py | 8 +++----- Lib/test/test_email/test__header_value_parser.py | 16 +++++++++++++--- ...2023-09-08-13-10-32.gh-issue-83281.2Plpcj.rst | 2 ++ 3 files changed, 18 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-09-08-13-10-32.gh-issue-83281.2Plpcj.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 4c5394ab6353ac..f6b45e13271d7a 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2063,12 +2063,10 @@ def get_address_list(value): address_list.defects.append(errors.InvalidHeaderDefect( "invalid address in address-list")) if value and value[0] != ',': - # Crap after address; treat it as an invalid mailbox. - # The mailbox info will still be available. - mailbox = address_list[-1][0] - mailbox.token_type = 'invalid-mailbox' + # Crap after address: add it to the address list + # as an invalid mailbox token, value = get_invalid_mailbox(value, ',') - mailbox.extend(token) + address_list.append(Address([token])) address_list.defects.append(errors.InvalidHeaderDefect( "invalid address in address-list")) if value: # Must be a , at this point. 
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index e28fe3892015b9..f3c03062572ba5 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2617,7 +2617,7 @@ def test_get_address_list_mailboxes_invalid_addresses(self): '') self.assertEqual(address_list.token_type, 'address-list') self.assertEqual(len(address_list.mailboxes), 1) - self.assertEqual(len(address_list.all_mailboxes), 3) + self.assertEqual(len(address_list.all_mailboxes), 4) self.assertEqual([str(x) for x in address_list.all_mailboxes], [str(x) for x in address_list.addresses]) self.assertEqual(address_list.mailboxes[0].domain, 'example.com') @@ -2626,11 +2626,13 @@ def test_get_address_list_mailboxes_invalid_addresses(self): self.assertEqual(address_list.addresses[1].token_type, 'address') self.assertEqual(len(address_list.addresses[0].mailboxes), 1) self.assertEqual(len(address_list.addresses[1].mailboxes), 0) - self.assertEqual(len(address_list.addresses[1].mailboxes), 0) + self.assertEqual(len(address_list.addresses[2].mailboxes), 0) + self.assertEqual(len(address_list.addresses[3].mailboxes), 0) self.assertEqual( address_list.addresses[1].all_mailboxes[0].local_part, 'Foo x') + self.assertEqual(address_list.addresses[2].all_mailboxes[0].value, '[]') self.assertEqual( - address_list.addresses[2].all_mailboxes[0].display_name, + address_list.addresses[3].all_mailboxes[0].display_name, "Nobody Is. 
Special") def test_get_address_list_group_empty(self): @@ -2695,6 +2697,14 @@ def test_get_address_list_group_and_mailboxes(self): self.assertEqual(str(address_list.addresses[1]), str(address_list.mailboxes[2])) + def test_get_address_list_trailing_garbage(self): + address_list = self._test_get_x(parser.get_address_list, + 'unlisted-recipients:; (no To-header on input)', + 'unlisted-recipients:; (no To-header on input)', + 'unlisted-recipients:; ', + [errors.InvalidHeaderDefect]*2 + [errors.ObsoleteHeaderDefect], + '') + def test_invalid_content_disposition(self): content_disp = self._test_parse_x( parser.parse_content_disposition_header, diff --git a/Misc/NEWS.d/next/Library/2023-09-08-13-10-32.gh-issue-83281.2Plpcj.rst b/Misc/NEWS.d/next/Library/2023-09-08-13-10-32.gh-issue-83281.2Plpcj.rst new file mode 100644 index 00000000000000..cf2ae770bd1940 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-09-08-13-10-32.gh-issue-83281.2Plpcj.rst @@ -0,0 +1,2 @@ +:mod:`email`: improve handling of trailing garbage in address lists to avoid +raising :exc:`AttributeError` in certain edge cases.