From 9b58c3eac78b7d972f5ce74c82c92f0be224539e Mon Sep 17 00:00:00 2001 From: grantlouisherman Date: Fri, 22 May 2026 17:48:34 -0400 Subject: [PATCH 1/6] type-crash(): throw a no mem error when malloc fails in tokenizer Signed-off-by: grantlouisherman --- Parser/lexer/state.c | 5 ++++- Parser/tokenizer/helpers.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Parser/lexer/state.c b/Parser/lexer/state.c index 3663dc3eb7f9f69..5cf9b4d768c3ebb 100644 --- a/Parser/lexer/state.c +++ b/Parser/lexer/state.c @@ -15,8 +15,11 @@ _PyTokenizer_tok_new(void) struct tok_state *tok = (struct tok_state *)PyMem_Calloc( 1, sizeof(struct tok_state)); - if (tok == NULL) + if (tok == NULL) { + PyErr_NoMemory(); return NULL; + } + tok->buf = tok->cur = tok->inp = NULL; tok->fp_interactive = 0; tok->interactive_src_start = NULL; diff --git a/Parser/tokenizer/helpers.c b/Parser/tokenizer/helpers.c index 9542969ad3127b9..c69e66d0ab9b7a8 100644 --- a/Parser/tokenizer/helpers.c +++ b/Parser/tokenizer/helpers.c @@ -193,6 +193,7 @@ _PyTokenizer_new_string(const char *s, Py_ssize_t len, struct tok_state *tok) char* result = (char *)PyMem_Malloc(len + 1); if (!result) { tok->done = E_NOMEM; + PyErr_NoMemory(); return NULL; } memcpy(result, s, len); @@ -221,6 +222,7 @@ _PyTokenizer_translate_newlines(const char *s, int exec_input, int preserve_crlf buf = PyMem_Malloc(needed_length); if (buf == NULL) { tok->done = E_NOMEM; + PyErr_NoMemory(); return NULL; } for (current = buf; *s; s++, current++) { From d19d13666beb9f7ed2590648c1c339311d5f1ac6 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 22 May 2026 21:52:40 +0000 Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2026-05-22-21-52-38.gh-issue-150207.l2BUtI.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-05-22-21-52-38.gh-issue-150207.l2BUtI.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-22-21-52-38.gh-issue-150207.l2BUtI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-22-21-52-38.gh-issue-150207.l2BUtI.rst new file mode 100644 index 000000000000000..109a858db6b09bb --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-22-21-52-38.gh-issue-150207.l2BUtI.rst @@ -0,0 +1 @@ +fails in the tokenizer's ``_PyTokenizer_translate_newlines`` helper, so the out-of-memory condition is propagated as a proper Python exception instead of only being recorded in ``tok->done``. From dc85268f889076de48b9c5a097613d0eeee2bd1c Mon Sep 17 00:00:00 2001 From: Grant Herman Date: Mon, 1 Jun 2026 07:19:32 -0400 Subject: [PATCH 3/6] Update Misc/NEWS.d/next/Core_and_Builtins/2026-05-22-21-52-38.gh-issue-150207.l2BUtI.rst Co-authored-by: AN Long --- .../2026-05-22-21-52-38.gh-issue-150207.l2BUtI.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-22-21-52-38.gh-issue-150207.l2BUtI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-22-21-52-38.gh-issue-150207.l2BUtI.rst index 109a858db6b09bb..12fbffcd170684c 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-22-21-52-38.gh-issue-150207.l2BUtI.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-22-21-52-38.gh-issue-150207.l2BUtI.rst @@ -1 +1 @@ -fails in the tokenizer's ``_PyTokenizer_translate_newlines`` helper, so the out-of-memory condition is propagated as a proper Python exception instead of only being recorded in ``tok->done``. +Fix a crash when a memory allocation fails during tokenizer initialization. A proper :exc:`MemoryError` is now raised instead. From 052acb1cda8a9a064295cfc51bb9da756950c0f0 Mon Sep 17 00:00:00 2001 From: grantlouisherman Date: Mon, 1 Jun 2026 13:31:03 -0400 Subject: [PATCH 4/6] addressing aisk diff comments --- Parser/tokenizer/file_tokenizer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Parser/tokenizer/file_tokenizer.c b/Parser/tokenizer/file_tokenizer.c index 8c836a3f7258296..4492dada35e3a74 100644 --- a/Parser/tokenizer/file_tokenizer.c +++ b/Parser/tokenizer/file_tokenizer.c @@ -378,6 +378,7 @@ _PyTokenizer_FromFile(FILE *fp, const char* enc, return NULL; if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) { _PyTokenizer_Free(tok); + PyErr_NoMemory(); return NULL; } tok->cur = tok->inp = tok->buf; From d39b6226215f5df4871a161799e1a749014d5b5b Mon Sep 17 00:00:00 2001 From: Grant Herman Date: Wed, 3 Jun 2026 18:13:37 -0400 Subject: [PATCH 5/6] Update Parser/tokenizer/file_tokenizer.c Co-authored-by: AN Long --- Parser/tokenizer/file_tokenizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Parser/tokenizer/file_tokenizer.c b/Parser/tokenizer/file_tokenizer.c index 4492dada35e3a74..a11702557a07af3 100644 --- a/Parser/tokenizer/file_tokenizer.c +++ b/Parser/tokenizer/file_tokenizer.c @@ -378,7 +378,7 @@ _PyTokenizer_FromFile(FILE *fp, const char* enc, return NULL; if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) { _PyTokenizer_Free(tok); - PyErr_NoMemory(); + PyErr_NoMemory(); return NULL; } tok->cur = tok->inp = tok->buf; From d21b8b812c1c1a0e347fc32d31e402e44089e1a8 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sat, 6 Jun 2026 02:30:10 +0100 Subject: [PATCH 6/6] gh-150207: Set MemoryError in readline tokenizer init --- Parser/tokenizer/readline_tokenizer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Parser/tokenizer/readline_tokenizer.c b/Parser/tokenizer/readline_tokenizer.c index 0f7769aeb8fd570..917f7b40cfbbfed 100644 --- a/Parser/tokenizer/readline_tokenizer.c +++ b/Parser/tokenizer/readline_tokenizer.c @@ -114,6 +114,7 @@ _PyTokenizer_FromReadline(PyObject* readline, const char* enc, return NULL; if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) { _PyTokenizer_Free(tok); + PyErr_NoMemory(); return NULL; } tok->cur = tok->inp = tok->buf;