From 94a0e28faebdd771e161a441e56fda6dff54c817 Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 16 May 2026 18:14:02 +0800 Subject: [PATCH 1/8] fix: Fix race condition in elementtree with free-threading --- Lib/test/test_xml_etree_c.py | 47 +++++++++++++++++++++++ Modules/_elementtree.c | 72 ++++++++++++++++++++++++++---------- 2 files changed, 100 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 270b9d6da8e7b9e..78c0ec62784bde3 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -3,8 +3,10 @@ import struct from test import support from test.support.import_helper import import_fresh_module +from test.support import threading_helper import types import unittest +import threading cET = import_fresh_module('xml.etree.ElementTree', fresh=['_elementtree']) @@ -256,6 +258,51 @@ def test_element_with_children(self): self.check_sizeof(e, self.elementsize + self.extra + struct.calcsize('8P')) +@unittest.skipUnless(cET, 'requires _elementtree') +@threading_helper.requires_working_threading() +class TestElementTreeFreeThreading(unittest.TestCase): + def test_element_extra_race(self): + """Race len(), .attrib, and .clear() to verify fix for gh-149861.""" + root = cET.Element('root') + children = [cET.Element(f'child-{i}') for i in range(5)] + + stop_event = threading.Event() + + def reader_task(): + while not stop_event.is_set(): + # Test element_length + len(root) + # Test element_get_attrib + try: + _ = root.attrib + except AttributeError: + # In a race where clear() just ran, this is expected + # because of the PyErr_SetString we added in C. + pass + + def writer_task(): + while not stop_event.is_set(): + # Test element_add_subelement / extend + root.extend(children) + # Test clear_extra + root.clear() + + # Create a mix of readers and writers + threads = [] + for _ in range(4): + threads.append(threading.Thread(target=reader_task)) + for _ in range(2): + threads.append(threading.Thread(target=writer_task)) + + with threading_helper.start_threads(threads): + # Stress the race for a short duration. + # In CI, 0.5 to 1.0 seconds is usually enough to catch races + # without slowing down the test suite too much. + import time + time.sleep(1.0) + stop_event.set() + + def install_tests(): # Test classes should have __module__ referring to this module. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 9e794be5c109ba5..6a4367d88f6bb7d 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -271,19 +271,31 @@ typedef struct { LOCAL(int) create_extra(ElementObject* self, PyObject* attrib) { - self->extra = PyMem_Malloc(sizeof(ElementObjectExtra)); - if (!self->extra) { + int res; + Py_BEGIN_CRITICAL_SECTION(self); + + if (self->extra != NULL) { + res = 0; + goto end; + } + ElementObjectExtra* extra = PyMem_Malloc(sizeof(ElementObjectExtra)); + if (!extra) { PyErr_NoMemory(); - return -1; + res = -1; + goto end; } - self->extra->attrib = Py_XNewRef(attrib); + extra->attrib = Py_XNewRef(attrib); - self->extra->length = 0; - self->extra->allocated = STATIC_CHILDREN; - self->extra->children = self->extra->_children; + extra->length = 0; + extra->allocated = STATIC_CHILDREN; + extra->children = extra->_children; + self->extra = extra; + res = 0; - return 0; + end: + Py_END_CRITICAL_SECTION(); + return res; } LOCAL(void) @@ -297,7 +309,7 @@ dealloc_extra(ElementObjectExtra *extra) Py_XDECREF(extra->attrib); for (i = 0; i < extra->length; i++) - Py_DECREF(extra->children[i]); + Py_XDECREF(extra->children[i]); if (extra->children != extra->_children) { PyMem_Free(extra->children); @@ -311,15 +323,16 @@ clear_extra(ElementObject* self) { ElementObjectExtra *myextra; - if (!self->extra) - return; + Py_BEGIN_CRITICAL_SECTION(self); /* Avoid DECREFs calling into this code again (cycles, etc.) */ myextra = self->extra; self->extra = NULL; - - dealloc_extra(myextra); + Py_END_CRITICAL_SECTION(); + if (myextra) { + dealloc_extra(myextra); + } } /* Convenience internal function to create new Element objects with the given @@ -544,6 +557,7 @@ element_add_subelement(elementtreestate *st, ElementObject *self, return -1; } + Py_BEGIN_CRITICAL_SECTION(self); if (element_resize(self, 1) < 0) return -1; @@ -551,6 +565,7 @@ element_add_subelement(elementtreestate *st, ElementObject *self, self->extra->length++; + Py_END_CRITICAL_SECTION(); return 0; } @@ -560,13 +575,24 @@ element_get_attrib(ElementObject* self) /* return borrowed reference to attrib dictionary */ /* note: this function assumes that the extra section exists */ - PyObject* res = self->extra->attrib; + PyObject *res = NULL; + Py_BEGIN_CRITICAL_SECTION(self); + if (self->extra == NULL) { + PyErr_SetString(PyExc_AttributeError, "extra section does not exist"); + goto end; + } + res = self->extra->attrib; if (!res) { /* create missing dictionary */ - res = self->extra->attrib = PyDict_New(); + res = PyDict_New(); + if (res) { + self->extra->attrib = res; + } } - + Py_XINCREF(res); + end: + Py_END_CRITICAL_SECTION(); return res; } @@ -667,6 +693,7 @@ element_gc_traverse(PyObject *op, visitproc visit, void *arg) Py_VISIT(JOIN_OBJ(self->text)); Py_VISIT(JOIN_OBJ(self->tail)); + Py_BEGIN_CRITICAL_SECTION(self); if (self->extra) { Py_ssize_t i; Py_VISIT(self->extra->attrib); @@ -674,6 +701,7 @@ element_gc_traverse(PyObject *op, visitproc visit, void *arg) for (i = 0; i < self->extra->length; ++i) Py_VISIT(self->extra->children[i]); } + Py_END_CRITICAL_SECTION(); return 0; } @@ -681,6 +709,7 @@ static int element_gc_clear(PyObject *op) { ElementObject *self = _Element_CAST(op); + Py_BEGIN_CRITICAL_SECTION(self); Py_CLEAR(self->tag); _clear_joined_ptr(&self->text); _clear_joined_ptr(&self->tail); @@ -689,6 +718,7 @@ element_gc_clear(PyObject *op) * so fully deallocate it. */ clear_extra(self); + Py_END_CRITICAL_SECTION(); return 0; } @@ -1625,10 +1655,14 @@ static Py_ssize_t element_length(PyObject *op) { ElementObject *self = _Element_CAST(op); - if (!self->extra) - return 0; + Py_ssize_t res = 0; + Py_BEGIN_CRITICAL_SECTION(self); + if (self->extra) { + res = self->extra->length; + } + Py_END_CRITICAL_SECTION(); - return self->extra->length; + return res; } /*[clinic input] From b3cad8bb914a5b281052858cb0d92b0f54a09161 Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 16 May 2026 18:23:17 +0800 Subject: [PATCH 2/8] clear comments --- Lib/test/test_xml_etree_c.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 78c0ec62784bde3..dedb1f217ca8ae2 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -262,7 +262,7 @@ def test_element_with_children(self): @threading_helper.requires_working_threading() class TestElementTreeFreeThreading(unittest.TestCase): def test_element_extra_race(self): - """Race len(), .attrib, and .clear() to verify fix for gh-149861.""" + #Race len(), .attrib, and .clear() to verify fix for gh-149861. root = cET.Element('root') children = [cET.Element(f'child-{i}') for i in range(5)] @@ -270,14 +270,12 @@ def test_element_extra_race(self): def reader_task(): while not stop_event.is_set(): - # Test element_length len(root) - # Test element_get_attrib try: _ = root.attrib except AttributeError: # In a race where clear() just ran, this is expected - # because of the PyErr_SetString we added in C. + # because of the PyErr_SetString is added in C. pass def writer_task(): @@ -287,7 +285,6 @@ def writer_task(): # Test clear_extra root.clear() - # Create a mix of readers and writers threads = [] for _ in range(4): threads.append(threading.Thread(target=reader_task)) @@ -295,9 +292,6 @@ def writer_task(): threads.append(threading.Thread(target=writer_task)) with threading_helper.start_threads(threads): - # Stress the race for a short duration. - # In CI, 0.5 to 1.0 seconds is usually enough to catch races - # without slowing down the test suite too much. import time time.sleep(1.0) stop_event.set() From 1dc4f3696b15ff5b03c7c8acc396a23130b2ef26 Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 16 May 2026 18:50:03 +0800 Subject: [PATCH 3/8] fix --- Modules/_elementtree.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 6a4367d88f6bb7d..4a28844d4c6360c 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -572,7 +572,7 @@ element_add_subelement(elementtreestate *st, ElementObject *self, LOCAL(PyObject*) element_get_attrib(ElementObject* self) { - /* return borrowed reference to attrib dictionary */ + /* return new reference to attrib dictionary */ /* note: this function assumes that the extra section exists */ PyObject *res = NULL; @@ -1800,9 +1800,12 @@ _elementtree_Element_set_impl(ElementObject *self, PyObject *key, if (!attrib) return NULL; - if (PyDict_SetItem(attrib, key, value) < 0) + if (PyDict_SetItem(attrib, key, value) < 0) { + Py_DECREF(attrib); return NULL; + } + Py_DECREF(attrib); Py_RETURN_NONE; } @@ -2113,12 +2116,16 @@ element_attrib_getter(PyObject *op, void *closure) { PyObject *res; ElementObject *self = _Element_CAST(op); + Py_BEGIN_CRITICAL_SECTION(self); if (!self->extra) { if (create_extra(self, NULL) < 0) - return NULL; + goto end; } res = element_get_attrib(self); - return Py_XNewRef(res); + + end: + Py_END_CRITICAL_SECTION(); + return res; } /* macro for setter validation */ From 4e1605782a39c45bc5a62401be1c08e5b8533628 Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 16 May 2026 19:09:08 +0800 Subject: [PATCH 4/8] fix CI tests --- Modules/_elementtree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 4a28844d4c6360c..d68c3ff8aea82e1 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -293,7 +293,7 @@ create_extra(ElementObject* self, PyObject* attrib) self->extra = extra; res = 0; - end: + end: ; Py_END_CRITICAL_SECTION(); return res; } @@ -591,7 +591,7 @@ element_get_attrib(ElementObject* self) } } Py_XINCREF(res); - end: + end: ; Py_END_CRITICAL_SECTION(); return res; } @@ -2114,7 +2114,7 @@ element_tail_getter(PyObject *op, void *closure) static PyObject* element_attrib_getter(PyObject *op, void *closure) { - PyObject *res; + PyObject *res = NULL; ElementObject *self = _Element_CAST(op); Py_BEGIN_CRITICAL_SECTION(self); if (!self->extra) { @@ -2123,7 +2123,7 @@ element_attrib_getter(PyObject *op, void *closure) } res = element_get_attrib(self); - end: + end: ; Py_END_CRITICAL_SECTION(); return res; } From 6d8b5599ddc53558bb68497e1d4f5934d17a5c0f Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sat, 16 May 2026 19:14:07 +0800 Subject: [PATCH 5/8] add news --- .../Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst diff --git a/Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst b/Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst new file mode 100644 index 000000000000000..3f98c33a3293031 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst @@ -0,0 +1,4 @@ +Fixes race conditions in :mod:`xml.etree.ElementTree` where the ``extra`` +pointer dereference and ``attrib`` access was unsynchronized . This prevents +potential crashes (core dumps) in the free-threading build caused by +concurrent modification of an element's internal structure. From de9bbf6a68a7fa68c293bf6ab6b4afdf2ce601e3 Mon Sep 17 00:00:00 2001 From: ByteFlow Date: Sun, 17 May 2026 14:43:15 +0800 Subject: [PATCH 6/8] Improve naming of free-threading race test --- Lib/test/test_xml_etree_c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index dedb1f217ca8ae2..c10000780386851 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -261,7 +261,7 @@ def test_element_with_children(self): @unittest.skipUnless(cET, 'requires _elementtree') @threading_helper.requires_working_threading() class TestElementTreeFreeThreading(unittest.TestCase): - def test_element_extra_race(self): + def test_element_concurrent_clear_and_access(self): #Race len(), .attrib, and .clear() to verify fix for gh-149861. root = cET.Element('root') children = [cET.Element(f'child-{i}') for i in range(5)] From 2f71c508c7af5f12373acb5da521fd19a0df1578 Mon Sep 17 00:00:00 2001 From: Ivy Xu Date: Wed, 10 Jun 2026 22:16:21 +0800 Subject: [PATCH 7/8] address review --- Lib/test/test_xml_etree_c.py | 21 +++---- ...-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst | 2 +- Modules/_elementtree.c | 63 +++++++------------ 3 files changed, 32 insertions(+), 54 deletions(-) diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index c10000780386851..de1b4ed241e2377 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -1,12 +1,12 @@ # xml.etree test for cElementTree import io import struct +import time from test import support from test.support.import_helper import import_fresh_module from test.support import threading_helper import types import unittest -import threading cET = import_fresh_module('xml.etree.ElementTree', fresh=['_elementtree']) @@ -258,18 +258,19 @@ def test_element_with_children(self): self.check_sizeof(e, self.elementsize + self.extra + struct.calcsize('8P')) + @unittest.skipUnless(cET, 'requires _elementtree') @threading_helper.requires_working_threading() class TestElementTreeFreeThreading(unittest.TestCase): def test_element_concurrent_clear_and_access(self): - #Race len(), .attrib, and .clear() to verify fix for gh-149861. + # Race len(), .attrib, and .clear() to verify fix for gh-149861. root = cET.Element('root') children = [cET.Element(f'child-{i}') for i in range(5)] - stop_event = threading.Event() + end_time = time.monotonic() + 1.0 def reader_task(): - while not stop_event.is_set(): + while time.monotonic() < end_time: len(root) try: _ = root.attrib @@ -279,7 +280,7 @@ def reader_task(): pass def writer_task(): - while not stop_event.is_set(): + while time.monotonic() < end_time: # Test element_add_subelement / extend root.extend(children) # Test clear_extra @@ -287,15 +288,11 @@ def writer_task(): threads = [] for _ in range(4): - threads.append(threading.Thread(target=reader_task)) + threads.append(reader_task) for _ in range(2): - threads.append(threading.Thread(target=writer_task)) - - with threading_helper.start_threads(threads): - import time - time.sleep(1.0) - stop_event.set() + threads.append(writer_task) + threading_helper.run_concurrently(worker_func=threads, nthreads=6) def install_tests(): diff --git a/Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst b/Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst index 3f98c33a3293031..5c71f556377f4cb 100644 --- a/Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst +++ b/Misc/NEWS.d/next/Library/2026-05-16-19-13-58.gh-issue-149816.Ht-jC5.rst @@ -1,4 +1,4 @@ Fixes race conditions in :mod:`xml.etree.ElementTree` where the ``extra`` -pointer dereference and ``attrib`` access was unsynchronized . This prevents +pointer dereference and ``attrib`` access was unsynchronized. This prevents potential crashes (core dumps) in the free-threading build caused by concurrent modification of an element's internal structure. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index d68c3ff8aea82e1..88732f6caddf711 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -271,31 +271,20 @@ typedef struct { LOCAL(int) create_extra(ElementObject* self, PyObject* attrib) { - int res; - Py_BEGIN_CRITICAL_SECTION(self); - - if (self->extra != NULL) { - res = 0; - goto end; - } - ElementObjectExtra* extra = PyMem_Malloc(sizeof(ElementObjectExtra)); - if (!extra) { + self->extra = PyMem_Malloc(sizeof(ElementObjectExtra)); + if (!self->extra) { PyErr_NoMemory(); - res = -1; - goto end; + return -1; } + Py_BEGIN_CRITICAL_SECTION(self); + self->extra->attrib = Py_XNewRef(attrib); - extra->attrib = Py_XNewRef(attrib); - - extra->length = 0; - extra->allocated = STATIC_CHILDREN; - extra->children = extra->_children; - self->extra = extra; - res = 0; + self->extra->length = 0; + self->extra->allocated = STATIC_CHILDREN; + self->extra->children = self->extra->_children; - end: ; Py_END_CRITICAL_SECTION(); - return res; + return 0; } LOCAL(void) @@ -575,23 +564,15 @@ element_get_attrib(ElementObject* self) /* return new reference to attrib dictionary */ /* note: this function assumes that the extra section exists */ - PyObject *res = NULL; + PyObject *res; Py_BEGIN_CRITICAL_SECTION(self); - if (self->extra == NULL) { - PyErr_SetString(PyExc_AttributeError, "extra section does not exist"); - goto end; - } res = self->extra->attrib; if (!res) { /* create missing dictionary */ - res = PyDict_New(); - if (res) { - self->extra->attrib = res; - } + res = self->extra->attrib = PyDict_New(); } Py_XINCREF(res); - end: ; Py_END_CRITICAL_SECTION(); return res; } @@ -709,7 +690,6 @@ static int element_gc_clear(PyObject *op) { ElementObject *self = _Element_CAST(op); - Py_BEGIN_CRITICAL_SECTION(self); Py_CLEAR(self->tag); _clear_joined_ptr(&self->text); _clear_joined_ptr(&self->tail); @@ -718,7 +698,6 @@ element_gc_clear(PyObject *op) * so fully deallocate it. */ clear_extra(self); - Py_END_CRITICAL_SECTION(); return 0; } @@ -1655,9 +1634,12 @@ static Py_ssize_t element_length(PyObject *op) { ElementObject *self = _Element_CAST(op); - Py_ssize_t res = 0; + Py_ssize_t res; Py_BEGIN_CRITICAL_SECTION(self); - if (self->extra) { + if (!self->extra) { + res = 0; + } + else { res = self->extra->length; } Py_END_CRITICAL_SECTION(); @@ -2114,16 +2096,15 @@ element_tail_getter(PyObject *op, void *closure) static PyObject* element_attrib_getter(PyObject *op, void *closure) { - PyObject *res = NULL; + PyObject *res; ElementObject *self = _Element_CAST(op); Py_BEGIN_CRITICAL_SECTION(self); - if (!self->extra) { - if (create_extra(self, NULL) < 0) - goto end; + if (!self->extra && create_extra(self, NULL) < 0){ + res = NULL; + } + else { + res = element_get_attrib(self); } - res = element_get_attrib(self); - - end: ; Py_END_CRITICAL_SECTION(); return res; } From bb270e7b471cbdb71c752a29fd76f8d97d244d7b Mon Sep 17 00:00:00 2001 From: Ivy Xu Date: Wed, 10 Jun 2026 22:44:15 +0800 Subject: [PATCH 8/8] PyErr_SetString in `element_get_attrib()` is removed --- Lib/test/test_xml_etree_c.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index de1b4ed241e2377..3bb012396393e3f 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -272,12 +272,7 @@ def test_element_concurrent_clear_and_access(self): def reader_task(): while time.monotonic() < end_time: len(root) - try: - _ = root.attrib - except AttributeError: - # In a race where clear() just ran, this is expected - # because of the PyErr_SetString is added in C. - pass + _ = root.attrib def writer_task(): while time.monotonic() < end_time: