From 490d06c4e8b941c7a2b8433e41dcc16e235b6178 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer Date: Wed, 22 Apr 2026 06:34:20 +0000 Subject: [PATCH 1/2] DAOS-18859 test: fix stale evp and missing ev_error check in autotest After commit 8f3ac4a5e1 switched daos_eq_poll from DAOS_EQ_WAIT to DAOS_EQ_NOWAIT, two bugs were introduced in kv_put and kv_get: 1. Stale evp dereference on poll failure: when the inner spin loop exits with rc < 0 (poll error), evp is NOT updated by daos_eq_poll. The code then falls through to access evp->ev_error and call daos_kv_put/daos_kv_get with the stale pointer, which may point to an event still in-flight. This corrupts DAOS internal state and causes a SIGSEGV inside libdaos.so. Fix: add an explicit 'if (rc < 0) break;' guard after the inner spin loop, mirroring the original DAOS_EQ_WAIT code that had 'if (rc < 0) break;' as the first check after polling. 2. Missing ev_error check in kv_put drain loop: the new NOWAIT-based drain loop stopped checking evp->ev_error for each drained event, silently ignoring I/O errors that occurred on in-flight requests. The original DAOS_EQ_WAIT loop checked 'rc = evp->ev_error' on every completion. Fix: restore the ev_error check in the drain loop. Signed-off-by: Cedric Koch-Hofer --- src/utils/daos_autotest.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/utils/daos_autotest.c b/src/utils/daos_autotest.c index 5d9f591b7fa..58a0e1383d8 100644 --- a/src/utils/daos_autotest.c +++ b/src/utils/daos_autotest.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2020-2022 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -498,6 +498,10 @@ kv_put(daos_handle_t oh, daos_size_t size) } } + /* Poll failure: evp is stale, do not dereference it */ + if (rc < 0) + break; + /** Check if completed operation failed */ if (evp->ev_error != DER_SUCCESS) { rc = evp->ev_error; @@ -551,8 +555,11 @@ kv_put(daos_handle_t oh, daos_size_t size) num_events = daos_eq_query(eq, DAOS_EQR_ALL, 0, NULL); while (1) { eq_rc = daos_eq_poll(eq, 1, DAOS_EQ_NOWAIT, 1, &evp); - if (eq_rc > 0) + if (eq_rc > 0) { completions += eq_rc; + if (rc == 0) + rc = evp->ev_error; + } if (eq_rc < 0) { rc = eq_rc; break; @@ -638,6 +645,10 @@ kv_get(daos_handle_t oh, daos_size_t size) } } + /* Poll failure: evp is stale, do not dereference it */ + if (rc < 0) + break; + /** Check if completed operation failed */ if (evp->ev_error != DER_SUCCESS) { rc = evp->ev_error; From 013cec2e068629af969f8e65c0c4831b0eb55769 Mon Sep 17 00:00:00 2001 From: Cedric Koch-Hofer Date: Thu, 21 May 2026 16:33:54 +0000 Subject: [PATCH 2/2] DAOS-18859 test: add diagnostic logging for stale evp condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add D_ERROR + fprintf(stderr) diagnostic messages in kv_put() and kv_get() that fire when daos_eq_poll() returns rc < 0 (poll error) and the fix prevents the stale evp dereference. With the fix in place, the messages confirm the condition was caught and handled safely — the code breaks out without dereferencing the stale pointer, and the error propagates cleanly. Quick-Functional: true Test-repeat: 5 Test-tag: PoolAutotestTest,test_pool_autotest Signed-off-by: Cedric Koch-Hofer --- src/utils/daos_autotest.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/utils/daos_autotest.c b/src/utils/daos_autotest.c index 58a0e1383d8..810799d87bd 100644 --- a/src/utils/daos_autotest.c +++ b/src/utils/daos_autotest.c @@ -499,8 +499,20 @@ kv_put(daos_handle_t oh, daos_size_t size) } /* Poll failure: evp is stale, do not dereference it */ - if (rc < 0) + if (rc < 0) { + D_ERROR("DAOS-18859 kv_put: daos_eq_poll " + "returned " DF_RC ", evp is stale" + " — breaking out safely" + " (fix applied)\n", + DP_RC(rc)); + fprintf(stderr, + "DAOS-18859 kv_put:" + " daos_eq_poll returned %d, evp is" + " stale — breaking out safely" + " (fix applied)\n", + rc); break; + } /** Check if completed operation failed */ if (evp->ev_error != DER_SUCCESS) { @@ -646,8 +658,20 @@ kv_get(daos_handle_t oh, daos_size_t size) } /* Poll failure: evp is stale, do not dereference it */ - if (rc < 0) + if (rc < 0) { + D_ERROR("DAOS-18859 kv_get: daos_eq_poll " + "returned " DF_RC ", evp is stale" + " — breaking out safely" + " (fix applied)\n", + DP_RC(rc)); + fprintf(stderr, + "DAOS-18859 kv_get:" + " daos_eq_poll returned %d, evp is" + " stale — breaking out safely" + " (fix applied)\n", + rc); break; + } /** Check if completed operation failed */ if (evp->ev_error != DER_SUCCESS) {