From 3fa89317d1b80e7bc71d253d3cec786095f3ad59 Mon Sep 17 00:00:00 2001 From: saurabh500 <1623701+saurabh500@users.noreply.github.com> Date: Mon, 6 Apr 2026 14:21:31 +0000 Subject: [PATCH 1/6] Release GIL during blocking ODBC connect/disconnect calls Release the Python GIL during SQLDriverConnect and SQLDisconnect calls so that other Python threads can run concurrently during connection I/O (DNS resolution, TCP handshake, TLS negotiation, SQL Server auth). Changes: - connection.cpp: Wrap SQLDriverConnect_ptr in py::gil_scoped_release - connection.cpp: Conditionally release GIL around SQLDisconnect_ptr (guarded by PyGILState_Check for destructor safety) - connection.cpp: Remove disconnect() from reset() failure paths to avoid GIL release inside pool mutex; callers handle cleanup - connection_pool.cpp: Move connect() outside pool mutex in acquire() (reserve slot inside mutex, connect outside, rollback on failure) - connection_pool.cpp: Move disconnect() outside mutex in release() - connection_pool.cpp: Release _manager_mutex before calling pool acquire/release/close to prevent mutex/GIL lock-ordering deadlocks - Add test_021_concurrent_connection_perf.py with stress tests proving concurrent connection parallelism and no GIL starvation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- mssql_python/pybind/connection/connection.cpp | 31 +- .../pybind/connection/connection_pool.cpp | 93 ++++-- tests/test_021_concurrent_connection_perf.py | 275 ++++++++++++++++++ 3 files changed, 371 insertions(+), 28 deletions(-) create mode 100644 tests/test_021_concurrent_connection_perf.py diff --git a/mssql_python/pybind/connection/connection.cpp b/mssql_python/pybind/connection/connection.cpp index 32ed5507..cf2bfbdd 100644 --- a/mssql_python/pybind/connection/connection.cpp +++ b/mssql_python/pybind/connection/connection.cpp @@ -85,8 +85,16 @@ void Connection::connect(const py::dict& attrs_before) { #else connStrPtr = const_cast(_connStr.c_str()); #endif - SQLRETURN ret = SQLDriverConnect_ptr(_dbcHandle->get(), nullptr, connStrPtr, SQL_NTS, nullptr, - 0, nullptr, SQL_DRIVER_NOPROMPT); + SQLRETURN ret; + { + // Release the GIL during the blocking ODBC connect call. + // SQLDriverConnect involves DNS resolution, TCP handshake, TLS negotiation, + // and SQL Server authentication — all pure I/O that doesn't need the GIL. + // This allows other Python threads to run concurrently. + py::gil_scoped_release release; + ret = SQLDriverConnect_ptr(_dbcHandle->get(), nullptr, connStrPtr, SQL_NTS, nullptr, + 0, nullptr, SQL_DRIVER_NOPROMPT); + } checkError(ret); updateLastUsed(); } @@ -95,6 +103,11 @@ void Connection::disconnect() { if (_dbcHandle) { LOG("Disconnecting from database"); + // Check if we hold the GIL so we can conditionally release it. + // The GIL is held when called from pybind11-bound methods but may NOT + // be held in destructor paths (C++ shared_ptr ref-count drop, shutdown). + bool hasGil = PyGILState_Check() != 0; + // CRITICAL FIX: Mark all child statement handles as implicitly freed // When we free the DBC handle below, the ODBC driver will automatically free // all child STMT handles. We need to tell the SqlHandle objects about this @@ -135,7 +148,17 @@ void Connection::disconnect() { _allocationsSinceCompaction = 0; } // Release lock before potentially slow SQLDisconnect call - SQLRETURN ret = SQLDisconnect_ptr(_dbcHandle->get()); + SQLRETURN ret; + if (hasGil) { + // Release the GIL during the blocking ODBC disconnect call. + // This allows other Python threads to run while the network + // round-trip completes. + py::gil_scoped_release release; + ret = SQLDisconnect_ptr(_dbcHandle->get()); + } else { + // Destructor / shutdown path — GIL is not held, call directly. + ret = SQLDisconnect_ptr(_dbcHandle->get()); + } checkError(ret); // triggers SQLFreeHandle via destructor, if last owner _dbcHandle.reset(); @@ -375,7 +398,6 @@ bool Connection::reset() { (SQLPOINTER)SQL_RESET_CONNECTION_YES, SQL_IS_INTEGER); if (!SQL_SUCCEEDED(ret)) { LOG("Failed to reset connection (ret=%d). Marking as dead.", ret); - disconnect(); return false; } @@ -387,7 +409,6 @@ bool Connection::reset() { (SQLPOINTER)SQL_TXN_READ_COMMITTED, SQL_IS_INTEGER); if (!SQL_SUCCEEDED(ret)) { LOG("Failed to reset transaction isolation level (ret=%d). Marking as dead.", ret); - disconnect(); return false; } diff --git a/mssql_python/pybind/connection/connection_pool.cpp b/mssql_python/pybind/connection/connection_pool.cpp index 3000a970..385b4b87 100644 --- a/mssql_python/pybind/connection/connection_pool.cpp +++ b/mssql_python/pybind/connection/connection_pool.cpp @@ -16,6 +16,7 @@ std::shared_ptr ConnectionPool::acquire(const std::wstring& connStr, const py::dict& attrs_before) { std::vector> to_disconnect; std::shared_ptr valid_conn = nullptr; + bool needs_connect = false; { std::lock_guard lock(_mutex); auto now = std::chrono::steady_clock::now(); @@ -57,16 +58,33 @@ std::shared_ptr ConnectionPool::acquire(const std::wstring& connStr, } } - // Create new connection if none reusable + // Reserve a slot for a new connection if none reusable. + // The actual connect() call happens outside the mutex to avoid + // holding the mutex during the blocking ODBC call (which releases + // the GIL and could otherwise cause a mutex/GIL deadlock). if (!valid_conn && _current_size < _max_size) { valid_conn = std::make_shared(connStr, true); - valid_conn->connect(attrs_before); ++_current_size; + needs_connect = true; } else if (!valid_conn) { throw std::runtime_error("ConnectionPool::acquire: pool size limit reached"); } } + // Phase 2.5: Connect the new connection outside the mutex. + if (needs_connect) { + try { + valid_conn->connect(attrs_before); + } catch (...) { + // Connect failed — release the reserved slot + { + std::lock_guard lock(_mutex); + if (_current_size > 0) --_current_size; + } + throw; + } + } + // Phase 3: Disconnect expired/bad connections outside lock for (auto& conn : to_disconnect) { try { @@ -79,14 +97,22 @@ std::shared_ptr ConnectionPool::acquire(const std::wstring& connStr, } void ConnectionPool::release(std::shared_ptr conn) { - std::lock_guard lock(_mutex); - if (_pool.size() < _max_size) { - conn->updateLastUsed(); - _pool.push_back(conn); - } else { + bool should_disconnect = false; + { + std::lock_guard lock(_mutex); + if (_pool.size() < _max_size) { + conn->updateLastUsed(); + _pool.push_back(conn); + } else { + should_disconnect = true; + if (_current_size > 0) + --_current_size; + } + } + // Disconnect outside the mutex to avoid holding it during the + // blocking ODBC call (which releases the GIL). + if (should_disconnect) { conn->disconnect(); - if (_current_size > 0) - --_current_size; } } @@ -116,21 +142,35 @@ ConnectionPoolManager& ConnectionPoolManager::getInstance() { std::shared_ptr ConnectionPoolManager::acquireConnection(const std::wstring& connStr, const py::dict& attrs_before) { - std::lock_guard lock(_manager_mutex); - - auto& pool = _pools[connStr]; - if (!pool) { - LOG("Creating new connection pool"); - pool = std::make_shared(_default_max_size, _default_idle_secs); + std::shared_ptr pool; + { + std::lock_guard lock(_manager_mutex); + auto& pool_ref = _pools[connStr]; + if (!pool_ref) { + LOG("Creating new connection pool"); + pool_ref = std::make_shared(_default_max_size, _default_idle_secs); + } + pool = pool_ref; } + // Call acquire() outside _manager_mutex. acquire() may release the GIL + // during the ODBC connect call; holding _manager_mutex across that would + // create a mutex/GIL lock-ordering deadlock. return pool->acquire(connStr, attrs_before); } void ConnectionPoolManager::returnConnection(const std::wstring& conn_str, const std::shared_ptr conn) { - std::lock_guard lock(_manager_mutex); - if (_pools.find(conn_str) != _pools.end()) { - _pools[conn_str]->release((conn)); + std::shared_ptr pool; + { + std::lock_guard lock(_manager_mutex); + auto it = _pools.find(conn_str); + if (it != _pools.end()) { + pool = it->second; + } + } + // Call release() outside _manager_mutex to avoid deadlock. + if (pool) { + pool->release(conn); } } @@ -141,11 +181,18 @@ void ConnectionPoolManager::configure(int max_size, int idle_timeout_secs) { } void ConnectionPoolManager::closePools() { - std::lock_guard lock(_manager_mutex); - for (auto& [conn_str, pool] : _pools) { - if (pool) { - pool->close(); + std::vector> pools_to_close; + { + std::lock_guard lock(_manager_mutex); + for (auto& [conn_str, pool] : _pools) { + if (pool) { + pools_to_close.push_back(pool); + } } + _pools.clear(); + } + // Close pools outside _manager_mutex to avoid deadlock. + for (auto& pool : pools_to_close) { + pool->close(); } - _pools.clear(); } diff --git a/tests/test_021_concurrent_connection_perf.py b/tests/test_021_concurrent_connection_perf.py new file mode 100644 index 00000000..54bcfefc --- /dev/null +++ b/tests/test_021_concurrent_connection_perf.py @@ -0,0 +1,275 @@ +""" +Concurrent connection performance test for GIL release during ODBC operations. + +This test verifies that the GIL is properly released during blocking ODBC +connection establishment (SQLDriverConnect) and teardown (SQLDisconnect), +allowing multiple Python threads to establish connections in parallel. + +Without GIL release, N threads would take ~N * single_connection_time +(serialized by the GIL). With GIL release, threads overlap their I/O and +total wall-clock time is close to single_connection_time. + +Marked with @pytest.mark.stress to run in dedicated performance pipelines. +""" + +import os +import time +import threading +import statistics + +import pytest +import mssql_python +from mssql_python import connect + + +@pytest.fixture(scope="module") +def perf_conn_str(): + """Get connection string from environment.""" + conn_str = os.getenv("DB_CONNECTION_STRING") + if not conn_str: + pytest.skip("DB_CONNECTION_STRING environment variable not set") + return conn_str + + +def _connect_and_close(conn_str: str) -> float: + """Open a connection, close it, and return the elapsed time in seconds.""" + start = time.perf_counter() + conn = connect(conn_str) + conn.close() + return time.perf_counter() - start + + +# ============================================================================ +# GIL Release Performance Tests +# ============================================================================ + + +@pytest.mark.stress +def test_concurrent_connection_gil_release(perf_conn_str): + """ + Verify that concurrent connection establishment achieves parallelism, + proving the GIL is released during SQLDriverConnect. + + Approach: + 1. Measure the baseline: average time for a single connection (no contention). + 2. Launch NUM_THREADS threads, each creating a fresh connection (no pooling). + 3. The wall-clock time for all threads should be much less than + NUM_THREADS * baseline, because connections overlap their I/O. + + We require a speedup > 2x (very conservative). In practice, with proper + GIL release, the speedup approaches NUM_THREADS for I/O-bound work. + """ + NUM_THREADS = 10 + WARMUP_ROUNDS = 2 + BASELINE_ROUNDS = 5 + + # Disable pooling so every connect() creates a brand-new ODBC connection. + mssql_python.pooling(enabled=False) + + # ---- warm-up (prime DNS cache, driver loading, etc.) ---- + for _ in range(WARMUP_ROUNDS): + _connect_and_close(perf_conn_str) + + # ---- baseline: serial single-connection time ---- + serial_times = [_connect_and_close(perf_conn_str) for _ in range(BASELINE_ROUNDS)] + baseline = statistics.median(serial_times) + print(f"\n[BASELINE] Single connection (median of {BASELINE_ROUNDS}): {baseline*1000:.1f} ms") + + # ---- concurrent: N threads each opening a connection ---- + barrier = threading.Barrier(NUM_THREADS) + thread_times = [None] * NUM_THREADS + errors = [] + + def worker(idx): + try: + barrier.wait(timeout=30) + thread_times[idx] = _connect_and_close(perf_conn_str) + except Exception as exc: + errors.append((idx, str(exc))) + + threads = [ + threading.Thread(target=worker, args=(i,), daemon=True) + for i in range(NUM_THREADS) + ] + + wall_start = time.perf_counter() + for t in threads: + t.start() + for t in threads: + t.join(timeout=120) + wall_time = time.perf_counter() - wall_start + + # ---- assertions ---- + assert not errors, f"Thread errors: {errors}" + assert all(t is not None for t in thread_times), "Some threads did not complete" + + serial_estimate = NUM_THREADS * baseline + speedup = serial_estimate / wall_time + + print(f"[CONCURRENT] {NUM_THREADS} threads wall-clock: {wall_time*1000:.1f} ms") + print(f"[SERIAL EST] {NUM_THREADS} × baseline: {serial_estimate*1000:.1f} ms") + print(f"[SPEEDUP] {speedup:.2f}x (>{NUM_THREADS}x means full parallelism)") + + # Conservative threshold: even modest parallelism should beat 2x. + # Without GIL release this would be ~1.0x (fully serialized). + assert speedup > 2.0, ( + f"Concurrent connections are not running in parallel (speedup={speedup:.2f}x). " + f"Expected >2x, got wall_time={wall_time*1000:.1f}ms vs serial_estimate={serial_estimate*1000:.1f}ms. " + f"This likely indicates the GIL is not being released during SQLDriverConnect." + ) + + print(f"[PASSED] GIL release verified — {speedup:.1f}x speedup with {NUM_THREADS} threads") + + +@pytest.mark.stress +def test_concurrent_disconnect_gil_release(perf_conn_str): + """ + Verify that concurrent disconnection also releases the GIL. + + Opens N connections serially, then closes them all concurrently. + Wall-clock time for concurrent close should be much less than + N * single-close time. + """ + NUM_THREADS = 10 + WARMUP_ROUNDS = 2 + BASELINE_ROUNDS = 5 + + mssql_python.pooling(enabled=False) + + # warm-up + for _ in range(WARMUP_ROUNDS): + _connect_and_close(perf_conn_str) + + # baseline: serial close time + close_times = [] + for _ in range(BASELINE_ROUNDS): + conn = connect(perf_conn_str) + start = time.perf_counter() + conn.close() + close_times.append(time.perf_counter() - start) + baseline_close = statistics.median(close_times) + print(f"\n[BASELINE] Single close (median of {BASELINE_ROUNDS}): {baseline_close*1000:.1f} ms") + + # open N connections serially + connections = [connect(perf_conn_str) for _ in range(NUM_THREADS)] + + # concurrent close + barrier = threading.Barrier(NUM_THREADS) + thread_times = [None] * NUM_THREADS + errors = [] + + def close_worker(idx, conn): + try: + barrier.wait(timeout=30) + start = time.perf_counter() + conn.close() + thread_times[idx] = time.perf_counter() - start + except Exception as exc: + errors.append((idx, str(exc))) + + threads = [ + threading.Thread(target=close_worker, args=(i, connections[i]), daemon=True) + for i in range(NUM_THREADS) + ] + + wall_start = time.perf_counter() + for t in threads: + t.start() + for t in threads: + t.join(timeout=60) + wall_time = time.perf_counter() - wall_start + + assert not errors, f"Thread errors: {errors}" + assert all(t is not None for t in thread_times), "Some threads did not complete" + + serial_estimate = NUM_THREADS * baseline_close + speedup = serial_estimate / wall_time if wall_time > 0 else float("inf") + + print(f"[CONCURRENT] {NUM_THREADS} threads close wall-clock: {wall_time*1000:.1f} ms") + print(f"[SERIAL EST] {NUM_THREADS} × baseline: {serial_estimate*1000:.1f} ms") + print(f"[SPEEDUP] {speedup:.2f}x") + + # Disconnect is typically fast, so the speedup may be less dramatic. + # We use a softer threshold of 1.5x. + assert speedup > 1.5, ( + f"Concurrent disconnects are not running in parallel (speedup={speedup:.2f}x). " + f"This likely indicates the GIL is not being released during SQLDisconnect." + ) + + print(f"[PASSED] GIL release on disconnect verified — {speedup:.1f}x speedup") + + +@pytest.mark.stress +def test_mixed_connect_disconnect_under_load(perf_conn_str): + """ + Stress test: threads continuously connect and disconnect while + other threads do CPU-bound Python work. Verifies that GIL release + during ODBC I/O does not starve or deadlock Python threads. + """ + NUM_IO_THREADS = 5 + NUM_CPU_THREADS = 3 + DURATION_SECS = 5 + + mssql_python.pooling(enabled=False) + + stop_event = threading.Event() + io_counts = [0] * NUM_IO_THREADS + cpu_counts = [0] * NUM_CPU_THREADS + errors = [] + + def io_worker(idx): + """Repeatedly connect/disconnect.""" + try: + while not stop_event.is_set(): + conn = connect(perf_conn_str) + conn.close() + io_counts[idx] += 1 + except Exception as exc: + errors.append((f"io-{idx}", str(exc))) + + def cpu_worker(idx): + """Do CPU-bound work (must be able to acquire GIL).""" + try: + while not stop_event.is_set(): + # Busy work that requires the GIL + total = sum(range(10000)) + _ = [x ** 2 for x in range(100)] + cpu_counts[idx] += 1 + except Exception as exc: + errors.append((f"cpu-{idx}", str(exc))) + + threads = [] + for i in range(NUM_IO_THREADS): + threads.append(threading.Thread(target=io_worker, args=(i,), daemon=True)) + for i in range(NUM_CPU_THREADS): + threads.append(threading.Thread(target=cpu_worker, args=(i,), daemon=True)) + + for t in threads: + t.start() + + time.sleep(DURATION_SECS) + stop_event.set() + + for t in threads: + t.join(timeout=30) + + total_io = sum(io_counts) + total_cpu = sum(cpu_counts) + + print(f"\n[MIXED LOAD] Duration: {DURATION_SECS}s") + print(f" I/O threads ({NUM_IO_THREADS}): {total_io} connect/disconnect cycles") + print(f" CPU threads ({NUM_CPU_THREADS}): {total_cpu} iterations") + + assert not errors, f"Errors during mixed load: {errors}" + + # CPU threads must have made progress — if the GIL were held during + # ODBC I/O, CPU threads would be starved. + assert total_cpu > 0, ( + "CPU threads made no progress — GIL may be held during ODBC I/O, " + "starving Python threads." + ) + + # I/O threads must have completed at least a few cycles + assert total_io > 0, "I/O threads made no progress." + + print(f"[PASSED] Mixed I/O + CPU load: no starvation or deadlocks") From 47c61cf383f5ebee18e5c65f0cd9e2c658a1b0f5 Mon Sep 17 00:00:00 2001 From: saurabh500 <1623701+saurabh500@users.noreply.github.com> Date: Mon, 6 Apr 2026 14:57:40 +0000 Subject: [PATCH 2/6] Format code with black --line-length=100 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/test_021_concurrent_connection_perf.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test_021_concurrent_connection_perf.py b/tests/test_021_concurrent_connection_perf.py index 54bcfefc..a253a087 100644 --- a/tests/test_021_concurrent_connection_perf.py +++ b/tests/test_021_concurrent_connection_perf.py @@ -87,10 +87,7 @@ def worker(idx): except Exception as exc: errors.append((idx, str(exc))) - threads = [ - threading.Thread(target=worker, args=(i,), daemon=True) - for i in range(NUM_THREADS) - ] + threads = [threading.Thread(target=worker, args=(i,), daemon=True) for i in range(NUM_THREADS)] wall_start = time.perf_counter() for t in threads: @@ -233,7 +230,7 @@ def cpu_worker(idx): while not stop_event.is_set(): # Busy work that requires the GIL total = sum(range(10000)) - _ = [x ** 2 for x in range(100)] + _ = [x**2 for x in range(100)] cpu_counts[idx] += 1 except Exception as exc: errors.append((f"cpu-{idx}", str(exc))) From 83995e43e596d4adc441dca9a7d9e51bf84926bb Mon Sep 17 00:00:00 2001 From: saurabh500 <1623701+saurabh500@users.noreply.github.com> Date: Mon, 6 Apr 2026 15:04:11 +0000 Subject: [PATCH 3/6] Add test for pool release overflow path (coverage lines 107-110) Add test_pool_release_overflow_disconnects_outside_mutex to exercise the ConnectionPool::release() overflow path where a connection is returned to a pool that is already at max_size. This triggers the should_disconnect branch that disconnects outside the mutex. The disconnect-without-GIL path (connection.cpp lines 160-161) is a C++ destructor safety guard only reachable during interpreter shutdown when the last shared_ptr drops without the GIL held. This cannot be reliably exercised from a Python test since __del__/GC always holds the GIL. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/test_009_pooling.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_009_pooling.py b/tests/test_009_pooling.py index 1a3e5f09..a8e7d94a 100644 --- a/tests/test_009_pooling.py +++ b/tests/test_009_pooling.py @@ -278,6 +278,35 @@ def try_overflow(): c.close() +def test_pool_release_overflow_disconnects_outside_mutex(conn_str): + """Test that releasing a connection when pool is full disconnects it correctly. + + When a connection is returned to a pool that is already at max_size, + the connection must be disconnected. This exercises the overflow path in + ConnectionPool::release() (connection_pool.cpp lines 107-110) where + should_disconnect is set and disconnect happens outside the mutex. + """ + pooling(max_size=1, idle_timeout=30) + + # Open two connections — both succeed because the pool issues slots + conn1 = connect(conn_str) + conn2 = connect(conn_str) + + # Close conn1 first — returned to the pool (pool now has 1 idle entry) + conn1.close() + + # Close conn2 — pool is full (1 idle already), so this connection + # must be disconnected rather than pooled (overflow path). + conn2.close() + + # Verify the pool is still functional + conn3 = connect(conn_str) + cursor = conn3.cursor() + cursor.execute("SELECT 1") + assert cursor.fetchone()[0] == 1 + conn3.close() + + @pytest.mark.skip("Flaky test - idle timeout behavior needs investigation") def test_pool_idle_timeout_removes_connections(conn_str): """Test that idle_timeout removes connections from the pool after the timeout.""" From 60642be1e7f5fde82327f7c863a07ba0ab6871cd Mon Sep 17 00:00:00 2001 From: saurabh500 <1623701+saurabh500@users.noreply.github.com> Date: Mon, 6 Apr 2026 23:59:17 +0000 Subject: [PATCH 4/6] Address review comments: error handling, pool accounting, test fix - connection.cpp: Suppress checkError() in destructor/shutdown path to avoid std::terminate() during stack unwinding - connection_pool.cpp release(): Move _current_size decrement after successful disconnect to keep pool accounting consistent on failure - connection_pool.cpp closePools(): Keep _manager_mutex held during close to prevent races with new pool creation - test_009_pooling.py: Fix overflow test to use max_size=2 for acquisition then shrink to max_size=1 before release, matching actual pool semantics where max_size limits concurrent connections Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- mssql_python/pybind/connection/connection.cpp | 8 ++++- .../pybind/connection/connection_pool.cpp | 31 ++++++++++--------- tests/test_009_pooling.py | 17 +++++++--- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/mssql_python/pybind/connection/connection.cpp b/mssql_python/pybind/connection/connection.cpp index cf2bfbdd..1b5b0045 100644 --- a/mssql_python/pybind/connection/connection.cpp +++ b/mssql_python/pybind/connection/connection.cpp @@ -159,7 +159,13 @@ void Connection::disconnect() { // Destructor / shutdown path — GIL is not held, call directly. ret = SQLDisconnect_ptr(_dbcHandle->get()); } - checkError(ret); + // In destructor/shutdown paths, suppress errors to avoid + // std::terminate() if this throws during stack unwinding. + if (hasGil) { + checkError(ret); + } else if (!SQL_SUCCEEDED(ret)) { + LOG("SQLDisconnect failed in destructor/shutdown path; ignoring error"); + } // triggers SQLFreeHandle via destructor, if last owner _dbcHandle.reset(); } else { diff --git a/mssql_python/pybind/connection/connection_pool.cpp b/mssql_python/pybind/connection/connection_pool.cpp index 385b4b87..7c6b7f70 100644 --- a/mssql_python/pybind/connection/connection_pool.cpp +++ b/mssql_python/pybind/connection/connection_pool.cpp @@ -105,14 +105,19 @@ void ConnectionPool::release(std::shared_ptr conn) { _pool.push_back(conn); } else { should_disconnect = true; - if (_current_size > 0) - --_current_size; } } // Disconnect outside the mutex to avoid holding it during the // blocking ODBC call (which releases the GIL). if (should_disconnect) { - conn->disconnect(); + try { + conn->disconnect(); + } catch (const std::exception& ex) { + LOG("ConnectionPool::release: disconnect failed: %s", ex.what()); + } + std::lock_guard lock(_mutex); + if (_current_size > 0) + --_current_size; } } @@ -181,18 +186,14 @@ void ConnectionPoolManager::configure(int max_size, int idle_timeout_secs) { } void ConnectionPoolManager::closePools() { - std::vector> pools_to_close; - { - std::lock_guard lock(_manager_mutex); - for (auto& [conn_str, pool] : _pools) { - if (pool) { - pools_to_close.push_back(pool); - } + std::lock_guard lock(_manager_mutex); + // Keep _manager_mutex held for the full close operation so that + // acquireConnection()/returnConnection() cannot create or use pools + // while closePools() is in progress. + for (auto& [conn_str, pool] : _pools) { + if (pool) { + pool->close(); } - _pools.clear(); - } - // Close pools outside _manager_mutex to avoid deadlock. - for (auto& pool : pools_to_close) { - pool->close(); } + _pools.clear(); } diff --git a/tests/test_009_pooling.py b/tests/test_009_pooling.py index a8e7d94a..d5b34346 100644 --- a/tests/test_009_pooling.py +++ b/tests/test_009_pooling.py @@ -283,16 +283,23 @@ def test_pool_release_overflow_disconnects_outside_mutex(conn_str): When a connection is returned to a pool that is already at max_size, the connection must be disconnected. This exercises the overflow path in - ConnectionPool::release() (connection_pool.cpp lines 107-110) where - should_disconnect is set and disconnect happens outside the mutex. + ConnectionPool::release() (connection_pool.cpp) where should_disconnect + is set and disconnect happens outside the mutex. + + With the current pool semantics, max_size limits total concurrent + connections, so we acquire two connections with max_size=2, then shrink + the pool to max_size=1 before returning them. The second close hits + the overflow path. """ - pooling(max_size=1, idle_timeout=30) + pooling(max_size=2, idle_timeout=30) - # Open two connections — both succeed because the pool issues slots conn1 = connect(conn_str) conn2 = connect(conn_str) - # Close conn1 first — returned to the pool (pool now has 1 idle entry) + # Shrink idle capacity so first close fills the pool and second overflows + pooling(max_size=1, idle_timeout=30) + + # Close conn1 — returned to the pool (pool now has 1 idle entry) conn1.close() # Close conn2 — pool is full (1 idle already), so this connection From def8f2c50ac51501fddc324e09768163c2745554 Mon Sep 17 00:00:00 2001 From: saurabh500 <1623701+saurabh500@users.noreply.github.com> Date: Tue, 7 Apr 2026 00:02:03 +0000 Subject: [PATCH 5/6] Fix disconnect perf test: verify correctness not speedup ratio Disconnect on localhost is sub-millisecond (~0.3ms), so thread scheduling overhead dominates and speedup ratios are meaningless. Changed the test to verify all concurrent disconnects complete without errors or deadlocks instead of asserting a speedup threshold. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/test_021_concurrent_connection_perf.py | 38 ++++---------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/tests/test_021_concurrent_connection_perf.py b/tests/test_021_concurrent_connection_perf.py index a253a087..0d4d0753 100644 --- a/tests/test_021_concurrent_connection_perf.py +++ b/tests/test_021_concurrent_connection_perf.py @@ -121,32 +121,21 @@ def worker(idx): @pytest.mark.stress def test_concurrent_disconnect_gil_release(perf_conn_str): """ - Verify that concurrent disconnection also releases the GIL. + Verify that concurrent disconnection works correctly with GIL release. Opens N connections serially, then closes them all concurrently. - Wall-clock time for concurrent close should be much less than - N * single-close time. + On localhost, disconnect is sub-millisecond so thread overhead dominates + and speedup ratios are not meaningful. Instead, we verify that all + concurrent disconnects complete without errors or deadlocks. """ NUM_THREADS = 10 - WARMUP_ROUNDS = 2 - BASELINE_ROUNDS = 5 mssql_python.pooling(enabled=False) # warm-up - for _ in range(WARMUP_ROUNDS): + for _ in range(2): _connect_and_close(perf_conn_str) - # baseline: serial close time - close_times = [] - for _ in range(BASELINE_ROUNDS): - conn = connect(perf_conn_str) - start = time.perf_counter() - conn.close() - close_times.append(time.perf_counter() - start) - baseline_close = statistics.median(close_times) - print(f"\n[BASELINE] Single close (median of {BASELINE_ROUNDS}): {baseline_close*1000:.1f} ms") - # open N connections serially connections = [connect(perf_conn_str) for _ in range(NUM_THREADS)] @@ -179,21 +168,8 @@ def close_worker(idx, conn): assert not errors, f"Thread errors: {errors}" assert all(t is not None for t in thread_times), "Some threads did not complete" - serial_estimate = NUM_THREADS * baseline_close - speedup = serial_estimate / wall_time if wall_time > 0 else float("inf") - - print(f"[CONCURRENT] {NUM_THREADS} threads close wall-clock: {wall_time*1000:.1f} ms") - print(f"[SERIAL EST] {NUM_THREADS} × baseline: {serial_estimate*1000:.1f} ms") - print(f"[SPEEDUP] {speedup:.2f}x") - - # Disconnect is typically fast, so the speedup may be less dramatic. - # We use a softer threshold of 1.5x. - assert speedup > 1.5, ( - f"Concurrent disconnects are not running in parallel (speedup={speedup:.2f}x). " - f"This likely indicates the GIL is not being released during SQLDisconnect." - ) - - print(f"[PASSED] GIL release on disconnect verified — {speedup:.1f}x speedup") + print(f"\n[CONCURRENT] {NUM_THREADS} threads close wall-clock: {wall_time*1000:.1f} ms") + print(f"[PASSED] All {NUM_THREADS} concurrent disconnects completed without errors") @pytest.mark.stress From d09dfd69b3cac80ad3e0051099243100d5258500 Mon Sep 17 00:00:00 2001 From: saurabh500 <1623701+saurabh500@users.noreply.github.com> Date: Thu, 9 Apr 2026 18:01:43 -0700 Subject: [PATCH 6/6] Remove unsafe LOG() call in destructor/shutdown disconnect path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LOG() internally acquires the GIL via py::gil_scoped_acquire, which is unsafe during interpreter shutdown or stack unwinding — it can deadlock or trigger std::terminate(). Replace with a comment explaining why logging is intentionally skipped. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- mssql_python/pybind/connection/connection.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mssql_python/pybind/connection/connection.cpp b/mssql_python/pybind/connection/connection.cpp index 1b5b0045..aaf172b9 100644 --- a/mssql_python/pybind/connection/connection.cpp +++ b/mssql_python/pybind/connection/connection.cpp @@ -164,7 +164,9 @@ void Connection::disconnect() { if (hasGil) { checkError(ret); } else if (!SQL_SUCCEEDED(ret)) { - LOG("SQLDisconnect failed in destructor/shutdown path; ignoring error"); + // Intentionally no LOG() here: LOG() acquires the GIL internally + // via py::gil_scoped_acquire, which is unsafe during interpreter + // shutdown or stack unwinding (can deadlock or call std::terminate). } // triggers SQLFreeHandle via destructor, if last owner _dbcHandle.reset();