Skip to content
41 changes: 35 additions & 6 deletions mssql_python/pybind/connection/connection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,16 @@ void Connection::connect(const py::dict& attrs_before) {
#else
connStrPtr = const_cast<SQLWCHAR*>(_connStr.c_str());
#endif
SQLRETURN ret = SQLDriverConnect_ptr(_dbcHandle->get(), nullptr, connStrPtr, SQL_NTS, nullptr,
0, nullptr, SQL_DRIVER_NOPROMPT);
SQLRETURN ret;
{
// Release the GIL during the blocking ODBC connect call.
// SQLDriverConnect involves DNS resolution, TCP handshake, TLS negotiation,
// and SQL Server authentication — all pure I/O that doesn't need the GIL.
// This allows other Python threads to run concurrently.
py::gil_scoped_release release;
ret = SQLDriverConnect_ptr(_dbcHandle->get(), nullptr, connStrPtr, SQL_NTS, nullptr,
0, nullptr, SQL_DRIVER_NOPROMPT);
}
checkError(ret);
updateLastUsed();
}
Expand All @@ -95,6 +103,11 @@ void Connection::disconnect() {
if (_dbcHandle) {
LOG("Disconnecting from database");

// Check if we hold the GIL so we can conditionally release it.
// The GIL is held when called from pybind11-bound methods but may NOT
// be held in destructor paths (C++ shared_ptr ref-count drop, shutdown).
bool hasGil = PyGILState_Check() != 0;

// CRITICAL FIX: Mark all child statement handles as implicitly freed
// When we free the DBC handle below, the ODBC driver will automatically free
// all child STMT handles. We need to tell the SqlHandle objects about this
Expand Down Expand Up @@ -135,8 +148,26 @@ void Connection::disconnect() {
_allocationsSinceCompaction = 0;
} // Release lock before potentially slow SQLDisconnect call

SQLRETURN ret = SQLDisconnect_ptr(_dbcHandle->get());
checkError(ret);
SQLRETURN ret;
if (hasGil) {
// Release the GIL during the blocking ODBC disconnect call.
// This allows other Python threads to run while the network
// round-trip completes.
py::gil_scoped_release release;
ret = SQLDisconnect_ptr(_dbcHandle->get());
} else {
// Destructor / shutdown path — GIL is not held, call directly.
ret = SQLDisconnect_ptr(_dbcHandle->get());
}
// In destructor/shutdown paths, suppress errors to avoid
// std::terminate() if this throws during stack unwinding.
if (hasGil) {
checkError(ret);
} else if (!SQL_SUCCEEDED(ret)) {
// Intentionally no LOG() here: LOG() acquires the GIL internally
// via py::gil_scoped_acquire, which is unsafe during interpreter
// shutdown or stack unwinding (can deadlock or call std::terminate).
}
// triggers SQLFreeHandle via destructor, if last owner
_dbcHandle.reset();
} else {
Expand Down Expand Up @@ -375,7 +406,6 @@ bool Connection::reset() {
(SQLPOINTER)SQL_RESET_CONNECTION_YES, SQL_IS_INTEGER);
if (!SQL_SUCCEEDED(ret)) {
LOG("Failed to reset connection (ret=%d). Marking as dead.", ret);
disconnect();
return false;
}

Expand All @@ -387,7 +417,6 @@ bool Connection::reset() {
(SQLPOINTER)SQL_TXN_READ_COMMITTED, SQL_IS_INTEGER);
if (!SQL_SUCCEEDED(ret)) {
LOG("Failed to reset transaction isolation level (ret=%d). Marking as dead.", ret);
disconnect();
return false;
}

Expand Down
82 changes: 65 additions & 17 deletions mssql_python/pybind/connection/connection_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ std::shared_ptr<Connection> ConnectionPool::acquire(const std::wstring& connStr,
const py::dict& attrs_before) {
std::vector<std::shared_ptr<Connection>> to_disconnect;
std::shared_ptr<Connection> valid_conn = nullptr;
bool needs_connect = false;
{
std::lock_guard<std::mutex> lock(_mutex);
auto now = std::chrono::steady_clock::now();
Expand Down Expand Up @@ -57,16 +58,33 @@ std::shared_ptr<Connection> ConnectionPool::acquire(const std::wstring& connStr,
}
}

// Create new connection if none reusable
// Reserve a slot for a new connection if none reusable.
// The actual connect() call happens outside the mutex to avoid
// holding the mutex during the blocking ODBC call (which releases
// the GIL and could otherwise cause a mutex/GIL deadlock).
if (!valid_conn && _current_size < _max_size) {
valid_conn = std::make_shared<Connection>(connStr, true);
valid_conn->connect(attrs_before);
++_current_size;
needs_connect = true;
} else if (!valid_conn) {
throw std::runtime_error("ConnectionPool::acquire: pool size limit reached");
}
}

// Phase 2.5: Connect the new connection outside the mutex.
if (needs_connect) {
try {
valid_conn->connect(attrs_before);
} catch (...) {
// Connect failed — release the reserved slot
{
std::lock_guard<std::mutex> lock(_mutex);
if (_current_size > 0) --_current_size;
}
throw;
}
}

// Phase 3: Disconnect expired/bad connections outside lock
for (auto& conn : to_disconnect) {
try {
Expand All @@ -79,12 +97,25 @@ std::shared_ptr<Connection> ConnectionPool::acquire(const std::wstring& connStr,
}

void ConnectionPool::release(std::shared_ptr<Connection> conn) {
    // Return a connection to the pool. If the pool already holds _max_size
    // idle connections, the connection overflows and must be disconnected.
    bool should_disconnect = false;
    {
        std::lock_guard<std::mutex> lock(_mutex);
        if (_pool.size() < _max_size) {
            // Room in the idle list: refresh the timestamp so idle-timeout
            // eviction measures from now, then pool it for reuse.
            conn->updateLastUsed();
            _pool.push_back(conn);
        } else {
            // Pool is full — defer the actual disconnect until after the
            // mutex is released.
            should_disconnect = true;
        }
    }
    // Disconnect outside the mutex to avoid holding it during the
    // blocking ODBC call (which releases the GIL and could otherwise
    // cause a mutex/GIL lock-ordering deadlock).
    if (should_disconnect) {
        try {
            conn->disconnect();
        } catch (const std::exception& ex) {
            LOG("ConnectionPool::release: disconnect failed: %s", ex.what());
        }
        // Re-acquire the mutex only for the bookkeeping update; the slot
        // this connection occupied is now free for a future acquire().
        std::lock_guard<std::mutex> lock(_mutex);
        if (_current_size > 0)
            --_current_size;
    }
}
Expand Down Expand Up @@ -116,21 +147,35 @@ ConnectionPoolManager& ConnectionPoolManager::getInstance() {

std::shared_ptr<Connection> ConnectionPoolManager::acquireConnection(const std::wstring& connStr,
                                                                     const py::dict& attrs_before) {
    // Look up (or lazily create) the pool for this connection string while
    // holding _manager_mutex, but keep the critical section to map access
    // only — never to the blocking connect itself.
    std::shared_ptr<ConnectionPool> pool;
    {
        std::lock_guard<std::mutex> lock(_manager_mutex);
        auto& pool_ref = _pools[connStr];
        if (!pool_ref) {
            LOG("Creating new connection pool");
            pool_ref = std::make_shared<ConnectionPool>(_default_max_size, _default_idle_secs);
        }
        pool = pool_ref;
    }
    // Call acquire() outside _manager_mutex. acquire() may release the GIL
    // during the ODBC connect call; holding _manager_mutex across that would
    // create a mutex/GIL lock-ordering deadlock.
    return pool->acquire(connStr, attrs_before);
}

void ConnectionPoolManager::returnConnection(const std::wstring& conn_str,
const std::shared_ptr<Connection> conn) {
std::lock_guard<std::mutex> lock(_manager_mutex);
if (_pools.find(conn_str) != _pools.end()) {
_pools[conn_str]->release((conn));
std::shared_ptr<ConnectionPool> pool;
{
std::lock_guard<std::mutex> lock(_manager_mutex);
auto it = _pools.find(conn_str);
if (it != _pools.end()) {
pool = it->second;
}
}
// Call release() outside _manager_mutex to avoid deadlock.
if (pool) {
pool->release(conn);
}
}

Expand All @@ -142,6 +187,9 @@ void ConnectionPoolManager::configure(int max_size, int idle_timeout_secs) {

void ConnectionPoolManager::closePools() {
std::lock_guard<std::mutex> lock(_manager_mutex);
// Keep _manager_mutex held for the full close operation so that
// acquireConnection()/returnConnection() cannot create or use pools
// while closePools() is in progress.
for (auto& [conn_str, pool] : _pools) {
if (pool) {
pool->close();
Expand Down
36 changes: 36 additions & 0 deletions tests/test_009_pooling.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,42 @@ def try_overflow():
c.close()


def test_pool_release_overflow_disconnects_outside_mutex(conn_str):
    """Returning a connection to an already-full pool must disconnect it.

    Exercises the overflow branch of ConnectionPool::release()
    (connection_pool.cpp): when the idle list is at max_size, the returned
    connection is flagged via should_disconnect and torn down outside the
    pool mutex instead of being re-pooled.

    Because max_size caps total concurrent connections, the scenario is
    staged by opening two connections under max_size=2, then shrinking the
    pool to max_size=1 before closing them — the second close then lands
    on the overflow path.
    """
    pooling(max_size=2, idle_timeout=30)

    first = connect(conn_str)
    second = connect(conn_str)

    # Reduce idle capacity: the first close will occupy the only slot.
    pooling(max_size=1, idle_timeout=30)

    # Goes back into the pool (one idle entry now held).
    first.close()

    # Pool already full — this one must take the disconnect path.
    second.close()

    # The pool must remain usable after the overflow disconnect.
    survivor = connect(conn_str)
    cur = survivor.cursor()
    cur.execute("SELECT 1")
    assert cur.fetchone()[0] == 1
    survivor.close()


@pytest.mark.skip("Flaky test - idle timeout behavior needs investigation")
def test_pool_idle_timeout_removes_connections(conn_str):
"""Test that idle_timeout removes connections from the pool after the timeout."""
Expand Down
Loading
Loading