diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index 5ed7820f..12bab21c 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -635,9 +635,17 @@ SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params, SQLULEN describedSize; SQLSMALLINT describedDigits; SQLSMALLINT nullable; - RETCODE rc = SQLDescribeParam_ptr( - hStmt, static_cast(paramIndex + 1), &describedType, - &describedSize, &describedDigits, &nullable); + // SQLDescribeParam may issue a server round-trip + // (sp_describe_undeclared_parameters). Release the GIL + // around it so in-process Python TCP forwarders can run + // (issue #565 family). + RETCODE rc; + { + py::gil_scoped_release release; + rc = SQLDescribeParam_ptr( + hStmt, static_cast(paramIndex + 1), &describedType, + &describedSize, &describedDigits, &nullable); + } if (!SQL_SUCCEEDED(rc)) { // SQLDescribeParam can fail for generic SELECT statements where // no table column is referenced. Fall back to SQL_VARCHAR as a safe @@ -1384,8 +1392,21 @@ void SqlHandle::free() { return; } - // Handle is valid and not implicitly freed, proceed with normal freeing - SQLFreeHandle_ptr(_type, _handle); + // Handle is valid and not implicitly freed, proceed with normal freeing. + // Release the GIL during the blocking ODBC call (SQLFreeHandle on a STMT + // with an open server-side cursor, or on a DBC, performs network I/O). + // This is critical when the connection is reached through an in-process + // Python TCP forwarder (e.g. paramiko + sshtunnel) - the forwarder + // thread needs the GIL to push bytes, so holding it here deadlocks + // (issue #565). Only release the GIL if it is actually held AND the + // interpreter is not finalizing - gil_scoped_release is unsafe during + // shutdown even if PyGILState_Check() reports the GIL as held. + if (!pythonShuttingDown && PyGILState_Check()) { + py::gil_scoped_release release; + SQLFreeHandle_ptr(_type, _handle); + } else { + SQLFreeHandle_ptr(_type, _handle); + } _handle = nullptr; } } @@ -1400,7 +1421,17 @@ void SqlHandle::close_cursor() { if (!SQLFreeStmt_ptr) { ThrowStdException("SQLFreeStmt function not loaded"); } - SQLRETURN ret = SQLFreeStmt_ptr(_handle, SQL_CLOSE); + // Release the GIL during the blocking SQLFreeStmt(SQL_CLOSE) network + // round-trip; see issue #565 (in-process forwarder deadlock). + // Skip GIL release when the GIL isn't held or the interpreter is + // finalizing - gil_scoped_release is unsafe in shutdown. + SQLRETURN ret; + if (!is_python_finalizing() && PyGILState_Check()) { + py::gil_scoped_release release; + ret = SQLFreeStmt_ptr(_handle, SQL_CLOSE); + } else { + ret = SQLFreeStmt_ptr(_handle, SQL_CLOSE); + } if (ret != SQL_SUCCESS && ret != SQL_SUCCESS_WITH_INFO) { ThrowStdException("SQLFreeStmt(SQL_CLOSE) failed"); } @@ -5695,7 +5726,16 @@ SQLRETURN SQLFreeHandle_wrap(SQLSMALLINT HandleType, SqlHandlePtr Handle) { DriverLoader::getInstance().loadDriver(); // Load the driver } - SQLRETURN ret = SQLFreeHandle_ptr(HandleType, Handle->get()); + // Release the GIL during the blocking SQLFreeHandle network round-trip + // (see issue #565 - in-process Python TCP forwarder deadlock). + // Skip GIL release in shutdown paths where it would crash. + SQLRETURN ret; + if (!is_python_finalizing() && PyGILState_Check()) { + py::gil_scoped_release release; + ret = SQLFreeHandle_ptr(HandleType, Handle->get()); + } else { + ret = SQLFreeHandle_ptr(HandleType, Handle->get()); + } if (!SQL_SUCCEEDED(ret)) { LOG("SQLFreeHandle_wrap: SQLFreeHandle failed with error code - %d", ret); return ret; diff --git a/tests/test_023_ssh_tunnel_gil_release.py b/tests/test_023_ssh_tunnel_gil_release.py index 686570af..8031465e 100644 --- a/tests/test_023_ssh_tunnel_gil_release.py +++ b/tests/test_023_ssh_tunnel_gil_release.py @@ -187,22 +187,119 @@ def _run_forwarded_connect_subprocess() -> int: return 0 +def _run_forwarded_param_query_subprocess() -> int: + """ + Subprocess body for the parametrized-query teardown regression from + issue #565 (latest comment by @jschuba). + + Executes parametrized SELECTs through the in-process forwarder, then + closes the connection. Before the GIL-release fix on the teardown + handle-freeing path, ``conn.close()`` blocks indefinitely inside + ``SQLFreeHandle`` (the server-side cursor opened by the parametrized + SELECT triggers a network round-trip during STMT-handle teardown, + while the GIL is held - starving the in-process forwarder thread). + """ + import mssql_python + from mssql_python import connect + + base = os.environ["DB_CONNECTION_STRING"] + target = _parse_server(base) + if target is None: + print("ERR: could not parse Server=... clause", file=sys.stderr) + return 2 + + fwd_host, fwd_port = _start_forwarder(target) + mssql_python.pooling(enabled=False) + tunneled = _replace_server(base, fwd_host, fwd_port) + + conn = connect(tunneled) + + # Use Connection.execute (same flow as the issue report) so cursors are + # not explicitly closed by the user - the deadlock occurs when conn.close() + # cascades through the still-open server-side cursors. + res = conn.execute("SELECT 1 as result WHERE 1=?", (1,)) + row_qmark = res.fetchall() + + res = conn.execute("SELECT 1 as result WHERE 1=%(parameter)s", {"parameter": 1}) + row_named = res.fetchall() + + # This is the call that hangs in the unfixed binary. + conn.close() + + print(f"OK qmark={row_qmark} named={row_named}", flush=True) + return 0 + + +def _run_forwarded_introspection_subprocess() -> int: + """ + Subprocess body covering the parametrized-query introspection path + (issue #565 family). + + The only actual network round-trip here is ``SQLDescribeParam`` + (``sp_describe_undeclared_parameters``), triggered by executing a + parametrized statement with a ``None`` argument. Before the fix this + call ran with the GIL held and would deadlock when routed through an + in-process Python TCP forwarder. + + ``Connection.getinfo`` and ``conn.autocommit`` are included as local + sanity probes -- per the ODBC spec and MS ODBC driver behavior these + return data cached at login time / driver-side state and do *not* + issue server round-trips, so they should complete instantly with or + without the fix. + """ + import mssql_python + from mssql_python import connect + + base = os.environ["DB_CONNECTION_STRING"] + target = _parse_server(base) + if target is None: + print("ERR: could not parse Server=... clause", file=sys.stderr) + return 2 + + fwd_host, fwd_port = _start_forwarder(target) + mssql_python.pooling(enabled=False) + tunneled = _replace_server(base, fwd_host, fwd_port) + + conn = connect(tunneled) + + # (1) Local sanity probe: SQLGetInfo. Per ODBC spec / MS driver, info + # types like SQL_DBMS_NAME are cached at login time and don't issue a + # round-trip, so this should complete instantly even without any fix. + if hasattr(conn, "getinfo"): + try: + _ = conn.getinfo(17) # SQL_DBMS_NAME + print("[child] getinfo(SQL_DBMS_NAME) OK", flush=True) + except Exception as e: + print(f"[child] getinfo skipped: {type(e).__name__}: {e}", flush=True) + + # (2) The real deadlock path: parametrized execute with a None argument + # forces the driver to issue sp_describe_undeclared_parameters via + # SQLDescribeParam, which is a server round-trip. Without the GIL + # release on SQLDescribeParam this hangs through the in-process forwarder. + res = conn.execute("SELECT ISNULL(?, 42) as result", (None,)) + row_none = res.fetchall() + + # (3) Local sanity probe: SQL_ATTR_AUTOCOMMIT is client-cached and the + # getter does not round-trip; included to lock in current behavior. + _ = conn.autocommit + + conn.close() + + print(f"OK introspection none_param={row_none}", flush=True) + return 0 + + # --------------------------------------------------------------------------- # The actual pytest test. # --------------------------------------------------------------------------- -def test_connect_through_python_tcp_forwarder_does_not_deadlock(): - """ - Regression test for issue #565. - - Routes ``mssql_python.connect()`` through a pure-Python TCP forwarder - in a subprocess and applies a hard watchdog. Before the - connection-attribute GIL-release fix, the subprocess hangs forever - inside ``SQLSetConnectAttr(SQL_ATTR_AUTOCOMMIT, OFF)`` (called by - ``Connection.setautocommit`` immediately after ``SQLDriverConnect`` - returns); with the fix it completes in well under a second. - """ +def _run_subprocess_scenario( + scenario_env_value: str, + expected_marker: bytes, + failure_message: str, +) -> None: + """Shared helper: spawn this file as a subprocess and apply the watchdog.""" base_conn_str = os.getenv("DB_CONNECTION_STRING") if not base_conn_str: pytest.skip("DB_CONNECTION_STRING environment variable not set") @@ -213,10 +310,8 @@ def test_connect_through_python_tcp_forwarder_does_not_deadlock(): env = os.environ.copy() env["DB_CONNECTION_STRING"] = base_conn_str env["PYTHONUNBUFFERED"] = "1" - # Propagate sys.path so the subprocess can find mssql_python even when - # the package is only available via pytest's path manipulation or an - # editable install rooted in the workspace. env["PYTHONPATH"] = os.pathsep.join(sys.path) + env["MSSQL_PYTHON_TEST_565_SCENARIO"] = scenario_env_value proc = subprocess.Popen( [sys.executable, os.path.abspath(__file__)], @@ -229,26 +324,101 @@ def test_connect_through_python_tcp_forwarder_does_not_deadlock(): except subprocess.TimeoutExpired: proc.kill() proc.communicate() - pytest.fail( - f"connect()+SELECT through in-process Python TCP forwarder did " - f"not complete within {WATCHDOG_SECONDS}s — this is the issue " - f"#565 deadlock (GIL held across SQLSetConnectAttr_AUTOCOMMIT)." - ) + pytest.fail(failure_message) assert proc.returncode == 0, ( f"Subprocess exited with {proc.returncode}.\n" f"stdout:\n{out.decode(errors='replace')}\n" f"stderr:\n{err.decode(errors='replace')}" ) - assert b"OK (1,)" in out, ( + assert expected_marker in out, ( f"Unexpected subprocess output.\n" f"stdout:\n{out.decode(errors='replace')}\n" f"stderr:\n{err.decode(errors='replace')}" ) -# Subprocess entry point: when this file is run as a script (by the test -# above), execute the forwarded-connect body. Pytest collection ignores -# this block. +def test_connect_through_python_tcp_forwarder_does_not_deadlock(): + """ + Regression test for issue #565. + + Routes ``mssql_python.connect()`` through a pure-Python TCP forwarder + in a subprocess and applies a hard watchdog. Before the + connection-attribute GIL-release fix, the subprocess hangs forever + inside ``SQLSetConnectAttr(SQL_ATTR_AUTOCOMMIT, OFF)`` (called by + ``Connection.setautocommit`` immediately after ``SQLDriverConnect`` + returns); with the fix it completes in well under a second. + """ + _run_subprocess_scenario( + scenario_env_value="connect", + expected_marker=b"OK (1,)", + failure_message=( + f"connect()+SELECT through in-process Python TCP forwarder did " + f"not complete within {WATCHDOG_SECONDS}s — this is the issue " + f"#565 deadlock (GIL held across SQLSetConnectAttr_AUTOCOMMIT)." + ), + ) + + +def test_param_query_close_through_python_tcp_forwarder_does_not_deadlock(): + """ + Regression test for the parametrized-query teardown variant of issue + #565 (latest comment by @jschuba on the reopened issue). + + Executes parametrized SELECTs through the in-process forwarder and + then calls ``conn.close()``. Before the fix that releases the GIL + around ``SQLFreeHandle`` / ``SQLFreeStmt`` in the handle-teardown + path, ``conn.close()`` deadlocks inside ``SQLFreeHandle(SQL_HANDLE_STMT)`` + because the server-side cursor opened by the parametrized SELECT + triggers a network round-trip during handle teardown while the GIL + is held — starving the in-process forwarder thread. + """ + _run_subprocess_scenario( + scenario_env_value="param_close", + expected_marker=b"OK qmark=[(1,)] named=[(1,)]", + failure_message=( + f"Parametrized-query teardown through in-process Python TCP " + f"forwarder did not complete within {WATCHDOG_SECONDS}s — this " + f"is the issue #565 deadlock variant (GIL held across " + f"SQLFreeHandle/SQLFreeStmt during cursor/connection close)." + ), + ) + + +def test_param_describe_through_python_tcp_forwarder_does_not_deadlock(): + """ + Regression test for the parametrized-query introspection path + (issue #565 family). + + Executing a parametrized statement with a ``None`` argument forces the + MS ODBC driver to issue ``sp_describe_undeclared_parameters`` via + ``SQLDescribeParam``. Before the fix this call was made with the GIL + held and would deadlock when routed through an in-process Python TCP + forwarder. + + The subprocess also exercises ``Connection.getinfo`` and + ``conn.autocommit`` as local sanity probes (both are driver-cached + and should never round-trip). + """ + _run_subprocess_scenario( + scenario_env_value="introspection", + expected_marker=b"OK introspection", + failure_message=( + f"Parametrized-with-None path through in-process Python TCP " + f"forwarder did not complete within {WATCHDOG_SECONDS}s — " + f"this is an issue #565-family deadlock (GIL held across " + f"SQLDescribeParam / sp_describe_undeclared_parameters)." + ), + ) + + +# Subprocess entry point: when this file is run as a script (by the tests +# above), execute the requested scenario. Pytest collection ignores this +# block. if __name__ == "__main__": + scenario = os.environ.get("MSSQL_PYTHON_TEST_565_SCENARIO", "connect") + if scenario == "param_close": + sys.exit(_run_forwarded_param_query_subprocess()) + if scenario == "introspection": + sys.exit(_run_forwarded_introspection_subprocess()) sys.exit(_run_forwarded_connect_subprocess())