Skip to content

Commit faf3b25

Browse files
Guard TEXT binds with embedded NULs at the dbapi layer
dqlite's wire encodes TEXT as NUL-terminated UTF-8, so a Python str with embedded "\x00" cannot round-trip. The wire encoder already rejects with EncodeError (which cursor._call_client wraps as DataError), so the failure was already in the dbapi.Error hierarchy — but the wire-internal diagnostic talked about "null-terminated encoding would lose data", not the actionable workaround. Stdlib sqlite3 accepts the same bind faithfully (sqlite3_bind_text stores the bytes as-is; only sqlite3_column_text truncates at the first NUL on read-back, while sqlite3_column_blob round-trips). So cross-driver code that stores NUL-containing payloads in TEXT works silently on stdlib and raises on dqlite — and the operator reading the wire-internal diagnostic had no breadcrumb pointing at the BLOB column / bytes-bind workaround. Add a pre-encode guard at _convert_bind_param that raises DataError with a diagnostic naming the offset of the offending NUL and the canonical BLOB workaround. Runs after the adapter / __conform__ chain so an adapter that produces a NUL-bearing str is also caught at the adapter site rather than escaping into the wire encoder. Defence-in-depth: the wire-layer rejection stays in place. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent fef8c60 commit faf3b25

2 files changed

Lines changed: 128 additions & 0 deletions

File tree

src/dqlitedbapi/types.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,19 @@ def _convert_bind_param(value: Any) -> Any:
10291029
the isinstance built-in arm fires when no exact-type adapter
10301030
matches, then chains the result (the adapter-output chain above)
10311031
if the produced value is itself a datetime/date/time.
1032+
1033+
**TEXT binds with embedded NUL bytes are rejected.** dqlite's
1034+
wire protocol encodes TEXT as NUL-terminated UTF-8, so an
1035+
embedded ``"\\x00"`` cannot round-trip. The dbapi layer raises
1036+
:class:`~dqlitedbapi.exceptions.DataError` (in the ``dbapi.Error``
1037+
hierarchy) BEFORE handing the value to the wire encoder, with a
1038+
diagnostic naming the BLOB workaround. Diverges from stdlib
1039+
``sqlite3``, whose ``sqlite3_bind_text`` accepts embedded NULs
1040+
(TEXT readback via ``sqlite3_column_text`` truncates at the
1041+
first NUL, but the value round-trips faithfully via
1042+
``sqlite3_column_blob``). Cross-driver code targeting both
1043+
stdlib and dqlite must bind NUL-containing payloads as
1044+
``bytes`` / ``memoryview`` on a BLOB column.
10321045
"""
10331046
# User-registered adapter takes precedence. ``type(value)`` not
10341047
# isinstance: stdlib's contract is exact-class match (subclasses
@@ -1118,4 +1131,27 @@ def _convert_bind_param(value: Any) -> Any:
11181131
f"only int / float / str / bytes / bytearray / memoryview / "
11191132
f"bool / None. Register an adapter that returns one of those."
11201133
)
1134+
# Pre-wire NUL guard: dqlite's wire TEXT is NUL-terminated UTF-8
1135+
# (see ``dqlitewire.encode_text``), so embedded NULs cannot
1136+
# round-trip. The wire encoder already rejects with
1137+
# ``EncodeError``, which ``cursor._call_client`` wraps as
1138+
# ``DataError`` — but the wire-layer diagnostic mentions the
1139+
# null-terminated-encoding internal mechanic, not the canonical
1140+
# workaround. Reject at the dbapi layer with a message naming
1141+
# the BLOB workaround so cross-driver operators porting from
1142+
# stdlib ``sqlite3`` (which stores embedded NULs in TEXT
1143+
# faithfully via ``sqlite3_bind_text``) see the actionable hint
1144+
# rather than the wire-internal message. Defence-in-depth: the
1145+
# wire-layer rejection stays in place. Runs AFTER the adapter /
1146+
# ``__conform__`` chain so an adapter that produces a NUL-bearing
1147+
# ``str`` is also caught.
1148+
if isinstance(value, str) and "\x00" in value:
1149+
raise DataError(
1150+
f"TEXT bind with embedded NUL at offset "
1151+
f"{value.index(chr(0))} rejected by dqlite wire "
1152+
f"(NUL-terminated UTF-8). Cross-driver divergence from "
1153+
f"stdlib sqlite3 which preserves NULs in TEXT. Use a "
1154+
f"BLOB column (bind bytes/memoryview) to round-trip "
1155+
f"NUL-containing data."
1156+
)
11211157
return value
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
"""Pin: ``_convert_bind_param`` rejects ``str`` bind values containing
2+
embedded NUL bytes with a ``DataError`` naming the BLOB workaround,
3+
BEFORE the wire encoder runs.
4+
5+
dqlite's wire protocol encodes TEXT as NUL-terminated UTF-8, so an
6+
embedded ``"\\x00"`` cannot round-trip. The wire encoder
7+
(``dqlitewire.encode_text``) already rejects with ``EncodeError``,
8+
which the cursor's ``_call_client`` wraps as ``DataError`` — but the
9+
wire-internal diagnostic doesn't point at the actionable workaround
10+
(use a BLOB column / bind ``bytes``).
11+
12+
Stdlib ``sqlite3`` (via the C-level ``sqlite3_bind_text``) accepts embedded NULs faithfully
13+
(``sqlite3_column_text`` truncates at the first NUL on read-back, but
14+
``sqlite3_column_blob`` round-trips), so cross-driver code that stores
15+
NUL-containing TEXT silently works against stdlib but raises against
16+
dqlite. Surface a discoverable dbapi-layer diagnostic so operators
17+
porting from stdlib see the BLOB hint immediately.
18+
19+
The bytes path is unaffected — bind ``bytes`` / ``memoryview`` against
20+
a BLOB column to round-trip NUL-containing data.
21+
"""
22+
23+
from __future__ import annotations
24+
25+
import pytest
26+
27+
import dqlitedbapi
28+
from dqlitedbapi.exceptions import DataError
29+
from dqlitedbapi.types import _convert_bind_param
30+
31+
32+
def test_text_with_embedded_nul_rejected_with_blob_hint() -> None:
    """A ``str`` bind carrying a NUL raises ``DataError`` with a usable hint.

    The message must contain the 'embedded NUL' wording, name the BLOB
    workaround, and report where the NUL sits, so the caller can act on
    the error text alone without inspecting ``__cause__``.
    """
    with pytest.raises(DataError) as caught:
        _convert_bind_param("hello\x00world")
    rendered = str(caught.value)
    # "offset 5" is the index of the NUL within the input ``str``
    # ("hello" occupies indices 0-4).
    for fragment in ("embedded NUL", "BLOB", "offset 5"):
        assert fragment in rendered
46+
47+
48+
def test_text_nul_rejection_is_in_dbapi_error_hierarchy() -> None:
    """The NUL rejection is catchable as ``dqlitedbapi.Error``.

    Callers that guard binds with a broad ``except dqlitedbapi.Error:``
    must see this failure too, so the test expects the package-level
    base class rather than the concrete ``DataError``.
    """
    lone_nul = "\x00"
    with pytest.raises(dqlitedbapi.Error):
        _convert_bind_param(lone_nul)
53+
54+
55+
def test_bytes_with_embedded_nul_unaffected() -> None:
    """Binary binds containing NUL bytes pass through untouched.

    ``bytes``, ``bytearray`` and ``memoryview`` are the workaround the
    TEXT diagnostic recommends, so each must come back as the very same
    object. This pins the guard to ``str`` values only.
    """
    raw = b"hello\x00world"
    for candidate in (raw, bytearray(raw), memoryview(raw)):
        # Identity, not equality: the value must not be copied or converted.
        assert _convert_bind_param(candidate) is candidate
67+
68+
69+
def test_text_without_nul_unaffected() -> None:
    """NUL-free ``str`` binds are returned with their value unchanged."""
    # Covers ordinary ASCII, the empty string, and non-ASCII text.
    for sample in ("hello world", "", "unicode: 日本語"):
        assert _convert_bind_param(sample) == sample
74+
75+
76+
def test_adapter_producing_str_with_nul_also_rejected() -> None:
    """A NUL-bearing ``str`` returned by a registered adapter is rejected.

    The value handed to ``_convert_bind_param`` is not a ``str`` at all;
    only the adapter's output contains the NUL. The test expects the same
    'embedded NUL' ``DataError`` for that adapted result.
    """

    class _NeedsAdapter:
        pass

    def _emit_nul_text(_obj):
        # Stand-in for a serialiser that leaks a NUL into its text output.
        return "leading\x00trailing"

    dqlitedbapi.register_adapter(_NeedsAdapter, _emit_nul_text)
    try:
        with pytest.raises(DataError, match="embedded NUL"):
            _convert_bind_param(_NeedsAdapter())
    finally:
        # Always remove the adapter so the registration cannot outlive
        # this test.
        dqlitedbapi.unregister_adapter(_NeedsAdapter)

0 commit comments

Comments
 (0)