diff --git a/planning/changes/active/2026-06-14.04-audit-correctness/change.md b/planning/changes/active/2026-06-14.04-audit-correctness/change.md new file mode 100644 index 0000000..4bdad35 --- /dev/null +++ b/planning/changes/active/2026-06-14.04-audit-correctness/change.md @@ -0,0 +1,40 @@ +--- +status: draft +date: 2026-06-14 +slug: audit-correctness +supersedes: null +superseded_by: null +pr: null +outcome: null +--- + +# Change: Deep-audit correctness + public-API fixes + +**Lane:** full-ish, but the spec is the [2026-06-14 deep audit](../../../audits/2026-06-14-deep-audit.md) +(each finding carries root cause + suggested direction), so this bundle is a +plan-light TDD sweep of the confirmed code-level findings. + +## Goal + +Close the confirmed **correctness** and **public-API** findings from the deep +audit that #62/#63 did not already cover. + +## Fixes + +| # | Finding | File | Fix | +|---|---------|------|-----| +| L1 | RetryBudget token withdrawn before `Retry-After > max_delay` give-up | `retry.py` | evaluate the give-up guard *before* `budget.try_withdraw()`, in both `AsyncRetry` and `Retry` | +| L3 | `_parse_retry_after` `OverflowError` on huge digit string crashes the loop | `retry.py` | broaden guard to `except (ValueError, OverflowError)` | +| Nit1 | `full_jitter_delay` raises `OverflowError` at `attempt_index >= 1024` despite docstring claiming saturation | `_backoff.py` | clamp the exponent (or guard the `**`) so the documented `inf`-saturation/`max_delay` clamp actually holds; fix the docstring to match | +| Nit3 | `_strip_userinfo` emits malformed `http:///path` when authority has creds but no host | `_internal/redaction.py` | when hostname is empty, preserve the original credential-free authority shape instead of a triple-slash URL | +| Nit6 | `_contains_custom_type` uses bare `msgspec.*` at runtime → `NameError` if msgspec absent | `decoders/msgspec.py` | gate behind `is_msgspec_installed` (raise the friendly `ImportError`) so a direct call without the extra fails cleanly | +| Nit2 | `_is_streaming_body_async` doesn't mark sync iterables non-replayable (async invariant rests on an undocumented httpx2 detail) | `_internal/status.py` | symmetrize the async detector to also treat sync iterables as streaming bodies, so the replay guard is explicit rather than relying on httpx2 | +| L2 | `RetryBudget` docstring claims "asyncio-safe" without the blocking caveat | `resilience/budget.py` | qualify the docstring: the `threading.Lock` is correctness-safe but can briefly block the loop thread when shared sync↔async | +| L8 | `middleware/__init__.py` has no `__all__`, leaking 9+ star-import symbols | `middleware/__init__.py` | add an explicit `__all__` of the ten public names (matches sibling subpackages) | + +## Verification + +- [ ] Each fix lands TDD-first (failing test → fix → green); reproducers from the audit drive the tests. +- [ ] Sync/async parity for L1 (both `Retry` and `AsyncRetry`). +- [ ] `just test` — 100% coverage; `just lint` — clean. +- [ ] Grep guard: no new `httpx2._`. diff --git a/src/httpware/_internal/redaction.py b/src/httpware/_internal/redaction.py index 8b4809c..1192d7f 100644 --- a/src/httpware/_internal/redaction.py +++ b/src/httpware/_internal/redaction.py @@ -36,19 +36,35 @@ _REDACTED = "REDACTED" +def _reassemble(scheme: str, netloc: str, path: str, query: str, fragment: str) -> str: + """Like ``urlunsplit``, but avoid the spurious triple-slash for an empty authority. + + ``urlunsplit(("http", "", "/path", ...))`` yields ``http:///path`` for a + netloc-using scheme. When userinfo stripping leaves no host (e.g. + ``http://user:pass@/path``) we want ``http:/path`` (scheme + path), not a + triple-slash. With a non-empty netloc this delegates to ``urlunsplit``, so + normal URLs are byte-identical. + """ + if netloc: + return urlunsplit((scheme, netloc, path, query, fragment)) + tail = path + if query: + tail += "?" + query + if fragment: + tail += "#" + fragment + return f"{scheme}:{tail}" if scheme else tail + + def _strip_userinfo(url: str) -> str: if "@" not in url or "://" not in url: return url parts = urlsplit(url) if parts.username is None and parts.password is None: return url - hostname = parts.hostname or "" - if ":" in hostname: # IPv6 literal — re-wrap in brackets - hostname = f"[{hostname}]" - netloc = hostname - if parts.port is not None: - netloc = f"{netloc}:{parts.port}" - return urlunsplit((parts.scheme, netloc, parts.path, parts.query, parts.fragment)) + # Strip the "user:pass@" prefix from the raw netloc to preserve host:port + # exactly (including IPv6 brackets), rather than reconstructing from parts. + netloc = parts.netloc.split("@", 1)[1] if "@" in parts.netloc else parts.netloc + return _reassemble(parts.scheme, netloc, parts.path, parts.query, parts.fragment) def _mask_component(component: str) -> tuple[str, bool]: @@ -87,7 +103,7 @@ def _mask_query(url: str) -> str: if not changed: return url # common-path guard: nothing sensitive, leave bytes untouched - return urlunsplit((parts.scheme, parts.netloc, parts.path, new_query, new_fragment)) + return _reassemble(parts.scheme, parts.netloc, parts.path, new_query, new_fragment) def redact_url(url: str) -> str: diff --git a/src/httpware/_internal/status.py b/src/httpware/_internal/status.py index f7465f0..0a5507f 100644 --- a/src/httpware/_internal/status.py +++ b/src/httpware/_internal/status.py @@ -29,19 +29,24 @@ def _raise_on_status_error(response: httpx2.Response) -> None: raise exc_class(response) +def _is_replayable_type(value: object) -> bool: + """Return True if value is a replayable type (safe to replay across retry attempts).""" + return isinstance(value, (bytes, bytearray, memoryview, str, dict, list, tuple)) + + def _is_streaming_body_async(value: object) -> bool: - """Return True if value is an async-iterable that cannot be safely replayed for retry.""" + """Return True if value is a non-replayable body (async-iterable or sync non-replayable iterable).""" if value is None: return False - if isinstance(value, (bytes, bytearray, memoryview, str, dict)): + if _is_replayable_type(value): return False - return hasattr(value, "__aiter__") + return hasattr(value, "__aiter__") or hasattr(value, "__iter__") def _is_streaming_body_sync(value: object) -> bool: """Return True if value is a sync iterable body that cannot be safely replayed for retry.""" if value is None: return False - if isinstance(value, (bytes, bytearray, memoryview, str, dict, list, tuple)): + if _is_replayable_type(value): return False return hasattr(value, "__iter__") diff --git a/src/httpware/decoders/msgspec.py b/src/httpware/decoders/msgspec.py index 4c6c66e..5043309 100644 --- a/src/httpware/decoders/msgspec.py +++ b/src/httpware/decoders/msgspec.py @@ -26,6 +26,8 @@ def _contains_custom_type(info: "msgspec.inspect.Type") -> bool: makes the walk both correct (a Struct is a valid target) and safe against infinite recursion on self-referential struct definitions. """ + if not import_checker.is_msgspec_installed: + raise ImportError(MISSING_DEPENDENCY_MESSAGE) if isinstance(info, msgspec.inspect.CustomType): return True for name in dir(info): diff --git a/src/httpware/middleware/__init__.py b/src/httpware/middleware/__init__.py index 4920854..80cbbd5 100644 --- a/src/httpware/middleware/__init__.py +++ b/src/httpware/middleware/__init__.py @@ -11,6 +11,19 @@ import httpx2 +__all__ = [ + "AsyncMiddleware", + "AsyncNext", + "Middleware", + "Next", + "after_response", + "async_after_response", + "async_before_request", + "async_on_error", + "before_request", + "on_error", +] + AsyncNext: TypeAlias = Callable[[httpx2.Request], Awaitable[httpx2.Response]] diff --git a/src/httpware/middleware/resilience/_backoff.py b/src/httpware/middleware/resilience/_backoff.py index f9050ca..3c7d8af 100644 --- a/src/httpware/middleware/resilience/_backoff.py +++ b/src/httpware/middleware/resilience/_backoff.py @@ -17,10 +17,13 @@ def full_jitter_delay( `attempt_index` is 0 for the first retry, 1 for the second, etc. - Uses ``2.0 **`` (float exponentiation) rather than ``2 **`` so that - ``attempt_index >= 1024`` saturates to ``math.inf`` and ``min`` clamps to - ``max_delay`` — ``2 ** 1024`` would raise ``OverflowError`` during the - int→float conversion. + For large ``attempt_index`` (>= 1024), ``2.0 ** attempt_index`` raises + ``OverflowError``. That is caught and the ceiling is clamped directly to + ``max_delay``, which is exactly what ``min`` would produce for an infinite + exponentiation result. """ - ceiling = min(max_delay, base_delay * (2.0**attempt_index)) + try: + ceiling = min(max_delay, base_delay * (2.0**attempt_index)) + except OverflowError: + ceiling = max_delay return _random_uniform(0.0, ceiling) diff --git a/src/httpware/middleware/resilience/budget.py b/src/httpware/middleware/resilience/budget.py index 965fdf4..71347ef 100644 --- a/src/httpware/middleware/resilience/budget.py +++ b/src/httpware/middleware/resilience/budget.py @@ -2,10 +2,14 @@ See planning/specs/2026-06-05-retry-and-retry-budget-design.md for the contract. -Thread-safe and asyncio-safe: all mutations go through a threading.Lock. -A single RetryBudget instance is safe to share across threads, across -coroutines on one event loop, and across (sync Client, AsyncClient) pairs -in the same process. +Thread-safe and asyncio-safe: all mutations go through a threading.Lock, +which ensures no torn state across concurrent accesses. When a RetryBudget +is shared between a sync Client (pool thread) and an AsyncClient (event-loop +thread), a sync thread holding the lock can briefly block the loop thread's +acquisition; the critical section (purge + append/compare) is intentionally +tiny to bound this latency. Safe to share across threads, across coroutines +on one event loop, and across (sync Client, AsyncClient) pairs in the same +process. """ import math diff --git a/src/httpware/middleware/resilience/retry.py b/src/httpware/middleware/resilience/retry.py index 802c7d9..7a74cec 100644 --- a/src/httpware/middleware/resilience/retry.py +++ b/src/httpware/middleware/resilience/retry.py @@ -58,7 +58,7 @@ def _parse_retry_after(value: str) -> float | None: """Parse a Retry-After header value. Returns None on malformed input.""" try: return max(0.0, float(int(value))) # clamp: negative integers are malformed servers - except ValueError: + except (ValueError, OverflowError): pass try: parsed = email.utils.parsedate_to_datetime(value) @@ -159,6 +159,24 @@ async def __call__(self, request: httpx2.Request, next: AsyncNext) -> httpx2.Res ) raise last_exc + retry_after: float | None = None + if self.respect_retry_after and last_response is not None: + header = last_response.headers.get("Retry-After") + if header is not None: + retry_after = _parse_retry_after(header) + + if retry_after is not None and retry_after > self.max_delay: + if last_exc is None: # pragma: no cover — retry_after requires last_response which requires last_exc + msg = "AsyncRetry: retry_after path reached with no last_exc" + raise AssertionError(msg) + last_exc.add_note( + _RETRY_AFTER_EXCEEDS_MAX_DELAY_NOTE.format( + retry_after=retry_after, + max_delay=self.max_delay, + ), + ) + raise last_exc + if not self.budget.try_withdraw(): _emit_event( _LOGGER, @@ -178,23 +196,6 @@ async def __call__(self, request: httpx2.Request, next: AsyncNext) -> httpx2.Res attempts=attempt + 1, ) from last_exc - retry_after: float | None = None - if self.respect_retry_after and last_response is not None: - header = last_response.headers.get("Retry-After") - if header is not None: - retry_after = _parse_retry_after(header) - - if retry_after is not None and retry_after > self.max_delay: - if last_exc is None: # pragma: no cover — retry_after requires last_response which requires last_exc - msg = "AsyncRetry: retry_after path reached with no last_exc" - raise AssertionError(msg) - last_exc.add_note( - _RETRY_AFTER_EXCEEDS_MAX_DELAY_NOTE.format( - retry_after=retry_after, - max_delay=self.max_delay, - ), - ) - raise last_exc if retry_after is not None: delay = retry_after else: @@ -297,6 +298,24 @@ def __call__(self, request: httpx2.Request, next: Next) -> httpx2.Response: # n ) raise last_exc + retry_after: float | None = None + if self.respect_retry_after and last_response is not None: + header = last_response.headers.get("Retry-After") + if header is not None: + retry_after = _parse_retry_after(header) + + if retry_after is not None and retry_after > self.max_delay: + if last_exc is None: # pragma: no cover — retry_after requires last_response which requires last_exc + msg = "Retry: retry_after path reached with no last_exc" + raise AssertionError(msg) + last_exc.add_note( + _RETRY_AFTER_EXCEEDS_MAX_DELAY_NOTE.format( + retry_after=retry_after, + max_delay=self.max_delay, + ), + ) + raise last_exc + if not self.budget.try_withdraw(): _emit_event( _LOGGER, @@ -316,23 +335,6 @@ def __call__(self, request: httpx2.Request, next: Next) -> httpx2.Response: # n attempts=attempt + 1, ) from last_exc - retry_after: float | None = None - if self.respect_retry_after and last_response is not None: - header = last_response.headers.get("Retry-After") - if header is not None: - retry_after = _parse_retry_after(header) - - if retry_after is not None and retry_after > self.max_delay: - if last_exc is None: # pragma: no cover — retry_after requires last_response which requires last_exc - msg = "Retry: retry_after path reached with no last_exc" - raise AssertionError(msg) - last_exc.add_note( - _RETRY_AFTER_EXCEEDS_MAX_DELAY_NOTE.format( - retry_after=retry_after, - max_delay=self.max_delay, - ), - ) - raise last_exc if retry_after is not None: delay = retry_after else: diff --git a/tests/test_backoff.py b/tests/test_backoff.py index 5f3167a..7fff231 100644 --- a/tests/test_backoff.py +++ b/tests/test_backoff.py @@ -33,3 +33,20 @@ def test_full_jitter_delay_uses_injected_random() -> None: _random_uniform=lambda _lo, hi: hi, ) assert delay == BASE_DELAY + + +def test_full_jitter_delay_large_attempt_index_does_not_raise() -> None: + """attempt_index >= 1024 must not raise OverflowError; result must be in [0, max_delay].""" + delay = full_jitter_delay(2000, base_delay=BASE_DELAY, max_delay=MAX_DELAY) + assert 0.0 <= delay <= MAX_DELAY + + +def test_full_jitter_delay_large_attempt_index_clamped_to_max_delay() -> None: + """With a deterministic _random_uniform that returns the ceiling, it must return exactly max_delay.""" + delay = full_jitter_delay( + 2000, + base_delay=BASE_DELAY, + max_delay=MAX_DELAY, + _random_uniform=lambda _lo, hi: hi, + ) + assert delay == MAX_DELAY diff --git a/tests/test_decoders_msgspec.py b/tests/test_decoders_msgspec.py index f71ccd4..899cf26 100644 --- a/tests/test_decoders_msgspec.py +++ b/tests/test_decoders_msgspec.py @@ -12,7 +12,7 @@ from httpware import AsyncClient, DecodeError from httpware._internal import import_checker from httpware.decoders import ResponseDecoder -from httpware.decoders.msgspec import MsgspecDecoder +from httpware.decoders.msgspec import MsgspecDecoder, _contains_custom_type class _Item(msgspec.Struct): @@ -230,3 +230,12 @@ def test_msgspec_can_decode_unhashable_model_does_not_raise() -> None: decoder._can_decode_results = MagicMock() # noqa: SLF001 decoder._can_decode_results.get.side_effect = TypeError("unhashable type") # noqa: SLF001 assert decoder.can_decode(_Item) is True + + +def test_contains_custom_type_raises_import_error_when_msgspec_absent( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """When msgspec is absent, _contains_custom_type must raise ImportError, not NameError.""" + monkeypatch.setattr(import_checker, "is_msgspec_installed", False) + with pytest.raises(ImportError, match="MsgspecDecoder requires"): + _contains_custom_type(None) # ty: ignore[invalid-argument-type] -- guard fires before isinstance checks diff --git a/tests/test_public_api.py b/tests/test_public_api.py index c1b8f2e..94b27ae 100644 --- a/tests/test_public_api.py +++ b/tests/test_public_api.py @@ -1,6 +1,7 @@ """Public API surface — what `from httpware import ...` exposes.""" import httpware +import httpware.middleware def test_all_exports_resolve() -> None: @@ -82,3 +83,26 @@ def test_expected_exports() -> None: def test_missing_decoder_error_exported() -> None: assert "MissingDecoderError" in httpware.__all__ assert httpware.MissingDecoderError.__module__ == "httpware.errors" + + +def test_middleware_module_all_contains_exactly_ten_public_names() -> None: + """httpware.middleware.__all__ must list the 10 public protocol/decorator names only.""" + expected = { + "AsyncMiddleware", + "AsyncNext", + "Middleware", + "Next", + "after_response", + "async_after_response", + "async_before_request", + "async_on_error", + "before_request", + "on_error", + } + assert set(httpware.middleware.__all__) == expected + + +def test_middleware_module_all_does_not_leak_internals() -> None: + """httpware.middleware.__all__ must not expose imported helpers or submodules.""" + leaked = {"httpx2", "Protocol", "Callable", "Awaitable", "TypeAlias", "runtime_checkable", "chain", "resilience"} + assert not leaked & set(httpware.middleware.__all__) diff --git a/tests/test_redaction.py b/tests/test_redaction.py index 4d250c5..012f9c3 100644 --- a/tests/test_redaction.py +++ b/tests/test_redaction.py @@ -58,3 +58,28 @@ def test_redact_url_masks_whitespace_padded_key() -> None: result = redact_url("https://example.test/p?%20api_key=topsecret") assert "topsecret" not in result assert "REDACTED" in result + + +def test_strip_userinfo_no_hostname_does_not_produce_triple_slash() -> None: + """http://user:pass@/path must strip creds without yielding http:///path.""" + result = redact_url("http://user:pass@/path") + assert "user" not in result + assert "pass" not in result + assert ":///" not in result # must not be triple-slash + assert result == "http:/path" # no authority, just scheme + path + + +def test_redact_url_at_sign_in_path_without_userinfo_is_unchanged() -> None: + """An `@` in the path (not credentials) leaves the URL untouched.""" + assert redact_url("https://example.test/@handle?page=2") == "https://example.test/@handle?page=2" + + +def test_strip_userinfo_no_hostname_preserves_query_and_fragment() -> None: + """The no-authority reconstruction keeps the query (masked) and fragment, with no triple-slash.""" + result = redact_url("http://user:pass@/path?api_key=secret#section") + assert ":///" not in result + assert "user" not in result + assert "pass" not in result + assert "secret" not in result + assert "api_key=REDACTED" in result + assert "#section" in result diff --git a/tests/test_retry.py b/tests/test_retry.py index 70f6790..e2edc01 100644 --- a/tests/test_retry.py +++ b/tests/test_retry.py @@ -465,6 +465,30 @@ async def streamed_files() -> typing.AsyncIterator[bytes]: assert _is_streaming_body(streamed_files()) is True +def test_is_streaming_body_async_true_for_sync_generator() -> None: + """_is_streaming_body_async must return True for sync generators — they're non-replayable.""" + + def sync_gen() -> typing.Iterator[bytes]: + yield b"x" # pragma: no cover + + assert _is_streaming_body(sync_gen()) is True + + +def test_is_streaming_body_async_false_for_bytes() -> None: + """_is_streaming_body_async must return False for bytes (replayable).""" + assert _is_streaming_body(b"bytes") is False + + +def test_is_streaming_body_async_false_for_list() -> None: + """_is_streaming_body_async must return False for list (replayable).""" + assert _is_streaming_body([b"chunk1", b"chunk2"]) is False + + +def test_is_streaming_body_async_false_for_tuple() -> None: + """_is_streaming_body_async must return False for tuple (replayable).""" + assert _is_streaming_body((b"chunk1", b"chunk2")) is False + + async def test_retry_refuses_streamed_body_request() -> None: """AsyncRetry must not replay a request with a streaming body — re-raise with a PEP-678 note. @@ -689,6 +713,42 @@ async def test_retry_event_url_attribute_masks_query_secret(caplog: pytest.LogCa assert "api_key=REDACTED" in giving_up[0].url # ty: ignore[unresolved-attribute] +async def test_retry_after_exceeds_max_delay_does_not_consume_budget_token() -> None: + """When Retry-After > max_delay, give up without consuming a budget token.""" + sleeper = _SleepRecorder() + budget = RetryBudget(ttl=10.0, min_retries_per_sec=10.0, percent_can_retry=0.2) + handler = _ResponseSequenceWithHeaders( + [ + (HTTPStatus.SERVICE_UNAVAILABLE, {"Retry-After": "9999"}), + (HTTPStatus.OK, {}), # never reached + ] + ) + client = _client( + handler, + retry=AsyncRetry(_sleep=sleeper, budget=budget, base_delay=0.01, max_delay=2.5), + ) + with pytest.raises(ServiceUnavailableError): + await client.get("https://example.test/x") + # Retry-After > max_delay: give up without spending a budget token. + assert len(budget._withdrawn) == 0 # noqa: SLF001 — implementation-detail access for invariant + + +async def test_retry_after_huge_digit_string_does_not_crash() -> None: + """A very large Retry-After integer string must fall back to jitter, not crash with OverflowError.""" + sleeper = _SleepRecorder() + handler = _ResponseSequenceWithHeaders( + [ + (HTTPStatus.SERVICE_UNAVAILABLE, {"Retry-After": "9" * 400}), + (HTTPStatus.OK, {}), + ] + ) + client = _client(handler, retry=AsyncRetry(_sleep=sleeper, base_delay=0.01, max_delay=0.05)) + response = await client.get("https://example.test/x") + assert response.status_code == HTTPStatus.OK + assert len(sleeper.calls) == 1 + assert 0.0 <= sleeper.calls[0] <= 0.05 # noqa: PLR2004 — 0.05 matches max_delay literal above + + async def test_method_ineligible_with_streaming_body_does_not_attach_streaming_note() -> None: """POST with a streaming body that gets a 503 raises ServiceUnavailableError WITHOUT the streaming-note. diff --git a/tests/test_retry_sync.py b/tests/test_retry_sync.py index fe0803f..ffe796b 100644 --- a/tests/test_retry_sync.py +++ b/tests/test_retry_sync.py @@ -563,6 +563,42 @@ def test_retry_event_url_attribute_masks_query_secret_sync(caplog: pytest.LogCap assert "api_key=REDACTED" in giving_up[0].url # ty: ignore[unresolved-attribute] +def test_retry_after_exceeds_max_delay_does_not_consume_budget_token_sync() -> None: + """When Retry-After > max_delay, give up without consuming a budget token (sync).""" + sleeper = _SleepRecorder() + budget = RetryBudget(ttl=10.0, min_retries_per_sec=10.0, percent_can_retry=0.2) + handler = _ResponseSequenceWithHeaders( + [ + (HTTPStatus.SERVICE_UNAVAILABLE, {"Retry-After": "9999"}), + (HTTPStatus.OK, {}), # never reached + ] + ) + client = _client( + handler, + retry=Retry(_sleep=sleeper, budget=budget, base_delay=0.01, max_delay=2.5), + ) + with pytest.raises(ServiceUnavailableError): + client.get("https://example.test/x") + # Retry-After > max_delay: give up without spending a budget token. + assert len(budget._withdrawn) == 0 # noqa: SLF001 — implementation-detail access for invariant + + +def test_retry_after_huge_digit_string_does_not_crash_sync() -> None: + """A very large Retry-After integer string must fall back to jitter, not crash (sync).""" + sleeper = _SleepRecorder() + handler = _ResponseSequenceWithHeaders( + [ + (HTTPStatus.SERVICE_UNAVAILABLE, {"Retry-After": "9" * 400}), + (HTTPStatus.OK, {}), + ] + ) + client = _client(handler, retry=Retry(_sleep=sleeper, base_delay=0.01, max_delay=0.05)) + response = client.get("https://example.test/x") + assert response.status_code == HTTPStatus.OK + assert len(sleeper.calls) == 1 + assert 0.0 <= sleeper.calls[0] <= 0.05 # noqa: PLR2004 — 0.05 matches max_delay literal above + + def test_method_ineligible_with_streaming_body_does_not_attach_streaming_note() -> None: """POST with a streaming body that gets a 503 raises WITHOUT the streaming-note (sync).