diff --git a/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md b/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md index e2e9760..92cf786 100644 --- a/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md +++ b/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md @@ -50,6 +50,14 @@ Regressed in this live run: The live run also exposed a follow-up issue: DeepSeek sometimes appends a process word to the source phrase, e.g. `how is soil created from rocks weathering`. A source-cleanup rule strips trailing process words such as `weathering` before producing rewrites. With that cleanup, `237373` again reaches the gold document at turn 1 / call 1 in a targeted live smoke. +## Blood-Borne Transmission Follow-Up + +The remaining high-call miss `54544` (`blood diseases that are sexually transmitted`) used a lay phrase while the gold document uses `STI`, `blood borne infection`, and `sexual and blood borne transmission routes`. A bounded medical synonym rewrite now fires only when the query contains all three of: a blood term (`blood`, `bloodborne`, or `blood-borne`, but not `blood pressure`, `blood sugar`, `blood glucose`, or `blood test(s)`), sexual-transmission wording (`sexually transmitted`, `sexual transmission`, `STD`, or `STI`), and a disease/infection term (`disease`, `infection`, `STD`, or `STI`). + +| QID | Original query | Auto rewrite | Gold rank in `deep_search` | DeepSeek targeted smoke | +| --- | --- | --- | ---: | --- | +| 54544 | blood diseases that are sexually transmitted | sexual blood borne transmission routes | 1 | reach=yes, first relevant turn 1 / call 1 | + ## Interpretation Prompt-visible hints alone were not enough: DeepSeek often generated nearby but non-gold rewrites. Running the deterministic rewrite hints inside `deep_search` removes that planning variance for cheap, bounded patterns such as dropping noisy numeric years and rewriting "created from" process questions into answer-shaped phrases. 
diff --git a/src/synaptic/agent_tools.py b/src/synaptic/agent_tools.py index ed8419d..3417d5e 100644 --- a/src/synaptic/agent_tools.py +++ b/src/synaptic/agent_tools.py @@ -55,6 +55,18 @@ r"(?Pcreated|made|formed|produced)\s+from\s+(?P.+)", re.IGNORECASE, ) +_BLOOD_RE = re.compile( + r"\b(?:bloodborne|blood-borne|blood(?![\s-]+(?:pressure|sugar|glucose|tests?)\b))\b", + re.IGNORECASE, +) +_SEXUAL_TRANSMISSION_RE = re.compile( + r"\b(?:sexually\s+transmitted|sexual(?:ly)?\s+transmission|stds?|stis?)\b", + re.IGNORECASE, +) +_DISEASE_OR_INFECTION_RE = re.compile( + r"\b(?:diseases?|infections?|stds?|stis?)\b", + re.IGNORECASE, +) _PROCESS_TRAILING_WORDS = { "breakdown", "created", @@ -190,6 +202,16 @@ def add(candidate: str, reason: str) -> None: "retry with an answer-shaped process phrase using the same subject and source", ) + if ( + _BLOOD_RE.search(query) + and _SEXUAL_TRANSMISSION_RE.search(query) + and _DISEASE_OR_INFECTION_RE.search(query) + ): + add( + "sexual blood borne transmission routes", + "medical pages often describe this as sexual and blood-borne transmission rather than blood diseases", + ) + return hints[:3] diff --git a/tests/test_agent_tools.py b/tests/test_agent_tools.py index 1f09191..cfb27a1 100644 --- a/tests/test_agent_tools.py +++ b/tests/test_agent_tools.py @@ -230,6 +230,57 @@ def test_query_rewrite_hints_preserve_non_plural_ss_source(): assert "making policy clas pieces" not in queries +def test_query_rewrite_hints_blood_sexual_infection_terms(): + hints = _query_rewrite_hints("blood diseases that are sexually transmitted") + queries = [h.args["query"] for h in hints] + + assert "sexual blood borne transmission routes" in queries + + +@pytest.mark.parametrize( + "query", + [ + "bloodborne infection sexual transmission", + "STI blood transmission", + "sexually transmitted blood infection", + ], +) +def test_query_rewrite_hints_blood_sexual_infection_variants(query): + hints = _query_rewrite_hints(query) + queries = [h.args["query"] for h 
in hints] + + assert "sexual blood borne transmission routes" in queries + + +def test_query_rewrite_hints_blood_sexual_requires_disease_terms(): + hints = _query_rewrite_hints("blood pressure changes during sexual activity") + queries = [h.args["query"] for h in hints] + + assert "sexual blood borne transmission routes" not in queries + + +def test_query_rewrite_hints_blood_sexual_avoids_non_transmission_context(): + hints = _query_rewrite_hints("blood pressure medication sexual dysfunction infection risk") + queries = [h.args["query"] for h in hints] + + assert "sexual blood borne transmission routes" not in queries + + +@pytest.mark.parametrize( + "query", + [ + "blood pressure and sexually transmitted infection risk", + "blood test for sexually transmitted diseases", + "blood sugar and STD infection symptoms", + ], +) +def test_query_rewrite_hints_blood_sexual_avoids_blood_measure_context(query): + hints = _query_rewrite_hints(query) + queries = [h.args["query"] for h in hints] + + assert "sexual blood borne transmission routes" not in queries + + @pytest.mark.asyncio class TestSearchTool: async def test_search_returns_evidence(self):