diff --git a/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md b/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md index 92cf786..744e775 100644 --- a/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md +++ b/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md @@ -58,6 +58,16 @@ The remaining high-call miss `54544` (`blood diseases that are sexually transmit | --- | --- | --- | ---: | --- | | 54544 | blood diseases that are sexually transmitted | sexual blood borne transmission routes | 1 | reach=yes, first relevant turn 1 / call 1 | +## Answer-Shaped Rewrite Follow-Up + +Current deterministic `deep_search` also showed that several misses were caused by answer-page phrasing rather than graph traversal failure. Bounded rewrites now preserve the query entity while switching to the wording commonly used in the relevant passage. + +| QID | Original query | Auto rewrite examples | Gold rank in `deep_search` | DeepSeek targeted smoke | +| --- | --- | --- | ---: | --- | +| 319564 | how much fiber is in carrots | one cup carrots grams fiber; one cup cooked carrots grams fiber | 3 | reach=yes, first relevant turn 1 / call 1 | +| 155234 | do bigger tires affect gas mileage | tire size factors influence gas mileage; tire width versus gas mileage | 1 | reach=yes, first relevant turn 1 / call 1 | +| 208145 | how bicycle tire tubes are sized | bicycle tire tube size sidewall ETRTO metric imperial; bicycle tire sidewall tube size printed raised numbers | 1 | reach=yes, first relevant turn 1 / call 1 | + ## Interpretation Prompt-visible hints alone were not enough: DeepSeek often generated nearby but non-gold rewrites. Running the deterministic rewrite hints inside `deep_search` removes that planning variance for cheap, bounded patterns such as dropping noisy numeric years and rewriting "created from" process questions into answer-shaped phrases. diff --git a/src/synaptic/agent_tools.py b/src/synaptic/agent_tools.py index 3417d5e..ef39ebd 100644 --- a/src/synaptic/agent_tools.py +++ b/src/synaptic/agent_tools.py @@ -67,6 +67,29 @@ r"\b(?:diseases?|infections?|stds?|stis?)\b", re.IGNORECASE, ) +_FIBER_IN_RE = re.compile( + r"\bhow\s+much\s+fiber\s+(?:is|are)\s+in\s+(?P.+)", + re.IGNORECASE, +) +_FIBER_CONTENT_IN_RE = re.compile( + r"\bfiber\s+content\s+(?:in|of)\s+(?P.+)", + re.IGNORECASE, +) +_FIBER_TRAILING_WORDS = {"fiber", "content", "gram", "grams", "per", "serving", "servings"} +_TIRE_GAS_RE = re.compile( + r"\b(?:tires?|tyres?)\b.*\b(?:gas\s+mileage|fuel\s+economy)\b" + r"|\b(?:gas\s+mileage|fuel\s+economy)\b.*\b(?:tires?|tyres?)\b", + re.IGNORECASE, +) +_TIRE_SIZE_CONTEXT_RE = re.compile( + r"\b(?:bigger|larger|smaller|wider|narrower|width|size|sized|diameter)\b", + re.IGNORECASE, +) +_BICYCLE_TUBE_SIZE_RE = re.compile( + r"\b(?:bicycle|bike)\b.*\b(?:tires?|tyres?)\b.*\btubes?\b.*\b(?:sized?|sizing|sizes?)\b" + r"|\b(?:sized?|sizing|sizes?)\b.*\b(?:bicycle|bike)\b.*\b(?:tires?|tyres?)\b.*\btubes?\b", + re.IGNORECASE, +) _PROCESS_TRAILING_WORDS = { "breakdown", "created", @@ -212,6 +235,39 @@ def add(candidate: str, reason: str) -> None: "medical pages often describe this as sexual and blood-borne transmission rather than blood diseases", ) + fiber = _FIBER_IN_RE.search(query) or _FIBER_CONTENT_IN_RE.search(query) + if fiber: + food = _normalise_food_rewrite_tail(fiber.group("food")) + if food: + add( + f"one cup {food} grams fiber", + "nutrition answers often state fiber per cup and in grams rather than repeating the question wording", + ) + add( + f"one cup cooked {food} grams fiber", + "vegetable nutrition pages often report cooked serving sizes with grams of fiber", + ) + + if _TIRE_GAS_RE.search(query) and _TIRE_SIZE_CONTEXT_RE.search(query): + add( + "tire size factors influence gas mileage", + "vehicle-efficiency pages often describe tire size/width as factors that influence gas mileage", + ) + add( + "tire width versus gas mileage", + "retry with the answer-heading phrasing used by tire efficiency pages", + ) + + if _BICYCLE_TUBE_SIZE_RE.search(query): + add( + "bicycle tire tube size sidewall ETRTO metric imperial", + "bike tube sizing pages often point to sidewall numbers and ETRTO/metric/imperial size labels", + ) + add( + "bicycle tire sidewall tube size printed raised numbers", + "retry with the answer-text phrase that says tube sizes are printed on the tire sidewall", + ) + return hints[:3] @@ -227,6 +283,17 @@ def _normalise_process_source(source: str) -> str: return " ".join(tokens) +def _normalise_rewrite_tail(value: str) -> str: + return " ".join(value.strip(" ?.!").split()) + + +def _normalise_food_rewrite_tail(value: str) -> str: + tokens = _normalise_rewrite_tail(value).split() + while tokens and tokens[-1].lower() in _FIBER_TRAILING_WORDS: + tokens.pop() + return " ".join(tokens) + + def _node_to_summary( node: Node, *, diff --git a/tests/test_agent_tools.py b/tests/test_agent_tools.py index cfb27a1..8198491 100644 --- a/tests/test_agent_tools.py +++ b/tests/test_agent_tools.py @@ -281,6 +281,66 @@ def test_query_rewrite_hints_blood_sexual_avoids_blood_measure_context(query): assert "sexual blood borne transmission routes" not in queries +def test_query_rewrite_hints_fiber_serving_size_terms(): + hints = _query_rewrite_hints("how much fiber is in carrots") + queries = [h.args["query"] for h in hints] + + assert "one cup carrots grams fiber" in queries + assert "one cup cooked carrots grams fiber" in queries + + +@pytest.mark.parametrize( + "query", + [ + "fiber content in carrots", + "fiber content in carrots grams", + ], +) +def test_query_rewrite_hints_fiber_content_terms(query): + hints = _query_rewrite_hints(query) + queries = [h.args["query"] for h in hints] + + assert "one cup carrots grams fiber" in queries + assert "one cup cooked carrots grams fiber" in queries + + +def test_query_rewrite_hints_tire_gas_mileage_terms(): + hints = _query_rewrite_hints("do bigger tires affect gas mileage") + queries = [h.args["query"] for h in hints] + + assert "tire size factors influence gas mileage" in queries + assert "tire width versus gas mileage" in queries + + +def test_query_rewrite_hints_tire_gas_mileage_requires_tire_terms(): + hints = _query_rewrite_hints("does driving fast affect gas mileage") + queries = [h.args["query"] for h in hints] + + assert "tire size factors influence gas mileage" not in queries + + +def test_query_rewrite_hints_tire_gas_mileage_requires_size_context(): + hints = _query_rewrite_hints("does driving fast affect gas mileage when you have winter tires") + queries = [h.args["query"] for h in hints] + + assert "tire size factors influence gas mileage" not in queries + + +def test_query_rewrite_hints_bicycle_tube_size_terms(): + hints = _query_rewrite_hints("how bicycle tire tubes are sized") + queries = [h.args["query"] for h in hints] + + assert "bicycle tire tube size sidewall ETRTO metric imperial" in queries + assert "bicycle tire sidewall tube size printed raised numbers" in queries + + +def test_query_rewrite_hints_bicycle_tube_size_requires_tube_terms(): + hints = _query_rewrite_hints("how bicycle tires are sized") + queries = [h.args["query"] for h in hints] + + assert "bicycle tire tube size sidewall ETRTO metric imperial" not in queries + + @pytest.mark.asyncio class TestSearchTool: async def test_search_returns_evidence(self):