diff --git a/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md b/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md index 17347cd..e2e9760 100644 --- a/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md +++ b/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md @@ -22,7 +22,34 @@ | 91711 | no | yes | turn 1 / call 1 | | 237373 | no | yes | turn 1 / call 1 | +## 50-Query Follow-Up + +After merging the wider `deep_search` evidence controls and auto-run rewrites, a fresh DeepSeek 50-query run reached `25/50`, up from the prior `23/50`. + +| Metric | Prior 50-query run | Post rewrite run | +| --- | ---: | ---: | +| Reach | 23/50 | 25/50 | +| Mean turns | 4.14 | 4.00 | +| Mean tool calls | 5.78 | 6.08 | +| Mean first relevant turn | 1.70 | 1.16 | +| Empty tool calls | 11 | 9 | + +Net new hits: + +- `1101278` - do prince harry and william have last names +- `293992` - how many product lines does coca cola have +- `208145` - how bicycle tire tubes are sized +- `14151` - age requirements for name change +- `91711` - child psychiatrist salary 2016 + +Regressed in this live run: + +- `178627` - effects of detox juice cleanse +- `45924` - average temperatures las vegas by month +- `208494` - how big do newfypoo's get + +The live run also exposed a follow-up issue: DeepSeek sometimes appends a process word to the source phrase, e.g. `how is soil created from rocks weathering`. A source-cleanup rule strips trailing process words such as `weathering` before producing rewrites. With that cleanup, `237373` again reaches the gold document at turn 1 / call 1 in a targeted live smoke. + ## Interpretation Prompt-visible hints alone were not enough: DeepSeek often generated nearby but non-gold rewrites. Running the deterministic rewrite hints inside `deep_search` removes that planning variance for cheap, bounded patterns such as dropping noisy numeric years and rewriting "created from" process questions into answer-shaped phrases. - diff --git a/src/synaptic/agent_tools.py b/src/synaptic/agent_tools.py index c03dda0..ed8419d 100644 --- a/src/synaptic/agent_tools.py +++ b/src/synaptic/agent_tools.py @@ -55,6 +55,18 @@ r"(?Pcreated|made|formed|produced)\s+from\s+(?P.+)", re.IGNORECASE, ) +_PROCESS_TRAILING_WORDS = { + "breakdown", + "created", + "formation", + "formed", + "forming", + "made", + "process", + "processes", + "produced", + "weathering", +} # --- Shared result shape --- @@ -168,8 +180,7 @@ def add(candidate: str, reason: str) -> None: process = _PROCESS_FROM_RE.search(query) if process: subject = process.group("subject") - source = process.group("source") - source_singular = source[:-1] if source.lower().endswith("s") else source + source_singular = _normalise_process_source(process.group("source")) add( f"making {subject} {source_singular} pieces", "process questions often use answer-text verbs like making/forming rather than created", @@ -182,6 +193,18 @@ def add(candidate: str, reason: str) -> None: return hints[:3] +def _normalise_process_source(source: str) -> str: + tokens = source.strip(" ?.!").split() + while len(tokens) > 1 and tokens[-1].lower() in _PROCESS_TRAILING_WORDS: + tokens.pop() + if not tokens: + return "" + last = tokens[-1] + if last.lower().endswith("s") and not last.lower().endswith("ss"): + tokens[-1] = last[:-1] + return " ".join(tokens) + + def _node_to_summary( node: Node, *, diff --git a/tests/test_agent_tools.py b/tests/test_agent_tools.py index 9ccabb4..1f09191 100644 --- a/tests/test_agent_tools.py +++ b/tests/test_agent_tools.py @@ -214,6 +214,22 @@ def test_query_rewrite_hints_process_from_question(): assert "small pieces of rock form soil" in queries +def test_query_rewrite_hints_strip_trailing_process_words(): + hints = _query_rewrite_hints("how is soil created from rocks weathering process") + queries = [h.args["query"] for h in hints] + + assert "making soil rock pieces" in queries + assert "making soil rocks weathering process pieces" not in queries + + +def test_query_rewrite_hints_preserve_non_plural_ss_source(): + hints = _query_rewrite_hints("how is policy created from class") + queries = [h.args["query"] for h in hints] + + assert "making policy class pieces" in queries + assert "making policy clas pieces" not in queries + + @pytest.mark.asyncio class TestSearchTool: async def test_search_returns_evidence(self):