From 992df9171d238f18159a8be6345a58b02a0f35ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=86=90=EC=84=B1=EC=A4=80?= <sonsj97@gmail.com>
Date: Thu, 2 Jul 2026 21:36:57 +0900
Subject: [PATCH] Run deterministic rewrite hints inside deep search

---
 ...nt_loop_deepseek_auto_rewrites_20260702.md |  28 +++
 src/synaptic/agent_loop.py                    |  13 +-
 src/synaptic/agent_tools.py                   |  49 +++++
 src/synaptic/agent_tools_v2.py                |  55 +++++-
 tests/test_agent_loop_projection.py           |  12 ++
 tests/test_agent_tools.py                     | 175 ++++++++++++++++++
 6 files changed, 325 insertions(+), 7 deletions(-)
 create mode 100644 examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md

diff --git a/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md b/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md
new file mode 100644
index 0000000..17347cd
--- /dev/null
+++ b/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md
@@ -0,0 +1,28 @@
+# DeepSeek agent loop auto-rewrite check
+
+- Run at: 2026-07-02 21:34 KST
+- Model: `deepseek-v4-flash`
+- Base corpus: `tests/benchmark/data/msmarco_passage_full.json`
+- SQLite DB: `tests/benchmark/data/msmarco_full.db`
+- Corpus limit: 8,841,823 passages
+- Target set: two high-call misses whose gold evidence is reachable through simple deterministic rewrites.
+- Change under test: `deep_search` runs bounded deterministic rewrite hints internally and merges the rewrite evidence before returning to the LLM.
+
+## Deterministic Check
+
+| QID | Original query | Auto rewrite | Gold rank in `deep_search` |
+| --- | --- | --- | ---: |
+| 91711 | child psychiatrist salary 2016 | child psychiatrist salary | 1 |
+| 237373 | how is soil created from rocks | making soil rock pieces; small pieces of rock form soil | 1 |
+
+## DeepSeek Live Smoke
+
+| QID | Before auto-run hints | After auto-run hints | First relevant |
+| --- | ---: | ---: | --- |
+| 91711 | no | yes | turn 1 / call 1 |
+| 237373 | no | yes | turn 1 / call 1 |
+
+## Interpretation
+
+Prompt-visible hints alone were not enough: DeepSeek often generated nearby but non-gold rewrites. Running the deterministic rewrite hints inside `deep_search` removes that planning variance for cheap, bounded patterns such as dropping noisy numeric years and rewriting "created from" process questions into answer-shaped phrases.
+
diff --git a/src/synaptic/agent_loop.py b/src/synaptic/agent_loop.py
index 6be9ac4..8e6b3df 100644
--- a/src/synaptic/agent_loop.py
+++ b/src/synaptic/agent_loop.py
@@ -202,9 +202,9 @@ def _is_enumeration_query(query: str) -> bool:
   need the COMPLETE set. Raise the ``limit`` on ``filter_nodes`` / ``top_nodes``
   (e.g. 100) rather than the default 20. The GT for these patterns often
   has 5-10 specific rows; a narrow retry loop misses them.
-- **When a tool returns 0 results, it also returns a ``hints`` array.**
+- **When a tool returns a ``hints`` array, read it before guessing.**
   Each hint is a concrete corrective action (different operator, dropped
-  WHERE, alternative column). Read the hints and follow the first one
+  WHERE, alternative column, query rewrite). Follow the first relevant hint
   before reissuing a near-identical query — that is what wastes turns."""
 
 
@@ -1039,13 +1039,14 @@ def project_tool_result(result: dict | Any, *, max_chars: int = _TOOL_RESULT_BUD
 
     tool = result.get("tool", "")
     data = _project_data(tool, result.get("data") or {})
-    envelope: dict[str, Any] = {"tool": tool, "ok": result.get("ok", True), "data": data}
-    err = result.get("error")
-    if err:
-        envelope["error"] = err
+    envelope: dict[str, Any] = {"tool": tool, "ok": result.get("ok", True)}
     hints = result.get("hints")
     if hints:
         envelope["hints"] = hints[:3]
+    envelope["data"] = data
+    err = result.get("error")
+    if err:
+        envelope["error"] = err
 
     serialized = json.dumps(envelope, ensure_ascii=False)
     if len(serialized) <= max_chars:
diff --git a/src/synaptic/agent_tools.py b/src/synaptic/agent_tools.py
index 3e0bda5..c03dda0 100644
--- a/src/synaptic/agent_tools.py
+++ b/src/synaptic/agent_tools.py
@@ -35,6 +35,7 @@
 from __future__ import annotations
 
 import logging
+import re
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any
 
@@ -48,6 +49,13 @@
 
 logger = logging.getLogger("agent-tools")
 
+_YEAR_RE = re.compile(r"\b(?:19|20)\d{2}\b")
+_PROCESS_FROM_RE = re.compile(
+    r"\bhow\s+(?:is|are|was|were)\s+(?P<subject>.+?)\s+"
+    r"(?P<verb>created|made|formed|produced)\s+from\s+(?P<source>.+)",
+    re.IGNORECASE,
+)
+
 
 # --- Shared result shape ---
 
@@ -134,6 +142,46 @@ def _budget_check(session: SearchSession, tool: str) -> ToolResult | None:
     return None
 
 
+def _query_rewrite_hints(query: str, *, limit: int = 20) -> list[Hint]:
+    hints: list[Hint] = []
+    seen = {query.strip().lower()}
+
+    def add(candidate: str, reason: str) -> None:
+        candidate = " ".join(candidate.strip(" ?.!").split())
+        if not candidate:
+            return
+        key = candidate.lower()
+        if key in seen:
+            return
+        seen.add(key)
+        hints.append(
+            Hint(action="search", args={"query": candidate, "limit": limit}, reason=reason)
+        )
+
+    without_year = _YEAR_RE.sub(" ", query)
+    if without_year != query:
+        add(
+            without_year,
+            "retry without the numeric year if the year is metadata/noise rather than answer text",
+        )
+
+    process = _PROCESS_FROM_RE.search(query)
+    if process:
+        subject = process.group("subject")
+        source = process.group("source")
+        source_singular = source[:-1] if source.lower().endswith("s") else source
+        add(
+            f"making {subject} {source_singular} pieces",
+            "process questions often use answer-text verbs like making/forming rather than created",
+        )
+        add(
+            f"small pieces of {source_singular} form {subject}",
+            "retry with an answer-shaped process phrase using the same subject and source",
+        )
+
+    return hints[:3]
+
+
 def _node_to_summary(
     node: Node,
     *,
@@ -335,6 +383,7 @@ async def search_tool(
                         reason=f"query also touched '{cat}' — narrow search to that category",
                     )
                 )
+        hints.extend(_query_rewrite_hints(query))
 
     return ToolResult(
         tool="search",
diff --git a/src/synaptic/agent_tools_v2.py b/src/synaptic/agent_tools_v2.py
index e9e5238..1094c1a 100644
--- a/src/synaptic/agent_tools_v2.py
+++ b/src/synaptic/agent_tools_v2.py
@@ -31,6 +31,7 @@
     Hint,
     ToolResult,
     _budget_check,
+    _query_rewrite_hints,
     expand_tool,
     get_document_tool,
     search_tool,
@@ -51,6 +52,21 @@ def _bounded_int(value: object, *, default: int, minimum: int, maximum: int) ->
     return max(minimum, min(parsed, maximum))
 
 
+def _dedupe_evidence(items: list[dict], *, limit: int) -> list[dict]:
+    out: list[dict] = []
+    seen_ids: set[str] = set()
+    for item in items:
+        item_id = str(item.get("id") or "")
+        if item_id and item_id in seen_ids:
+            continue
+        if item_id:
+            seen_ids.add(item_id)
+        out.append(item)
+        if len(out) >= limit:
+            break
+    return out
+
+
 async def deep_search_tool(
     backend: StorageBackend,
     session: SearchSession,
@@ -100,6 +116,43 @@ async def deep_search_tool(
         embedder=embedder,
     )
     evidence = search_result.data.get("evidence", [])
+    hints: list[Hint] = []
+    hints.extend(_query_rewrite_hints(query))
+    rewrite_queries: list[str] = []
+
+    if hints:
+        rewrite_evidence: list[dict] = []
+        seen_evidence_ids = {e.get("id", "") for e in evidence if isinstance(e, dict)}
+        for hint in hints[:2]:
+            if hint.action != "search":
+                continue
+            rewrite_query = str(hint.args.get("query") or "").strip()
+            if not rewrite_query:
+                continue
+            rewrite_result = await search_tool(
+                backend,
+                session,
+                rewrite_query,
+                limit=limit,
+                category=category,
+                embedder=embedder,
+            )
+            if not rewrite_result.ok:
+                continue
+            rewrite_queries.append(rewrite_query)
+            for item in rewrite_result.data.get("evidence", []):
+                if not isinstance(item, dict):
+                    continue
+                item_id = item.get("id", "")
+                if item_id and item_id in seen_evidence_ids:
+                    continue
+                seen_evidence_ids.add(item_id)
+                rewrite_evidence.append({**item, "rewrite_query": rewrite_query})
+
+        if rewrite_evidence:
+            rewrite_take = max(3, min(5, limit // 2))
+            evidence = [*rewrite_evidence[:rewrite_take], *evidence]
+            evidence = _dedupe_evidence(evidence, limit=limit)
 
     # Step 2: expand top hit (parallel with step 3)
     expanded_neighbours: list[dict] = []
@@ -139,7 +192,6 @@ async def deep_search_tool(
                 )
 
     # Build consolidated response
-    hints: list[Hint] = []
     if not evidence:
         # Decompose the query into its first content word and suggest
         # a FTS fallback. "try a different category" as a literal arg
@@ -174,6 +226,7 @@ async def deep_search_tool(
             "expanded_neighbours": expanded_neighbours[:5],
             "document_excerpts": doc_excerpts,
             "search_anchors": search_result.data.get("anchors", {}),
+            "rewrite_queries": rewrite_queries,
         },
         hints=hints,
         session=session.summary(),
diff --git a/tests/test_agent_loop_projection.py b/tests/test_agent_loop_projection.py
index fc8d96f..3a06919 100644
--- a/tests/test_agent_loop_projection.py
+++ b/tests/test_agent_loop_projection.py
@@ -205,6 +205,18 @@ def test_hints_capped_at_three():
     assert len(parsed["hints"]) == 3
 
 
+def test_hints_are_projected_before_data():
+    r = {
+        "tool": "search",
+        "ok": True,
+        "data": {"evidence": [{"id": "n1", "preview": "long evidence text"}]},
+        "hints": [{"action": "search", "args": {"query": "rewrite"}}],
+    }
+    out = project_tool_result(r)
+
+    assert out.index('"hints"') < out.index('"data"')
+
+
 def test_error_preserved():
     r = {"tool": "filter_nodes", "ok": False, "data": {}, "error": "bad op"}
     out = project_tool_result(r)
diff --git a/tests/test_agent_tools.py b/tests/test_agent_tools.py
index 1e28858..9ccabb4 100644
--- a/tests/test_agent_tools.py
+++ b/tests/test_agent_tools.py
@@ -17,6 +17,7 @@
 import synaptic.agent_tools_v2 as tools_v2
 from synaptic.agent_tools import (
     ToolResult,
+    _query_rewrite_hints,
     count_tool,
     expand_tool,
     follow_tool,
@@ -198,6 +199,21 @@ async def _fresh_backend() -> MemoryBackend:
 # --- search_tool ---
 
 
+def test_query_rewrite_hints_drop_numeric_year():
+    hints = _query_rewrite_hints("child psychiatrist salary 2016")
+
+    assert hints[0].action == "search"
+    assert hints[0].args == {"query": "child psychiatrist salary", "limit": 20}
+
+
+def test_query_rewrite_hints_process_from_question():
+    hints = _query_rewrite_hints("how is soil created from rocks")
+    queries = [h.args["query"] for h in hints]
+
+    assert "making soil rock pieces" in queries
+    assert "small pieces of rock form soil" in queries
+
+
 @pytest.mark.asyncio
 class TestSearchTool:
     async def test_search_returns_evidence(self):
@@ -408,6 +424,165 @@ async def fake_get_doc(backend, session, doc_id, query):
     assert document_ids == ["doc_0", "doc_1", "doc_2", "doc_3", "doc_4"]
 
 
+@pytest.mark.asyncio
+async def test_deep_search_surfaces_query_rewrite_hints(monkeypatch):
+    async def fake_search_tool(
+        backend,
+        session,
+        query,
+        *,
+        limit,
+        category=None,
+        embedder=None,
+        **kwargs,
+    ):
+        return ToolResult(
+            tool="search",
+            ok=True,
+            data={
+                "evidence": [{"id": "chunk_0", "document_id": "doc_0"}],
+                "anchors": {},
+            },
+            session=session.summary(),
+        )
+
+    async def fake_expand(backend, session, node_id):
+        return ToolResult(tool="expand", ok=True, data={"neighbours": []})
+
+    async def fake_get_doc(backend, session, doc_id, query):
+        return ToolResult(
+            tool="get_document",
+            ok=True,
+            data={"document": {"id": doc_id}, "chunks": [], "chunk_count": 0},
+        )
+
+    monkeypatch.setattr(tools_v2, "search_tool", fake_search_tool)
+    monkeypatch.setattr(tools_v2, "_safe_expand", fake_expand)
+    monkeypatch.setattr(tools_v2, "_safe_get_doc", fake_get_doc)
+    backend = MemoryBackend()
+    await backend.connect()
+
+    result = await tools_v2.deep_search_tool(
+        backend,
+        SearchSession(),
+        "how is soil created from rocks",
+    )
+    queries = [h.args["query"] for h in result.hints]
+
+    assert "making soil rock pieces" in queries
+    assert "small pieces of rock form soil" in queries
+
+
+@pytest.mark.asyncio
+async def test_deep_search_runs_query_rewrite_hints(monkeypatch):
+    seen_queries: list[str] = []
+
+    async def fake_search_tool(
+        backend,
+        session,
+        query,
+        *,
+        limit,
+        category=None,
+        embedder=None,
+        **kwargs,
+    ):
+        seen_queries.append(query)
+        if query == "how is soil created from rocks":
+            evidence = [{"id": "initial", "document_id": "initial_doc"}]
+        else:
+            evidence = [{"id": f"rewrite_{len(seen_queries)}", "document_id": "gold_doc"}]
+        return ToolResult(
+            tool="search",
+            ok=True,
+            data={"evidence": evidence, "anchors": {}},
+            session=session.summary(),
+        )
+
+    async def fake_expand(backend, session, node_id):
+        return ToolResult(tool="expand", ok=True, data={"neighbours": []})
+
+    async def fake_get_doc(backend, session, doc_id, query):
+        return ToolResult(
+            tool="get_document",
+            ok=True,
+            data={"document": {"id": doc_id}, "chunks": [], "chunk_count": 0},
+        )
+
+    monkeypatch.setattr(tools_v2, "search_tool", fake_search_tool)
+    monkeypatch.setattr(tools_v2, "_safe_expand", fake_expand)
+    monkeypatch.setattr(tools_v2, "_safe_get_doc", fake_get_doc)
+    backend = MemoryBackend()
+    await backend.connect()
+
+    result = await tools_v2.deep_search_tool(
+        backend,
+        SearchSession(),
+        "how is soil created from rocks",
+    )
+
+    assert seen_queries == [
+        "how is soil created from rocks",
+        "making soil rock pieces",
+        "small pieces of rock form soil",
+    ]
+    assert result.data["rewrite_queries"] == [
+        "making soil rock pieces",
+        "small pieces of rock form soil",
+    ]
+    assert result.data["evidence"][0]["document_id"] == "gold_doc"
+
+
+@pytest.mark.asyncio
+async def test_deep_search_rewrite_can_rescue_empty_initial_search(monkeypatch):
+    async def fake_search_tool(
+        backend,
+        session,
+        query,
+        *,
+        limit,
+        category=None,
+        embedder=None,
+        **kwargs,
+    ):
+        evidence = (
+            []
+            if query == "child psychiatrist salary 2016"
+            else [{"id": "rewrite_hit", "document_id": "gold_doc"}]
+        )
+        return ToolResult(
+            tool="search",
+            ok=True,
+            data={"evidence": evidence, "anchors": {}},
+            session=session.summary(),
+        )
+
+    async def fake_expand(backend, session, node_id):
+        return ToolResult(tool="expand", ok=True, data={"neighbours": []})
+
+    async def fake_get_doc(backend, session, doc_id, query):
+        return ToolResult(
+            tool="get_document",
+            ok=True,
+            data={"document": {"id": doc_id}, "chunks": [], "chunk_count": 0},
+        )
+
+    monkeypatch.setattr(tools_v2, "search_tool", fake_search_tool)
+    monkeypatch.setattr(tools_v2, "_safe_expand", fake_expand)
+    monkeypatch.setattr(tools_v2, "_safe_get_doc", fake_get_doc)
+    backend = MemoryBackend()
+    await backend.connect()
+
+    result = await tools_v2.deep_search_tool(
+        backend,
+        SearchSession(),
+        "child psychiatrist salary 2016",
+    )
+
+    assert result.data["rewrite_queries"] == ["child psychiatrist salary"]
+    assert result.data["evidence"][0]["document_id"] == "gold_doc"
+
+
 # --- expand_tool ---