From 992df9171d238f18159a8be6345a58b02a0f35ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=86=90=EC=84=B1=EC=A4=80?= Date: Thu, 2 Jul 2026 21:36:57 +0900 Subject: [PATCH] Run deterministic rewrite hints inside deep search --- ...nt_loop_deepseek_auto_rewrites_20260702.md | 28 +++ src/synaptic/agent_loop.py | 13 +- src/synaptic/agent_tools.py | 49 +++++ src/synaptic/agent_tools_v2.py | 55 +++++- tests/test_agent_loop_projection.py | 12 ++ tests/test_agent_tools.py | 175 ++++++++++++++++++ 6 files changed, 325 insertions(+), 7 deletions(-) create mode 100644 examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md diff --git a/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md b/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md new file mode 100644 index 0000000..17347cd --- /dev/null +++ b/examples/ablation/diagnostics/agent_loop_deepseek_auto_rewrites_20260702.md @@ -0,0 +1,28 @@ +# DeepSeek agent loop auto-rewrite check + +- Run at: 2026-07-02 21:34 KST +- Model: `deepseek-v4-flash` +- Base corpus: `tests/benchmark/data/msmarco_passage_full.json` +- SQLite DB: `tests/benchmark/data/msmarco_full.db` +- Corpus limit: 8,841,823 passages +- Target set: two high-call misses whose gold evidence is reachable through simple deterministic rewrites. +- Change under test: `deep_search` runs bounded deterministic rewrite hints internally and merges the rewrite evidence before returning to the LLM. 
+ +## Deterministic Check + +| QID | Original query | Auto rewrite | Gold rank in `deep_search` | +| --- | --- | --- | ---: | +| 91711 | child psychiatrist salary 2016 | child psychiatrist salary | 1 | +| 237373 | how is soil created from rocks | making soil rock pieces; small pieces of rock form soil | 1 | + +## DeepSeek Live Smoke + +| QID | Before auto-run hints | After auto-run hints | First relevant | +| --- | ---: | ---: | --- | +| 91711 | no | yes | turn 1 / call 1 | +| 237373 | no | yes | turn 1 / call 1 | + +## Interpretation + +Prompt-visible hints alone were not enough: DeepSeek often generated nearby but non-gold rewrites. Running the deterministic rewrite hints inside `deep_search` removes that planning variance for cheap, bounded patterns such as dropping noisy numeric years and rewriting "created from" process questions into answer-shaped phrases. + diff --git a/src/synaptic/agent_loop.py b/src/synaptic/agent_loop.py index 6be9ac4..8e6b3df 100644 --- a/src/synaptic/agent_loop.py +++ b/src/synaptic/agent_loop.py @@ -202,9 +202,9 @@ def _is_enumeration_query(query: str) -> bool: need the COMPLETE set. Raise the ``limit`` on ``filter_nodes`` / ``top_nodes`` (e.g. 100) rather than the default 20. The GT for these patterns often has 5-10 specific rows; a narrow retry loop misses them. -- **When a tool returns 0 results, it also returns a ``hints`` array.** +- **When a tool returns a ``hints`` array, read it before guessing.** Each hint is a concrete corrective action (different operator, dropped - WHERE, alternative column). Read the hints and follow the first one + WHERE, alternative column, query rewrite). 
+ Follow the first relevant hint before reissuing a near-identical query — that is what wastes turns.""" @@ -1039,13 +1039,14 @@ def project_tool_result(result: dict | Any, *, max_chars: int = _TOOL_RESULT_BUD tool = result.get("tool", "") data = _project_data(tool, result.get("data") or {}) - envelope: dict[str, Any] = {"tool": tool, "ok": result.get("ok", True), "data": data} - err = result.get("error") - if err: - envelope["error"] = err + envelope: dict[str, Any] = {"tool": tool, "ok": result.get("ok", True)} hints = result.get("hints") if hints: envelope["hints"] = hints[:3] + envelope["data"] = data + err = result.get("error") + if err: + envelope["error"] = err serialized = json.dumps(envelope, ensure_ascii=False) if len(serialized) <= max_chars: diff --git a/src/synaptic/agent_tools.py b/src/synaptic/agent_tools.py index 3e0bda5..c03dda0 100644 --- a/src/synaptic/agent_tools.py +++ b/src/synaptic/agent_tools.py @@ -35,6 +35,7 @@ from __future__ import annotations import logging +import re from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any @@ -48,6 +49,13 @@ logger = logging.getLogger("agent-tools") +_YEAR_RE = re.compile(r"\b(?:19|20)\d{2}\b") +_PROCESS_FROM_RE = re.compile( + r"\bhow\s+(?:is|are|was|were)\s+(?P<subject>.+?)\s+" + r"(?P<verb>created|made|formed|produced)\s+from\s+(?P<source>.+)", + re.IGNORECASE, +) + # --- Shared result shape --- @@ -134,6 +142,46 @@ def _budget_check(session: SearchSession, tool: str) -> ToolResult | None: return None +def _query_rewrite_hints(query: str, *, limit: int = 20) -> list[Hint]: + hints: list[Hint] = [] + seen = {query.strip().lower()} + + def add(candidate: str, reason: str) -> None: + candidate = " ".join(candidate.strip(" ?.!").split()) + if not candidate: + return + key = candidate.lower() + if key in seen: + return + seen.add(key) + hints.append( + Hint(action="search", args={"query": candidate, "limit": limit}, reason=reason) + ) + + without_year = _YEAR_RE.sub(" ", query) + if without_year != query: + 
add( + without_year, + "retry without the numeric year if the year is metadata/noise rather than answer text", + ) + + process = _PROCESS_FROM_RE.search(query) + if process: + subject = process.group("subject") + source = process.group("source") + source_singular = source[:-1] if source.lower().endswith("s") else source + add( + f"making {subject} {source_singular} pieces", + "process questions often use answer-text verbs like making/forming rather than created", + ) + add( + f"small pieces of {source_singular} form {subject}", + "retry with an answer-shaped process phrase using the same subject and source", + ) + + return hints[:3] + + def _node_to_summary( node: Node, *, @@ -335,6 +383,7 @@ async def search_tool( reason=f"query also touched '{cat}' — narrow search to that category", ) ) + hints.extend(_query_rewrite_hints(query)) return ToolResult( tool="search", diff --git a/src/synaptic/agent_tools_v2.py b/src/synaptic/agent_tools_v2.py index e9e5238..1094c1a 100644 --- a/src/synaptic/agent_tools_v2.py +++ b/src/synaptic/agent_tools_v2.py @@ -31,6 +31,7 @@ Hint, ToolResult, _budget_check, + _query_rewrite_hints, expand_tool, get_document_tool, search_tool, @@ -51,6 +52,21 @@ def _bounded_int(value: object, *, default: int, minimum: int, maximum: int) -> return max(minimum, min(parsed, maximum)) +def _dedupe_evidence(items: list[dict], *, limit: int) -> list[dict]: + out: list[dict] = [] + seen_ids: set[str] = set() + for item in items: + item_id = str(item.get("id") or "") + if item_id and item_id in seen_ids: + continue + if item_id: + seen_ids.add(item_id) + out.append(item) + if len(out) >= limit: + break + return out + + async def deep_search_tool( backend: StorageBackend, session: SearchSession, @@ -100,6 +116,43 @@ async def deep_search_tool( embedder=embedder, ) evidence = search_result.data.get("evidence", []) + hints: list[Hint] = [] + hints.extend(_query_rewrite_hints(query)) + rewrite_queries: list[str] = [] + + if hints: + rewrite_evidence: 
list[dict] = [] + seen_evidence_ids = {e.get("id", "") for e in evidence if isinstance(e, dict)} + for hint in hints[:2]: + if hint.action != "search": + continue + rewrite_query = str(hint.args.get("query") or "").strip() + if not rewrite_query: + continue + rewrite_result = await search_tool( + backend, + session, + rewrite_query, + limit=limit, + category=category, + embedder=embedder, + ) + if not rewrite_result.ok: + continue + rewrite_queries.append(rewrite_query) + for item in rewrite_result.data.get("evidence", []): + if not isinstance(item, dict): + continue + item_id = item.get("id", "") + if item_id and item_id in seen_evidence_ids: + continue + seen_evidence_ids.add(item_id) + rewrite_evidence.append({**item, "rewrite_query": rewrite_query}) + + if rewrite_evidence: + rewrite_take = max(3, min(5, limit // 2)) + evidence = [*rewrite_evidence[:rewrite_take], *evidence] + evidence = _dedupe_evidence(evidence, limit=limit) # Step 2: expand top hit (parallel with step 3) expanded_neighbours: list[dict] = [] @@ -139,7 +192,6 @@ async def deep_search_tool( ) # Build consolidated response - hints: list[Hint] = [] if not evidence: # Decompose the query into its first content word and suggest # a FTS fallback. 
"try a different category" as a literal arg @@ -174,6 +226,7 @@ async def deep_search_tool( "expanded_neighbours": expanded_neighbours[:5], "document_excerpts": doc_excerpts, "search_anchors": search_result.data.get("anchors", {}), + "rewrite_queries": rewrite_queries, }, hints=hints, session=session.summary(), diff --git a/tests/test_agent_loop_projection.py b/tests/test_agent_loop_projection.py index fc8d96f..3a06919 100644 --- a/tests/test_agent_loop_projection.py +++ b/tests/test_agent_loop_projection.py @@ -205,6 +205,18 @@ def test_hints_capped_at_three(): assert len(parsed["hints"]) == 3 +def test_hints_are_projected_before_data(): + r = { + "tool": "search", + "ok": True, + "data": {"evidence": [{"id": "n1", "preview": "long evidence text"}]}, + "hints": [{"action": "search", "args": {"query": "rewrite"}}], + } + out = project_tool_result(r) + + assert out.index('"hints"') < out.index('"data"') + + def test_error_preserved(): r = {"tool": "filter_nodes", "ok": False, "data": {}, "error": "bad op"} out = project_tool_result(r) diff --git a/tests/test_agent_tools.py b/tests/test_agent_tools.py index 1e28858..9ccabb4 100644 --- a/tests/test_agent_tools.py +++ b/tests/test_agent_tools.py @@ -17,6 +17,7 @@ import synaptic.agent_tools_v2 as tools_v2 from synaptic.agent_tools import ( ToolResult, + _query_rewrite_hints, count_tool, expand_tool, follow_tool, @@ -198,6 +199,21 @@ async def _fresh_backend() -> MemoryBackend: # --- search_tool --- +def test_query_rewrite_hints_drop_numeric_year(): + hints = _query_rewrite_hints("child psychiatrist salary 2016") + + assert hints[0].action == "search" + assert hints[0].args == {"query": "child psychiatrist salary", "limit": 20} + + +def test_query_rewrite_hints_process_from_question(): + hints = _query_rewrite_hints("how is soil created from rocks") + queries = [h.args["query"] for h in hints] + + assert "making soil rock pieces" in queries + assert "small pieces of rock form soil" in queries + + @pytest.mark.asyncio 
class TestSearchTool: async def test_search_returns_evidence(self): @@ -408,6 +424,165 @@ async def fake_get_doc(backend, session, doc_id, query): assert document_ids == ["doc_0", "doc_1", "doc_2", "doc_3", "doc_4"] +@pytest.mark.asyncio +async def test_deep_search_surfaces_query_rewrite_hints(monkeypatch): + async def fake_search_tool( + backend, + session, + query, + *, + limit, + category=None, + embedder=None, + **kwargs, + ): + return ToolResult( + tool="search", + ok=True, + data={ + "evidence": [{"id": "chunk_0", "document_id": "doc_0"}], + "anchors": {}, + }, + session=session.summary(), + ) + + async def fake_expand(backend, session, node_id): + return ToolResult(tool="expand", ok=True, data={"neighbours": []}) + + async def fake_get_doc(backend, session, doc_id, query): + return ToolResult( + tool="get_document", + ok=True, + data={"document": {"id": doc_id}, "chunks": [], "chunk_count": 0}, + ) + + monkeypatch.setattr(tools_v2, "search_tool", fake_search_tool) + monkeypatch.setattr(tools_v2, "_safe_expand", fake_expand) + monkeypatch.setattr(tools_v2, "_safe_get_doc", fake_get_doc) + backend = MemoryBackend() + await backend.connect() + + result = await tools_v2.deep_search_tool( + backend, + SearchSession(), + "how is soil created from rocks", + ) + queries = [h.args["query"] for h in result.hints] + + assert "making soil rock pieces" in queries + assert "small pieces of rock form soil" in queries + + +@pytest.mark.asyncio +async def test_deep_search_runs_query_rewrite_hints(monkeypatch): + seen_queries: list[str] = [] + + async def fake_search_tool( + backend, + session, + query, + *, + limit, + category=None, + embedder=None, + **kwargs, + ): + seen_queries.append(query) + if query == "how is soil created from rocks": + evidence = [{"id": "initial", "document_id": "initial_doc"}] + else: + evidence = [{"id": f"rewrite_{len(seen_queries)}", "document_id": "gold_doc"}] + return ToolResult( + tool="search", + ok=True, + data={"evidence": evidence, 
"anchors": {}}, + session=session.summary(), + ) + + async def fake_expand(backend, session, node_id): + return ToolResult(tool="expand", ok=True, data={"neighbours": []}) + + async def fake_get_doc(backend, session, doc_id, query): + return ToolResult( + tool="get_document", + ok=True, + data={"document": {"id": doc_id}, "chunks": [], "chunk_count": 0}, + ) + + monkeypatch.setattr(tools_v2, "search_tool", fake_search_tool) + monkeypatch.setattr(tools_v2, "_safe_expand", fake_expand) + monkeypatch.setattr(tools_v2, "_safe_get_doc", fake_get_doc) + backend = MemoryBackend() + await backend.connect() + + result = await tools_v2.deep_search_tool( + backend, + SearchSession(), + "how is soil created from rocks", + ) + + assert seen_queries == [ + "how is soil created from rocks", + "making soil rock pieces", + "small pieces of rock form soil", + ] + assert result.data["rewrite_queries"] == [ + "making soil rock pieces", + "small pieces of rock form soil", + ] + assert result.data["evidence"][0]["document_id"] == "gold_doc" + + +@pytest.mark.asyncio +async def test_deep_search_rewrite_can_rescue_empty_initial_search(monkeypatch): + async def fake_search_tool( + backend, + session, + query, + *, + limit, + category=None, + embedder=None, + **kwargs, + ): + evidence = ( + [] + if query == "child psychiatrist salary 2016" + else [{"id": "rewrite_hit", "document_id": "gold_doc"}] + ) + return ToolResult( + tool="search", + ok=True, + data={"evidence": evidence, "anchors": {}}, + session=session.summary(), + ) + + async def fake_expand(backend, session, node_id): + return ToolResult(tool="expand", ok=True, data={"neighbours": []}) + + async def fake_get_doc(backend, session, doc_id, query): + return ToolResult( + tool="get_document", + ok=True, + data={"document": {"id": doc_id}, "chunks": [], "chunk_count": 0}, + ) + + monkeypatch.setattr(tools_v2, "search_tool", fake_search_tool) + monkeypatch.setattr(tools_v2, "_safe_expand", fake_expand) + monkeypatch.setattr(tools_v2, 
"_safe_get_doc", fake_get_doc) + backend = MemoryBackend() + await backend.connect() + + result = await tools_v2.deep_search_tool( + backend, + SearchSession(), + "child psychiatrist salary 2016", + ) + + assert result.data["rewrite_queries"] == ["child psychiatrist salary"] + assert result.data["evidence"][0]["document_id"] == "gold_doc" + + # --- expand_tool ---