Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# DeepSeek agent loop auto-rewrite check

- Run at: 2026-07-02 21:34 KST
- Model: `deepseek-v4-flash`
- Base corpus: `tests/benchmark/data/msmarco_passage_full.json`
- SQLite DB: `tests/benchmark/data/msmarco_full.db`
- Corpus limit: 8,841,823 passages
- Target set: two high-call misses whose gold evidence is reachable through simple deterministic rewrites.
- Change under test: `deep_search` runs bounded deterministic rewrite hints internally and merges the rewrite evidence before returning to the LLM.

## Deterministic Check

| QID | Original query | Auto rewrite | Gold rank in `deep_search` |
| --- | --- | --- | ---: |
| 91711 | child psychiatrist salary 2016 | child psychiatrist salary | 1 |
| 237373 | how is soil created from rocks | making soil rock pieces; small pieces of rock form soil | 1 |

## DeepSeek Live Smoke

| QID | Gold found before auto-run hints | Gold found after auto-run hints | First relevant result |
| --- | ---: | ---: | --- |
| 91711 | no | yes | turn 1 / call 1 |
| 237373 | no | yes | turn 1 / call 1 |

## Interpretation

Prompt-visible hints alone were not enough: DeepSeek often generated nearby but non-gold rewrites. Running the deterministic rewrite hints inside `deep_search` removes that planning variance for cheap, bounded patterns such as dropping noisy numeric years and rewriting "created from" process questions into answer-shaped phrases.

13 changes: 7 additions & 6 deletions src/synaptic/agent_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,9 @@ def _is_enumeration_query(query: str) -> bool:
need the COMPLETE set. Raise the ``limit`` on ``filter_nodes`` / ``top_nodes``
(e.g. 100) rather than the default 20. The GT for these patterns often
has 5-10 specific rows; a narrow retry loop misses them.
- **When a tool returns 0 results, it also returns a ``hints`` array.**
- **When a tool returns a ``hints`` array, read it before guessing.**
Each hint is a concrete corrective action (different operator, dropped
WHERE, alternative column). Read the hints and follow the first one
WHERE, alternative column, query rewrite). Follow the first relevant hint
before reissuing a near-identical query — that is what wastes turns."""


Expand Down Expand Up @@ -1039,13 +1039,14 @@ def project_tool_result(result: dict | Any, *, max_chars: int = _TOOL_RESULT_BUD

tool = result.get("tool", "")
data = _project_data(tool, result.get("data") or {})
envelope: dict[str, Any] = {"tool": tool, "ok": result.get("ok", True), "data": data}
err = result.get("error")
if err:
envelope["error"] = err
envelope: dict[str, Any] = {"tool": tool, "ok": result.get("ok", True)}
hints = result.get("hints")
if hints:
envelope["hints"] = hints[:3]
envelope["data"] = data
err = result.get("error")
if err:
envelope["error"] = err

serialized = json.dumps(envelope, ensure_ascii=False)
if len(serialized) <= max_chars:
Expand Down
49 changes: 49 additions & 0 deletions src/synaptic/agent_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from __future__ import annotations

import logging
import re
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any

Expand All @@ -48,6 +49,13 @@

logger = logging.getLogger("agent-tools")

_YEAR_RE = re.compile(r"\b(?:19|20)\d{2}\b")
_PROCESS_FROM_RE = re.compile(
r"\bhow\s+(?:is|are|was|were)\s+(?P<subject>.+?)\s+"
r"(?P<verb>created|made|formed|produced)\s+from\s+(?P<source>.+)",
re.IGNORECASE,
)


# --- Shared result shape ---

Expand Down Expand Up @@ -134,6 +142,46 @@ def _budget_check(session: SearchSession, tool: str) -> ToolResult | None:
return None


def _query_rewrite_hints(query: str, *, limit: int = 20) -> list[Hint]:
    """Build deterministic ``search`` rewrite hints for ``query``.

    Two rewrite patterns are applied:

    * drop standalone 19xx/20xx year tokens (``_YEAR_RE``);
    * turn "how is X created/made/formed/produced from Y" questions
      (``_PROCESS_FROM_RE``) into two answer-shaped phrases.

    Args:
        query: The original search query.
        limit: Value placed in each hint's ``args["limit"]``.

    Returns:
        At most three ``Hint`` objects with ``action="search"``. Candidates
        that are empty after normalization, equal to the stripped/lowercased
        original query, or duplicates of an earlier candidate are omitted.
    """
    hints: list[Hint] = []
    # Seed with the original query so a rewrite that changes nothing
    # meaningful is not suggested back to the caller.
    seen = {query.strip().lower()}

    def add(candidate: str, reason: str) -> None:
        # Normalize: trim edge punctuation, collapse internal whitespace.
        candidate = " ".join(candidate.strip(" ?.!").split())
        if not candidate:
            return
        key = candidate.lower()
        if key in seen:
            return
        seen.add(key)
        hints.append(
            Hint(action="search", args={"query": candidate, "limit": limit}, reason=reason)
        )

    without_year = _YEAR_RE.sub(" ", query)
    if without_year != query:
        add(
            without_year,
            "retry without the numeric year if the year is metadata/noise rather than answer text",
        )

    process = _PROCESS_FROM_RE.search(query)
    if process:
        subject = process.group("subject")
        # The source group runs to the end of the query, so strip trailing
        # punctuation/whitespace BEFORE singularizing; otherwise "rocks?"
        # never loses its plural "s" and the "?" leaks into the rewrite.
        source = process.group("source").strip(" ?.!")
        if source:
            lowered = source.lower()
            # Naive plural drop; leave "ss" endings ("glass", "grass") intact.
            if len(source) > 1 and lowered.endswith("s") and not lowered.endswith("ss"):
                source_singular = source[:-1]
            else:
                source_singular = source
            add(
                f"making {subject} {source_singular} pieces",
                "process questions often use answer-text verbs like making/forming rather than created",
            )
            add(
                f"small pieces of {source_singular} form {subject}",
                "retry with an answer-shaped process phrase using the same subject and source",
            )

    return hints[:3]


def _node_to_summary(
node: Node,
*,
Expand Down Expand Up @@ -335,6 +383,7 @@ async def search_tool(
reason=f"query also touched '{cat}' — narrow search to that category",
)
)
hints.extend(_query_rewrite_hints(query))

return ToolResult(
tool="search",
Expand Down
55 changes: 54 additions & 1 deletion src/synaptic/agent_tools_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
Hint,
ToolResult,
_budget_check,
_query_rewrite_hints,
expand_tool,
get_document_tool,
search_tool,
Expand All @@ -51,6 +52,21 @@ def _bounded_int(value: object, *, default: int, minimum: int, maximum: int) ->
return max(minimum, min(parsed, maximum))


def _dedupe_evidence(items: list[dict], *, limit: int) -> list[dict]:
out: list[dict] = []
seen_ids: set[str] = set()
for item in items:
item_id = str(item.get("id") or "")
if item_id and item_id in seen_ids:
continue
if item_id:
seen_ids.add(item_id)
out.append(item)
if len(out) >= limit:
break
return out


async def deep_search_tool(
backend: StorageBackend,
session: SearchSession,
Expand Down Expand Up @@ -100,6 +116,43 @@ async def deep_search_tool(
embedder=embedder,
)
evidence = search_result.data.get("evidence", [])
hints: list[Hint] = []
hints.extend(_query_rewrite_hints(query))
rewrite_queries: list[str] = []

if hints:
rewrite_evidence: list[dict] = []
seen_evidence_ids = {e.get("id", "") for e in evidence if isinstance(e, dict)}
for hint in hints[:2]:
if hint.action != "search":
continue
rewrite_query = str(hint.args.get("query") or "").strip()
if not rewrite_query:
continue
rewrite_result = await search_tool(
backend,
session,
rewrite_query,
limit=limit,
category=category,
embedder=embedder,
)
if not rewrite_result.ok:
continue
rewrite_queries.append(rewrite_query)
for item in rewrite_result.data.get("evidence", []):
if not isinstance(item, dict):
continue
item_id = item.get("id", "")
if item_id and item_id in seen_evidence_ids:
continue
seen_evidence_ids.add(item_id)
rewrite_evidence.append({**item, "rewrite_query": rewrite_query})

if rewrite_evidence:
rewrite_take = max(3, min(5, limit // 2))
evidence = [*rewrite_evidence[:rewrite_take], *evidence]
evidence = _dedupe_evidence(evidence, limit=limit)

# Step 2: expand top hit (parallel with step 3)
expanded_neighbours: list[dict] = []
Expand Down Expand Up @@ -139,7 +192,6 @@ async def deep_search_tool(
)

# Build consolidated response
hints: list[Hint] = []
if not evidence:
# Decompose the query into its first content word and suggest
# a FTS fallback. "try a different category" as a literal arg
Expand Down Expand Up @@ -174,6 +226,7 @@ async def deep_search_tool(
"expanded_neighbours": expanded_neighbours[:5],
"document_excerpts": doc_excerpts,
"search_anchors": search_result.data.get("anchors", {}),
"rewrite_queries": rewrite_queries,
},
hints=hints,
session=session.summary(),
Expand Down
12 changes: 12 additions & 0 deletions tests/test_agent_loop_projection.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,18 @@ def test_hints_capped_at_three():
assert len(parsed["hints"]) == 3


def test_hints_are_projected_before_data():
    """The projected output must place the "hints" key ahead of "data"."""
    evidence = [{"id": "n1", "preview": "long evidence text"}]
    rewrite_hint = {"action": "search", "args": {"query": "rewrite"}}
    raw_result = {
        "tool": "search",
        "ok": True,
        "data": {"evidence": evidence},
        "hints": [rewrite_hint],
    }

    projected = project_tool_result(raw_result)

    hints_at = projected.index('"hints"')
    data_at = projected.index('"data"')
    assert hints_at < data_at


def test_error_preserved():
r = {"tool": "filter_nodes", "ok": False, "data": {}, "error": "bad op"}
out = project_tool_result(r)
Expand Down
Loading
Loading