Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# DeepSeek agent loop targeted limit-10 check

- Run at: 2026-07-02 20:47 KST
- Model: `deepseek-v4-flash`
- Base corpus: `tests/benchmark/data/msmarco_passage_full.json`
- SQLite DB: `tests/benchmark/data/msmarco_full.db`
- Corpus limit: 8,841,823 passages
- Target set: six high-call misses from the prior 50-query DeepSeek run
- Change under test: expose `limit`/`read_top_k` to the agent tool schema and widen the default `deep_search` evidence pool from 5 to 10.

## Result

Of the six targeted high-call misses, none (`0/6`) reached a relevant passage in the prior 50-query run; `2/6` reached one after the `deep_search` evidence pool was widened.

| QID | Query | Prior 50-query run | Limit-10 targeted run | First relevant |
| --- | --- | ---: | ---: | --- |
| 54544 | blood diseases that are sexually transmitted | no | no | - |
| 293992 | how many product lines does coca cola have | no | yes | turn 2 / call 3 |
| 208145 | how bicycle tire tubes are sized | no | no | - |
| 14151 | age requirements for name change | no | yes | turn 1 / call 1 |
| 91711 | child psychiatrist salary 2016 | no | no | - |
| 237373 | how is soil created from rocks | no | no | - |

## Interpretation

This is not a full replacement for the 50-query gate, but it validates the specific bottleneck seen in high-call misses: some questions needed a wider first evidence pool rather than more repeated follow-up searches. The change keeps a hard runtime cap (`limit <= 20`, `read_top_k <= 5`) so broad searches can recover more candidates without letting the LLM request unbounded context.

40 changes: 38 additions & 2 deletions src/synaptic/agent_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,20 @@ def _is_enumeration_query(query: str) -> bool:
"properties": {
"query": {"type": "string"},
"category": {"type": "string"},
"limit": {
"type": "integer",
"description": (
"Evidence items to return; use 10-20 for broad public-web "
"questions where the first few hits may miss the answer."
),
},
"read_top_k": {
"type": "integer",
"description": (
"How many top parent documents to read; keep 1-3 unless "
"the snippets are inconclusive."
),
},
},
"required": ["query"],
},
Expand All @@ -231,7 +245,13 @@ def _is_enumeration_query(query: str) -> bool:
"description": "Basic text search. Returns top candidate nodes.",
"parameters": {
"type": "object",
"properties": {"query": {"type": "string"}},
"properties": {
"query": {"type": "string"},
"limit": {
"type": "integer",
"description": "Candidate evidence items to return; capped by the runtime.",
},
},
"required": ["query"],
},
},
Expand Down Expand Up @@ -456,6 +476,14 @@ class AgentSearchResult:
# --- Internals -----------------------------------------------------


def _bounded_int(value: Any, *, default: int, minimum: int, maximum: int) -> int:
try:
parsed = int(value)
except (TypeError, ValueError):
parsed = default
return max(minimum, min(parsed, maximum))


async def _dispatch_tool(
name: str,
args: dict,
Expand Down Expand Up @@ -486,10 +514,18 @@ async def _dispatch_tool(
session,
args.get("query", ""),
category=args.get("category"),
limit=_bounded_int(args.get("limit"), default=10, minimum=1, maximum=20),
read_top_k=_bounded_int(args.get("read_top_k"), default=2, minimum=0, maximum=5),
embedder=embedder,
)
elif name == "search":
r = await search_tool(backend, session, args.get("query", ""), embedder=embedder)
r = await search_tool(
backend,
session,
args.get("query", ""),
limit=_bounded_int(args.get("limit"), default=10, minimum=1, maximum=20),
embedder=embedder,
)
elif name == "expand":
r = await expand_tool(
backend,
Expand Down
12 changes: 11 additions & 1 deletion src/synaptic/agent_tools_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,20 @@
logger = logging.getLogger("agent-tools-v2")


def _bounded_int(value: object, *, default: int, minimum: int, maximum: int) -> int:
try:
parsed = int(value)
except (TypeError, ValueError):
parsed = default
return max(minimum, min(parsed, maximum))


async def deep_search_tool(
backend: StorageBackend,
session: SearchSession,
query: str,
*,
limit: int = 5,
limit: int = 10,
category: str | None = None,
read_top_k: int = 2,
embedder: object | None = None,
Expand All @@ -74,6 +82,8 @@ async def deep_search_tool(
embedder: Optional embedder for EvidenceSearch.
reranker: Optional cross-encoder reranker.
"""
limit = _bounded_int(limit, default=10, minimum=1, maximum=20)
read_top_k = _bounded_int(read_top_k, default=2, minimum=0, maximum=5)
budget = _budget_check(session, "deep_search")
if budget is not None:
return budget
Expand Down
70 changes: 70 additions & 0 deletions tests/test_agent_efficiency.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,14 @@
from synaptic.agent_loop import (
_EFFICIENCY_DIRECTIVE,
AGENT_SYSTEM,
AGENT_TOOLS,
_args_key,
_bounded_int,
_dispatch_tool,
_result_count,
run_agent_loop,
)
from synaptic.agent_tools import ToolResult
from synaptic.backends.sqlite_graph import SqliteGraphBackend
from synaptic.models import ConsolidationLevel, Node, NodeKind

Expand All @@ -45,6 +49,72 @@ def test_agent_system_preserves_query_constraints_for_followups():
assert "named entity, attribute, and relation" in AGENT_SYSTEM


def _tool_schema(name: str) -> dict:
    """Return the ``function`` schema of the agent tool called ``name``.

    Raises:
        AssertionError: if no entry in ``AGENT_TOOLS`` has that name.
    """
    found = next(
        (
            entry["function"]
            for entry in AGENT_TOOLS
            if entry["function"]["name"] == name
        ),
        None,
    )
    if found is None:
        raise AssertionError(f"missing tool schema: {name}")
    return found


def test_agent_tool_schema_exposes_search_limits():
    """Both search tools must advertise their size-limiting parameters."""
    expected_by_tool = (
        ("deep_search", {"query", "category", "limit", "read_top_k"}),
        ("search", {"query", "limit"}),
    )
    for tool_name, required_props in expected_by_tool:
        props = _tool_schema(tool_name)["parameters"]["properties"]
        assert required_props.issubset(props)


def test_bounded_int_clamps_tool_limits():
    """``_bounded_int`` parses, falls back to the default, and clamps to 1..20."""
    # (raw input, expected result) with default=10, minimum=1, maximum=20.
    cases = [
        ("15", 15),
        (" 15 ", 15),
        (None, 10),
        ("invalid", 10),
        (99, 20),
        (-5, 1),
    ]
    for raw, expected in cases:
        assert _bounded_int(raw, default=10, minimum=1, maximum=20) == expected


@pytest.mark.asyncio
async def test_dispatch_passes_agent_requested_search_limits(monkeypatch):
    """Dispatch forwards agent-requested limits to the tools after clamping them."""
    import synaptic.agent_tools as agent_tools
    import synaptic.agent_tools_v2 as agent_tools_v2

    recorded: list[tuple] = []

    def _empty_result(tool_name: str) -> ToolResult:
        # Minimal successful payload shared by both stubs.
        return ToolResult(tool=tool_name, ok=True, data={"evidence": []})

    async def fake_deep_search_tool(
        backend,
        session,
        query,
        *,
        category=None,
        limit=10,
        read_top_k=2,
        embedder=None,
    ):
        recorded.append(("deep_search", query, category, limit, read_top_k))
        return _empty_result("deep_search")

    async def fake_search_tool(backend, session, query, *, limit=10, embedder=None):
        recorded.append(("search", query, limit))
        return _empty_result("search")

    stubs = (
        (agent_tools_v2, "deep_search_tool", fake_deep_search_tool),
        (agent_tools, "search_tool", fake_search_tool),
    )
    for module, attr, stub in stubs:
        monkeypatch.setattr(module, attr, stub)

    # Out-of-range values on purpose: 99 and 9 exceed the caps, -5 is below the floor.
    deep_args = {"query": "q", "category": "cat", "limit": 99, "read_top_k": 9}
    await _dispatch_tool("deep_search", deep_args, None, None)
    await _dispatch_tool("search", {"query": "q", "limit": -5}, None, None)

    expected_calls = [
        ("deep_search", "q", "cat", 20, 5),
        ("search", "q", 1),
    ]
    assert recorded == expected_calls


# --- fake client (reused shape) ---------------------------------------


Expand Down
134 changes: 134 additions & 0 deletions tests/test_agent_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import pytest

import synaptic.agent_tools_v2 as tools_v2
from synaptic.agent_tools import (
ToolResult,
count_tool,
Expand Down Expand Up @@ -274,6 +275,139 @@ async def test_search_hints_on_empty(self):
assert len(result.hints) > 0


# --- deep_search_tool ---


@pytest.mark.asyncio
async def test_deep_search_defaults_to_wider_evidence_pool(monkeypatch):
    """Without an explicit ``limit``, deep_search requests 10 items from search."""
    seen_limits: list[int] = []

    async def fake_search_tool(backend, session, query, *, limit, **_unused):
        # Record the requested pool size and answer with an empty result.
        seen_limits.append(limit)
        empty_payload = {"evidence": [], "anchors": {}}
        return ToolResult(
            tool="search",
            ok=True,
            data=empty_payload,
            session=session.summary(),
        )

    monkeypatch.setattr(tools_v2, "search_tool", fake_search_tool)
    store = MemoryBackend()
    await store.connect()

    outcome = await tools_v2.deep_search_tool(store, SearchSession(), "broad question")

    assert outcome.ok is True
    assert seen_limits == [10]


@pytest.mark.asyncio
async def test_deep_search_caps_evidence_pool(monkeypatch):
    """An oversized ``limit`` is capped at 20 and a bad ``read_top_k`` is tolerated."""
    seen_limits: list[int] = []

    async def fake_search_tool(backend, session, query, *, limit, **_unused):
        # Record the requested pool size and answer with an empty result.
        seen_limits.append(limit)
        empty_payload = {"evidence": [], "anchors": {}}
        return ToolResult(
            tool="search",
            ok=True,
            data=empty_payload,
            session=session.summary(),
        )

    monkeypatch.setattr(tools_v2, "search_tool", fake_search_tool)
    store = MemoryBackend()
    await store.connect()

    oversized_request = {"limit": 99, "read_top_k": "invalid"}
    outcome = await tools_v2.deep_search_tool(
        store,
        SearchSession(),
        "broad question",
        **oversized_request,
    )

    assert outcome.ok is True
    assert seen_limits == [20]


@pytest.mark.asyncio
async def test_deep_search_caps_document_reads(monkeypatch):
    """``read_top_k=99`` reads only the first five of six candidate documents."""
    fetched_docs: list[str] = []

    async def fake_search_tool(backend, session, query, *, limit, **_unused):
        # Six hits, each pointing at a distinct parent document.
        hits = []
        for idx in range(6):
            hits.append({"id": f"chunk_{idx}", "document_id": f"doc_{idx}"})
        return ToolResult(
            tool="search",
            ok=True,
            data={"evidence": hits, "anchors": {}},
            session=session.summary(),
        )

    async def fake_expand(backend, session, node_id):
        payload = {"seed": {"id": node_id}, "neighbours": []}
        return ToolResult(
            tool="expand",
            ok=True,
            data=payload,
            session=session.summary(),
        )

    async def fake_get_doc(backend, session, doc_id, query):
        # Track which documents deep_search decided to read, in order.
        fetched_docs.append(doc_id)
        payload = {"document": {"id": doc_id}, "chunks": [], "chunk_count": 0}
        return ToolResult(
            tool="get_document",
            ok=True,
            data=payload,
            session=session.summary(),
        )

    replacements = {
        "search_tool": fake_search_tool,
        "_safe_expand": fake_expand,
        "_safe_get_doc": fake_get_doc,
    }
    for attr, stub in replacements.items():
        monkeypatch.setattr(tools_v2, attr, stub)
    store = MemoryBackend()
    await store.connect()

    outcome = await tools_v2.deep_search_tool(
        store,
        SearchSession(),
        "broad question",
        read_top_k=99,
    )

    assert outcome.ok is True
    assert fetched_docs == [f"doc_{idx}" for idx in range(5)]


# --- expand_tool ---


Expand Down
Loading