Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/ablation/diagnostics/public_scale_20260702.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ PYTHONUNBUFFERED=1 SYNAPTIC_SQLITE_FTS_AND_FIRST_THRESHOLD=20 SYNAPTIC_SQLITE_FT
# Put DEEPSEEK_API_KEY in shell env, the repo .env, or the parent workspace .env.
# Do not put the key in docs, commands, JSONL, or DBs.
uv run python examples/ablation/configure_deepseek_env.py
uv run python examples/ablation/run_agent_loop_benchmarks.py --llm-preset deepseek --preflight-only --preflight-timeout 15
PYTHONUNBUFFERED=1 SYNAPTIC_SQLITE_FTS_AND_FIRST_THRESHOLD=20 SYNAPTIC_SQLITE_FTS_LEXICAL_RERANK_POOL=500 uv run python examples/ablation/run_agent_loop_benchmarks.py --llm-preset deepseek --subset 20 --msmarco-path tests/benchmark/data/msmarco_passage_full.json --corpus-limit 8841823 --sqlite-db-path tests/benchmark/data/msmarco_full.db --max-turns 5 --llm-timeout 180 --preflight-timeout 15 --out-jsonl examples/ablation/diagnostics/agent_loop_deepseek_v4_flash_20.jsonl --resume

# Historical local Ollama fallback smoke when the H100/Qwen3.6 tunnel is down.
Expand Down
39 changes: 30 additions & 9 deletions examples/ablation/run_agent_loop_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,11 @@ def _llm_preflight_error_message(base_url: str, model: str, exc: BaseException)
)


def _validate_preflight_flags(*, preflight_only: bool, skip_preflight: bool) -> None:
    """Reject the contradictory ``--preflight-only`` / ``--skip-preflight`` pair.

    Args:
        preflight_only: Value of the ``--preflight-only`` flag.
        skip_preflight: Value of the ``--skip-preflight`` flag.

    Raises:
        SystemExit: If both flags are set at the same time.
    """
    # A preflight-only run has nothing to do when the preflight itself is skipped.
    if preflight_only and skip_preflight:
        raise SystemExit("--preflight-only cannot be combined with --skip-preflight")


async def _preflight_llm_endpoint(
client: object,
*,
Expand Down Expand Up @@ -517,7 +522,7 @@ async def amain(argv: list[str] | None = None) -> int:
_load_local_env()
parser = argparse.ArgumentParser()
parser.add_argument("--msmarco-path", type=Path, default=DEFAULT_MSMARCO_PATH)
parser.add_argument("--sqlite-db-path", type=Path, required=True)
parser.add_argument("--sqlite-db-path", type=Path, default=None)
parser.add_argument("--subset", type=int, default=20)
parser.add_argument("--corpus-limit", type=int, default=0)
parser.add_argument(
Expand Down Expand Up @@ -550,6 +555,11 @@ async def amain(argv: list[str] | None = None) -> int:
action="store_true",
help="Skip the initial /v1/models endpoint check.",
)
parser.add_argument(
"--preflight-only",
action="store_true",
help="Validate the resolved LLM endpoint/key/model and exit before loading data.",
)
parser.add_argument("--no-sufficiency-gate", action="store_true")
parser.add_argument(
"--allow-zero-tool-answer",
Expand Down Expand Up @@ -579,23 +589,19 @@ async def amain(argv: list[str] | None = None) -> int:
raise SystemExit("--preflight-timeout must be positive")
if args.resume and args.out_jsonl is None:
raise SystemExit("--resume requires --out-jsonl")
_validate_preflight_flags(
preflight_only=args.preflight_only,
skip_preflight=args.skip_preflight,
)
args.llm_base_url, args.model, args.api_key_env = _resolve_llm_settings(
preset=args.llm_preset,
llm_base_url=args.llm_base_url,
model=args.model,
api_key_env=args.api_key_env,
)
if not args.msmarco_path.exists():
raise SystemExit(f"{args.msmarco_path} does not exist")
if not args.sqlite_db_path.exists():
raise SystemExit(f"{args.sqlite_db_path} does not exist")

from openai import AsyncOpenAI

data = json.loads(args.msmarco_path.read_text(encoding="utf-8"))
query_items = list(data["queries"].items())[: args.subset]
qrels = data["qrels"]
n_docs = args.corpus_limit or int(data.get("corpus_size") or 0)
api_key = os.environ.get(args.api_key_env) or ""
local_endpoint = any(marker in args.llm_base_url for marker in ("localhost", "127.0.0.1"))
if not api_key and not local_endpoint:
Expand All @@ -610,6 +616,21 @@ async def amain(argv: list[str] | None = None) -> int:
model=args.model,
timeout_sec=args.preflight_timeout,
)
if args.preflight_only:
print(f"LLM preflight OK: base_url={args.llm_base_url} model={args.model}")
return 0

if not args.msmarco_path.exists():
raise SystemExit(f"{args.msmarco_path} does not exist")
if args.sqlite_db_path is None:
raise SystemExit("--sqlite-db-path is required unless --preflight-only is set")
if not args.sqlite_db_path.exists():
raise SystemExit(f"{args.sqlite_db_path} does not exist")

data = json.loads(args.msmarco_path.read_text(encoding="utf-8"))
query_items = list(data["queries"].items())[: args.subset]
qrels = data["qrels"]
n_docs = args.corpus_limit or int(data.get("corpus_size") or 0)

backend = SqliteGraphBackend(str(args.sqlite_db_path))
await backend.connect()
Expand Down
27 changes: 27 additions & 0 deletions tests/test_agent_search_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from pathlib import Path
from types import SimpleNamespace

import pytest

RUNNER_PATH = (
Path(__file__).resolve().parents[1] / "examples" / "ablation" / "run_agent_search_benchmarks.py"
)
Expand Down Expand Up @@ -212,6 +214,31 @@ def test_agent_loop_local_preset_keeps_existing_defaults() -> None:
)


def test_agent_loop_preflight_only_rejects_skip_preflight() -> None:
    """Setting both preflight flags together must abort with ``SystemExit``."""
    with pytest.raises(SystemExit, match="cannot be combined"):
        loop_runner._validate_preflight_flags(preflight_only=True, skip_preflight=True)


@pytest.mark.asyncio
async def test_agent_loop_preflight_only_skips_benchmark_inputs(monkeypatch, capsys) -> None:
    """``--preflight-only`` returns 0 after the endpoint check without a DB path.

    The real preflight call is replaced with a recorder, so no network access
    is attempted. The test also checks that the API key value is never echoed
    to stdout or stderr.
    """
    calls = []

    async def fake_preflight(client, *, base_url, model, timeout_sec) -> None:
        # Record the resolved settings instead of contacting the endpoint.
        calls.append((base_url, model, timeout_sec))

    key_value = "value-for-test"
    monkeypatch.setenv("DEEPSEEK_API_KEY", key_value)
    monkeypatch.setattr(loop_runner, "_preflight_llm_endpoint", fake_preflight)

    # No --sqlite-db-path is passed: preflight-only mode must not require it.
    assert await loop_runner.amain(["--llm-preset", "deepseek", "--preflight-only"]) == 0

    captured = capsys.readouterr()
    assert calls == [("https://api.deepseek.com/v1", "deepseek-v4-flash", 10.0)]
    assert "LLM preflight OK" in captured.out
    # The secret must not leak into either output stream.
    assert key_value not in captured.out
    assert key_value not in captured.err


def test_llm_preflight_error_message_names_endpoint_and_skip_hint() -> None:
msg = loop_runner._llm_preflight_error_message(
"http://127.0.0.1:18012/v1",
Expand Down
Loading