From 4a2fa98ed496dc507744dcdb5710c6ba0c73a437 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=86=90=EC=84=B1=EC=A4=80?= <sonsj97@gmail.com>
Date: Thu, 2 Jul 2026 19:22:21 +0900
Subject: [PATCH] Add agent loop LLM preflight-only mode

---
 .../diagnostics/public_scale_20260702.md      |  1 +
 .../ablation/run_agent_loop_benchmarks.py     | 39 ++++++++++++++-----
 tests/test_agent_search_benchmarks.py         | 27 +++++++++++++
 3 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/examples/ablation/diagnostics/public_scale_20260702.md b/examples/ablation/diagnostics/public_scale_20260702.md
index 65c5ace..5881657 100644
--- a/examples/ablation/diagnostics/public_scale_20260702.md
+++ b/examples/ablation/diagnostics/public_scale_20260702.md
@@ -48,6 +48,7 @@ PYTHONUNBUFFERED=1 SYNAPTIC_SQLITE_FTS_AND_FIRST_THRESHOLD=20 SYNAPTIC_SQLITE_FT
 # Put DEEPSEEK_API_KEY in shell env, the repo .env, or the parent workspace .env.
 # Do not put the key in docs, commands, JSONL, or DBs.
 uv run python examples/ablation/configure_deepseek_env.py
+uv run python examples/ablation/run_agent_loop_benchmarks.py --llm-preset deepseek --preflight-only --preflight-timeout 15
 PYTHONUNBUFFERED=1 SYNAPTIC_SQLITE_FTS_AND_FIRST_THRESHOLD=20 SYNAPTIC_SQLITE_FTS_LEXICAL_RERANK_POOL=500 uv run python examples/ablation/run_agent_loop_benchmarks.py --llm-preset deepseek --subset 20 --msmarco-path tests/benchmark/data/msmarco_passage_full.json --corpus-limit 8841823 --sqlite-db-path tests/benchmark/data/msmarco_full.db --max-turns 5 --llm-timeout 180 --preflight-timeout 15 --out-jsonl examples/ablation/diagnostics/agent_loop_deepseek_v4_flash_20.jsonl --resume
 
 # Historical local Ollama fallback smoke when the H100/Qwen3.6 tunnel is down.
diff --git a/examples/ablation/run_agent_loop_benchmarks.py b/examples/ablation/run_agent_loop_benchmarks.py
index 4add631..44e6c61 100644
--- a/examples/ablation/run_agent_loop_benchmarks.py
+++ b/examples/ablation/run_agent_loop_benchmarks.py
@@ -341,6 +341,11 @@ def _llm_preflight_error_message(base_url: str, model: str, exc: BaseException)
     )
 
 
+def _validate_preflight_flags(*, preflight_only: bool, skip_preflight: bool) -> None:
+    if preflight_only and skip_preflight:  # one flag demands the check, the other disables it
+        raise SystemExit("--preflight-only cannot be combined with --skip-preflight")
+
+
 async def _preflight_llm_endpoint(
     client: object,
     *,
@@ -517,7 +522,7 @@ async def amain(argv: list[str] | None = None) -> int:
     _load_local_env()
     parser = argparse.ArgumentParser()
     parser.add_argument("--msmarco-path", type=Path, default=DEFAULT_MSMARCO_PATH)
-    parser.add_argument("--sqlite-db-path", type=Path, required=True)
+    parser.add_argument("--sqlite-db-path", type=Path, default=None)
     parser.add_argument("--subset", type=int, default=20)
     parser.add_argument("--corpus-limit", type=int, default=0)
     parser.add_argument(
@@ -550,6 +555,11 @@ async def amain(argv: list[str] | None = None) -> int:
         action="store_true",
         help="Skip the initial /v1/models endpoint check.",
     )
+    parser.add_argument(
+        "--preflight-only",
+        action="store_true",
+        help="Validate the resolved LLM endpoint/key/model and exit before loading data.",
+    )
     parser.add_argument("--no-sufficiency-gate", action="store_true")
     parser.add_argument(
         "--allow-zero-tool-answer",
@@ -579,23 +589,26 @@ async def amain(argv: list[str] | None = None) -> int:
         raise SystemExit("--preflight-timeout must be positive")
     if args.resume and args.out_jsonl is None:
         raise SystemExit("--resume requires --out-jsonl")
+    _validate_preflight_flags(
+        preflight_only=args.preflight_only,
+        skip_preflight=args.skip_preflight,
+    )
     args.llm_base_url, args.model, args.api_key_env = _resolve_llm_settings(
         preset=args.llm_preset,
         llm_base_url=args.llm_base_url,
         model=args.model,
         api_key_env=args.api_key_env,
     )
-    if not args.msmarco_path.exists():
-        raise SystemExit(f"{args.msmarco_path} does not exist")
-    if not args.sqlite_db_path.exists():
-        raise SystemExit(f"{args.sqlite_db_path} does not exist")
+    if not args.preflight_only:  # fail fast on local inputs before any network preflight
+        if not args.msmarco_path.exists():
+            raise SystemExit(f"{args.msmarco_path} does not exist")
+        if args.sqlite_db_path is None:
+            raise SystemExit("--sqlite-db-path is required unless --preflight-only is set")
+        if not args.sqlite_db_path.exists():
+            raise SystemExit(f"{args.sqlite_db_path} does not exist")
 
     from openai import AsyncOpenAI
 
-    data = json.loads(args.msmarco_path.read_text(encoding="utf-8"))
-    query_items = list(data["queries"].items())[: args.subset]
-    qrels = data["qrels"]
-    n_docs = args.corpus_limit or int(data.get("corpus_size") or 0)
     api_key = os.environ.get(args.api_key_env) or ""
     local_endpoint = any(marker in args.llm_base_url for marker in ("localhost", "127.0.0.1"))
     if not api_key and not local_endpoint:
@@ -610,6 +623,14 @@ async def amain(argv: list[str] | None = None) -> int:
             model=args.model,
             timeout_sec=args.preflight_timeout,
         )
+    if args.preflight_only:  # endpoint verified; skip dataset and DB loading entirely
+        print(f"LLM preflight OK: base_url={args.llm_base_url} model={args.model}")
+        return 0
+
+    data = json.loads(args.msmarco_path.read_text(encoding="utf-8"))
+    query_items = list(data["queries"].items())[: args.subset]
+    qrels = data["qrels"]
+    n_docs = args.corpus_limit or int(data.get("corpus_size") or 0)
 
     backend = SqliteGraphBackend(str(args.sqlite_db_path))
     await backend.connect()
diff --git a/tests/test_agent_search_benchmarks.py b/tests/test_agent_search_benchmarks.py
index 02d39f6..5515faa 100644
--- a/tests/test_agent_search_benchmarks.py
+++ b/tests/test_agent_search_benchmarks.py
@@ -8,6 +8,8 @@
 from pathlib import Path
 from types import SimpleNamespace
 
+import pytest
+
 RUNNER_PATH = (
     Path(__file__).resolve().parents[1] / "examples" / "ablation" / "run_agent_search_benchmarks.py"
 )
@@ -212,6 +214,31 @@ def test_agent_loop_local_preset_keeps_existing_defaults() -> None:
     )
 
 
+def test_agent_loop_preflight_only_rejects_skip_preflight() -> None:
+    with pytest.raises(SystemExit, match="cannot be combined"):  # regex-searched in the exit message
+        loop_runner._validate_preflight_flags(preflight_only=True, skip_preflight=True)
+
+
+@pytest.mark.asyncio
+async def test_agent_loop_preflight_only_skips_benchmark_inputs(monkeypatch, capsys) -> None:
+    calls = []  # one (base_url, model, timeout_sec) tuple per fake preflight call
+
+    async def fake_preflight(client, *, base_url, model, timeout_sec) -> None:
+        calls.append((base_url, model, timeout_sec))
+
+    key_value = "value-for-test"
+    monkeypatch.setenv("DEEPSEEK_API_KEY", key_value)
+    monkeypatch.setattr(loop_runner, "_preflight_llm_endpoint", fake_preflight)
+    # No --sqlite-db-path is passed: preflight-only must return before it is needed.
+    assert await loop_runner.amain(["--llm-preset", "deepseek", "--preflight-only"]) == 0
+
+    captured = capsys.readouterr()
+    assert calls == [("https://api.deepseek.com/v1", "deepseek-v4-flash", 10.0)]
+    assert "LLM preflight OK" in captured.out
+    assert key_value not in captured.out  # the API key must never be echoed
+    assert key_value not in captured.err
+
+
 def test_llm_preflight_error_message_names_endpoint_and_skip_hint() -> None:
     msg = loop_runner._llm_preflight_error_message(
         "http://127.0.0.1:18012/v1",