diff --git a/.github/workflows/harness-integration.yml b/.github/workflows/harness-integration.yml index 075ee5cf3..ab20929a8 100644 --- a/.github/workflows/harness-integration.yml +++ b/.github/workflows/harness-integration.yml @@ -7,8 +7,7 @@ on: paths: - "src/agentex/lib/core/harness/**" - "src/agentex/lib/adk/_modules/**" - - "tests/lib/core/harness/test_harness_pydantic_ai_*.py" - - "tests/lib/core/harness/test_harness_langgraph_*.py" + - "tests/lib/core/harness/test_harness_*.py" - ".github/workflows/harness-integration.yml" jobs: @@ -34,14 +33,15 @@ jobs: run: ./scripts/test tests/lib/core/harness/ -v # Offline harness integration tests (sync / async / temporal channels) for each - # migrated harness. These use fake streams / TestModel + fake streaming/tracing - # and require no live infrastructure. Future harness migration PRs (6-8) add - # their harness to the matrix below and their test paths to the triggers above. + # harness. These use fake streams / TestModel + fake streaming/tracing and + # require no live infrastructure. All five harnesses are now covered; the + # trigger above uses a `test_harness_*.py` glob so new suites are picked up + # automatically. live-matrix: runs-on: ubuntu-latest strategy: matrix: - harness: [pydantic_ai, langgraph] + harness: [pydantic_ai, langgraph, openai, claude_code, codex] channel: [sync, async, temporal] fail-fast: false name: ${{ matrix.harness }}-${{ matrix.channel }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f81295a9..277809f54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ ## Unreleased +### ⚠ BREAKING CHANGES + +* **harness:** removed the deprecated bespoke tracing handlers `create_langgraph_tracing_handler` / `create_pydantic_ai_tracing_handler` (and their `AgentexLangGraphTracingHandler` / `AgentexPydanticAITracingHandler` classes) from the public `agentex.lib.adk` surface. 
Span tracing is now derived from the canonical `StreamTaskMessage*` stream by `UnifiedEmitter` — wrap your run in the harness `*Turn` and drive `UnifiedEmitter.yield_turn` / `auto_send_turn`. The `agentex init` templates were migrated accordingly. +* **harness:** each harness now exposes exactly `_{harness}_sync.py` + `_{harness}_turn.py` under `agentex.lib.adk._modules`. The OpenAI harness `OpenAITurn` and `convert_openai_to_agentex_events` moved to `agentex.lib.adk._modules._openai_turn` / `_openai_sync`; back-compat shims remain at `agentex.lib.adk.providers._modules.{openai_turn,sync_provider}` for one release. Public facade names (`stream_pydantic_ai_events`, `stream_langgraph_events`, `emit_langgraph_messages`, etc.) are unchanged. + ### Features * **tracing:** emit OTel metrics for async span queue depth, batch drain, and SGP export success/failure (HTTP status labels). Disable SDK-side recording with ``AGENTEX_TRACING_METRICS=0``. diff --git a/adk/docs/harness.md b/adk/docs/harness.md index 6a9d8947a..d81835a03 100644 --- a/adk/docs/harness.md +++ b/adk/docs/harness.md @@ -39,14 +39,17 @@ Every harness tap produces a sequence of these. Everything downstream (delivery, ## Per-harness taps: `convert_{harness}_to_agentex_events` -A tap is an async generator that translates the harness's native event stream into `StreamTaskMessage*` events. 
The shipped taps are: | Harness | Tap function | Exported from | |---|---|---| | pydantic-ai | `convert_pydantic_ai_to_agentex_events` | `agentex.lib.adk` | | LangGraph | `convert_langgraph_to_agentex_events` | `agentex.lib.adk` | +| claude-code | `convert_claude_code_to_agentex_events` | `agentex.lib.adk` | +| codex | `convert_codex_to_agentex_events` | `agentex.lib.adk` | +| OpenAI Agents | `convert_openai_to_agentex_events` | `agentex.lib.adk.providers._modules.sync_provider` | -Taps for claude-code and codex will be added in subsequent PRs (AGX1-420, AGX1-421) and exported from `agentex.lib.adk` in the same way. +Each harness also provides a `HarnessTurn` wrapper that pairs its tap's event stream with usage extraction: `PydanticAITurn`, `LangGraphTurn`, `ClaudeCodeTurn`, `CodexTurn`, and `OpenAITurn`. --- @@ -157,11 +160,13 @@ Spans are derived from the canonical stream by `SpanDeriver` (pure, no `adk` dep ## Usage examples by channel -### Sync ACP (pydantic-ai tap) +### Sync ACP (`yield_turn`) + +Build the harness's `HarnessTurn` wrapper and iterate `emitter.yield_turn(turn)` — the emitter forwards each event to the caller and traces spans as a side effect: ```python import agentex.lib.adk as adk -from agentex.lib.adk import UnifiedEmitter, convert_pydantic_ai_to_agentex_events +from agentex.lib.adk import UnifiedEmitter, ClaudeCodeTurn @acp.on_message_send async def handle(params): @@ -172,13 +177,12 @@ async def handle(params): trace_id=task_id, parent_span_id=turn_span.id if turn_span else None, ) - tap = convert_pydantic_ai_to_agentex_events(pydantic_stream) - # wrap tap in a HarnessTurn then yield_turn, or yield directly: - async for event in tap: + turn = ClaudeCodeTurn(claude_code_stream) # any HarnessTurn + async for event in emitter.yield_turn(turn): yield event ``` -For the pre-unified sync path the tap is still yielded directly; `UnifiedEmitter.yield_turn` is the forward-looking integration point when a `HarnessTurn` wrapper is available. 
+Every harness follows the same shape — swap `ClaudeCodeTurn` for `PydanticAITurn`, `LangGraphTurn`, `CodexTurn`, or `OpenAITurn` and feed it that harness's native stream. ### Async Temporal (auto-send) diff --git a/docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md b/docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md deleted file mode 100644 index 2fa1892fe..000000000 --- a/docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md +++ /dev/null @@ -1,246 +0,0 @@ -# Unified Harness Surface — PR 4: pydantic-ai Migration Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Migrate the pydantic-ai harness onto the unified harness surface so it emits streaming + persisted messages + tracing + turn usage through ONE source of truth, over both delivery channels (yield + auto-send), with no public regression — and ship its 3 integration test agents (sync/async/temporal). - -**Architecture:** Wrap a pydantic-ai run as a `HarnessTurn` (canonical `StreamTaskMessage*` stream + normalized `TurnUsage`). Reuse the existing `convert_pydantic_ai_to_agentex_events` mapping as the tap. Reimplement the existing public auto-send helper on top of `UnifiedEmitter.auto_send_turn`, and route sync ACP agents through `UnifiedEmitter.yield_turn`. Retire the bespoke `_pydantic_ai_tracing` handler in favor of the surface's derived spans (keep the old symbol as a deprecated shim). - -**Tech Stack:** Python 3, pydantic-ai (`pydantic_ai`), pydantic v2, pytest + pytest-asyncio, the `agentex.lib.core.harness` package from PRs 1-3. - -**Foundation:** `src/agentex/lib/core/harness/` (`UnifiedEmitter`, `SpanTracer`, `SpanDeriver`, `HarnessTurn`, `TurnUsage`, `TurnResult`, `yield_events`, `auto_send`, conformance scaffold). 
Design: `docs/superpowers/specs/2026-06-18-unified-harness-surface-design.md`. - ---- - -## Dependencies (must land first) - -- **AGX1-373** — cross-channel conformance equivalence + `Full` wire reconciliation. PR 4's conformance fixtures register into the upgraded cross-channel runner. **Do not start Task 6 until 373 is merged into the foundation branch.** -- **AGX1-375** — public `adk` import path for the harness surface. If merged, import the surface via the public path in this PR; if not, import from `agentex.lib.core.harness` and add a follow-up note. (Tasks below assume `from agentex.lib.core.harness import UnifiedEmitter, TurnUsage, ...`; swap to the public path if 375 landed.) - -This is one PR (target < 1000 lines code, excluding any recorded fixtures). The 3 test agents are the largest chunk; if the diff exceeds budget, split the test agents into a follow-up PR 4b (note in the PR description). - ---- - -## File Structure - -- Modify `src/agentex/lib/adk/_modules/_pydantic_ai_sync.py` — add an optional `on_result` callback to `convert_pydantic_ai_to_agentex_events` (additive) so usage can be captured. Behavior unchanged when omitted. -- Create `src/agentex/lib/adk/_modules/_pydantic_ai_turn.py` — `PydanticAITurn(HarnessTurn)` + `pydantic_ai_usage_to_turn_usage(...)`. -- Modify `src/agentex/lib/adk/_modules/_pydantic_ai_async.py` — reimplement `stream_pydantic_ai_events` on `UnifiedEmitter.auto_send_turn`, preserving signature + return. -- Modify `src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py` — mark `create_pydantic_ai_tracing_handler` / `AgentexPydanticAITracingHandler` deprecated (docstring + `DeprecationWarning`); keep importable. -- Create `tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py` — register pydantic-ai fixtures into the cross-channel conformance runner. 
-- Create `examples/tutorials/harness-pydantic-ai-{sync,async,temporal}/` — 3 test agents (modeled on the `sync-pydantic-ai` / `default-pydantic-ai` / `temporal-pydantic-ai` CLI templates) using the unified surface. -- Modify `.github/workflows/harness-integration.yml` — enable the pydantic-ai rows of the `live-matrix` job. -- Modify `.github/workflows/agentex-tutorials-test.yml` (or its agent list) — include the 3 new test agents if that workflow enumerates agents. - ---- - -## Task 1: Expose the pydantic-ai run result for usage capture - -**Files:** -- Modify: `src/agentex/lib/adk/_modules/_pydantic_ai_sync.py` -- Test: `tests/lib/adk/test_pydantic_ai_sync.py` (create if absent) - -The converter already iterates the pydantic-ai event stream and currently *ignores* `AgentRunResultEvent` (the terminal event carrying the run result + usage). Add an optional callback so a caller can capture it without changing existing behavior. - -- [ ] **Step 1: Write the failing test.** - -```python -import pytest -from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events - - -class _FakeResultEvent: # stand-in for pydantic_ai.run.AgentRunResultEvent - def __init__(self, result): - self.result = result - - -async def _stream(events): - for e in events: - yield e - - -@pytest.mark.asyncio -async def test_on_result_callback_receives_terminal_event(monkeypatch): - # When the stream ends with an AgentRunResultEvent, on_result is invoked with it, - # and the converter still yields no extra events for it. - captured = {} - # Use a real AgentRunResultEvent if constructable; otherwise patch isinstance check. - # (Implementer: see Step 3 note — match the real terminal event type.) - ... -``` - -Implementer note: the exact terminal event type is `pydantic_ai.run.AgentRunResultEvent` (already imported in `_pydantic_ai_sync.py`). 
Write the test to feed a stream ending in a real `AgentRunResultEvent` (construct it as the installed pydantic-ai version requires; inspect `python -c "import pydantic_ai.run, inspect; print(inspect.signature(pydantic_ai.run.AgentRunResultEvent))"`). Assert `on_result` is called once with that event and that the converter yields the same `StreamTaskMessage*` sequence as without the callback (no behavior change for the streaming output). - -- [ ] **Step 2: Run** `uv run pytest tests/lib/adk/test_pydantic_ai_sync.py -v` — expect FAIL (no `on_result` param). - -- [ ] **Step 3: Implement.** Add `on_result: Callable[[AgentRunResultEvent], None] | None = None` (and an async-callable variant if needed) to `convert_pydantic_ai_to_agentex_events`. In the existing `elif isinstance(event, (FunctionToolCallEvent, FinalResultEvent, AgentRunResultEvent))` branch, when the event is an `AgentRunResultEvent` and `on_result` is set, call it (await if it's a coroutine). Keep yielding nothing for it. No other change. - -- [ ] **Step 4: Run** the test — expect PASS, plus run the existing `_pydantic_ai_sync` tests if any to confirm no regression. - -- [ ] **Step 5: Commit** `feat(pydantic-ai): optional on_result callback to expose run result for usage capture`. - ---- - -## Task 2: Normalize pydantic-ai usage to `TurnUsage` - -**Files:** -- Create: `src/agentex/lib/adk/_modules/_pydantic_ai_turn.py` -- Test: `tests/lib/adk/test_pydantic_ai_turn.py` - -- [ ] **Step 1: Verify the real usage shape FIRST.** Run `uv run python -c "from pydantic_ai.usage import RunUsage; import inspect; print([f for f in RunUsage.model_fields])"` (the type/name may be `RunUsage` or `Usage` depending on the installed version). Record the exact field names (commonly: `input_tokens`, `output_tokens`, `total_tokens`, `requests`, and a cache/`details` field). The mapping in Step 3 MUST use the real field names. 
- -- [ ] **Step 2: Write the failing test.** - -```python -from agentex.lib.adk._modules._pydantic_ai_turn import pydantic_ai_usage_to_turn_usage - - -def test_usage_normalization_maps_fields(): - # Build a usage object matching the installed pydantic-ai RunUsage shape - # (see Task 2 Step 1 for the real fields), then assert the mapping. - usage_obj = ... # construct RunUsage(input_tokens=10, output_tokens=20, requests=2, ...) - tu = pydantic_ai_usage_to_turn_usage(usage_obj, model="openai:gpt-4o") - assert tu.model == "openai:gpt-4o" - assert tu.input_tokens == 10 - assert tu.output_tokens == 20 - assert tu.num_llm_calls == 2 -``` - -- [ ] **Step 3: Implement** `pydantic_ai_usage_to_turn_usage(usage, model) -> TurnUsage` mapping the verified RunUsage fields onto `TurnUsage` (`input_tokens`, `output_tokens`, `total_tokens`, `cached_input_tokens` if available, `num_llm_calls` ← `requests`). Use `getattr(usage, "", None)` defensively so a version field rename degrades to `None` rather than crashing. Then implement `PydanticAITurn`: - -```python -class PydanticAITurn: - """A pydantic-ai run as a HarnessTurn: canonical event stream + normalized usage.""" - - def __init__(self, stream, model: str | None = None): - self._stream = stream - self._model = model - self._usage = TurnUsage(model=model) - - @property - async def events(self): - def _capture(result_event): - run_result = getattr(result_event, "result", None) - usage_obj = run_result.usage() if run_result is not None else None - if usage_obj is not None: - self._usage = pydantic_ai_usage_to_turn_usage(usage_obj, self._model) - async for ev in convert_pydantic_ai_to_agentex_events(self._stream, on_result=_capture): - yield ev - - def usage(self) -> TurnUsage: - return self._usage -``` - -(Verify `run_result.usage()` is the correct accessor for the installed version; adjust if it's an attribute.) 
- -- [ ] **Step 4: Add a `PydanticAITurn` test** that feeds a small stream ending in an `AgentRunResultEvent` whose `result.usage()` returns a known usage, drives `turn.events` to exhaustion, then asserts `turn.usage()` reflects the normalized values and that `events` yielded the expected `StreamTaskMessage*`. Confirm `usage()` BEFORE exhaustion returns the default (documented single-pass contract). - -- [ ] **Step 5: Run** the tests — expect PASS. - -- [ ] **Step 6: Commit** `feat(pydantic-ai): PydanticAITurn HarnessTurn + usage normalization`. - ---- - -## Task 3: Reimplement the auto-send helper on the unified surface - -**Files:** -- Modify: `src/agentex/lib/adk/_modules/_pydantic_ai_async.py` -- Test: `tests/lib/adk/test_pydantic_ai_async.py` - -`stream_pydantic_ai_events(stream, task_id, ...)` currently hand-drives `adk.streaming`. Reimplement it to delegate to `UnifiedEmitter.auto_send_turn(PydanticAITurn(stream, model))`, preserving its signature and return value (the accumulated final text). Feature-add: traces by default. - -- [ ] **Step 1: Capture current behavior as a characterization test.** Before changing anything, write a test that runs the CURRENT `stream_pydantic_ai_events` over a fixture stream with a fake `adk.streaming` and records the messages produced (text, tool request/response). This is the backward-compat baseline ("equivalent messages before/after" from the design). - -- [ ] **Step 2: Run** it green against the current implementation. Commit the test alone: `test(pydantic-ai): characterize stream_pydantic_ai_events output`. - -- [ ] **Step 3: Reimplement** `stream_pydantic_ai_events` to build a `PydanticAITurn` and call `UnifiedEmitter(task_id=task_id, trace_id=, parent_span_id=, streaming=).auto_send_turn(turn)`, returning `result.final_text`. Resolve `trace_id`/`parent_span_id` the same way the module does today (from the streaming/tracing context vars it already reads). Preserve the exact public signature and return type. 
- -- [ ] **Step 4: Run** the characterization test — it must still pass (same messages). Adjust the test only if AGX1-373 deliberately changed the tool-message wire shape; in that case assert the post-373 shape and note it. Confirm tracing now occurs by default (assert spans via a fake tracer). - -- [ ] **Step 5: Commit** `refactor(pydantic-ai): reimplement stream_pydantic_ai_events on UnifiedEmitter (default tracing)`. - ---- - -## Task 4: Route sync ACP delivery through the surface + deprecate the bespoke tracing handler - -**Files:** -- Modify: `src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py` -- (Reference) the sync ACP usage pattern in the pydantic-ai docs/templates. - -- [ ] **Step 1: Deprecate the bespoke tracing handler.** Add a `DeprecationWarning` (via `warnings.warn(...)`) and a docstring note to `create_pydantic_ai_tracing_handler` / `AgentexPydanticAITracingHandler` stating the unified surface (`UnifiedEmitter`, which derives spans from the canonical stream) supersedes it. Keep the symbols importable and functional (no removal — backward compat). - -- [ ] **Step 2: Confirm the sync path.** The sync tap remains `convert_pydantic_ai_to_agentex_events`. Document (in the module docstring of `_pydantic_ai_sync.py`) the recommended sync ACP usage: - -```python -turn = PydanticAITurn(agent.run_stream_events(...), model=...) -async for event in emitter.yield_turn(turn): - yield event -``` - -No code change beyond the docstring (the sync converter already yields the canonical stream; `yield_turn` adds tracing). Add a test that `emitter.yield_turn(PydanticAITurn(...))` forwards the same events the bare converter would and derives spans. - -- [ ] **Step 3: Run** tests; **Commit** `refactor(pydantic-ai): deprecate bespoke tracing handler; document unified sync path`. 
- ---- - -## Task 5: pydantic-ai cross-channel conformance fixtures - -**Files:** -- Create: `tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py` - -**Blocked by AGX1-373** (the cross-channel conformance runner). Once 373 is merged into the foundation branch: - -- [ ] **Step 1: Record canonical fixtures.** For 3-4 representative pydantic-ai runs (text-only; single tool; reasoning/thinking; multi-step text+tool), capture the `StreamTaskMessage*` sequence the tap produces (run `convert_pydantic_ai_to_agentex_events` over recorded `AgentStreamEvent` inputs, or hand-author the canonical sequences). Store as `Fixture(name=..., events=[...])`. - -- [ ] **Step 2: Register** each fixture with the conformance runner and let the cross-channel parametrized test (from AGX1-373) assert yield-vs-auto-send equivalence + span equivalence for each. Register/parametrize within THIS module (per the runner's documented per-module registry semantics). - -- [ ] **Step 3: Run** `./scripts/test tests/lib/core/harness/ -v` — all green. **Commit** `test(pydantic-ai): cross-channel conformance fixtures`. - ---- - -## Task 6: Three integration test agents (sync / async / temporal) - -**Files:** -- Create: `examples/tutorials/harness-pydantic-ai-sync/` , `…-async/` , `…-temporal/` (each a minimal Agentex agent). -- Modify: `.github/workflows/harness-integration.yml` (enable pydantic-ai `live-matrix` rows). -- Modify: `.github/workflows/agentex-tutorials-test.yml` if it enumerates agents. - -Each agent is the smallest agent that exercises one delivery channel through the unified surface with the pydantic-ai harness. - -- [ ] **Step 1: Scaffold from the existing templates.** Base each agent on the corresponding CLI template: `sync-pydantic-ai`, `default-pydantic-ai` (async), `temporal-pydantic-ai` (under `src/agentex/lib/cli/templates/`). 
In each, the message handler builds `PydanticAITurn(agent.run_stream_events(params.content.content), model=...)` and: - - sync agent: `async for ev in emitter.yield_turn(turn): yield ev` - - async + temporal agents: `await emitter.auto_send_turn(turn)` (temporal: inside the activity, as the template already structures it). - Use a tiny pydantic-ai agent with ONE trivial tool so the run exercises text + a tool call + tool response. - -- [ ] **Step 2: Write an integration test per agent** that drives it with a fixed prompt and asserts: valid ordered messages (text + tool request + tool response) and a well-formed span tree. Use the repo's existing tutorial-agent test harness pattern (see `agentex-tutorials-test.yml` and how current tutorial agents are tested). - -- [ ] **Step 3: Wire CI.** In `.github/workflows/harness-integration.yml`, replace the `if: false` placeholder `live-matrix` job (or add a real matrix) with the pydantic-ai × {sync, async, temporal} entries, each running its agent's integration test. If `agentex-tutorials-test.yml` enumerates agents, add the three there too. `log`/document any agent-type not covered (none expected for pydantic-ai). - -- [ ] **Step 4: Run** the integration tests locally (as far as the env allows) and the conformance + unit suites. **Commit** `test(pydantic-ai): sync/async/temporal integration agents + enable CI live-matrix rows`. - ---- - -## Task 7: Full suite, type check, and backward-compat audit - -- [ ] **Step 1:** `./scripts/test tests/lib/core/harness/ tests/lib/adk/ -v` — all green on 3.12 + 3.13. -- [ ] **Step 2:** `uv run pyright src/agentex/lib/` (or the harness + pydantic modules) — 0 new errors. -- [ ] **Step 3: Backward-compat audit.** Confirm the public signatures are unchanged: `convert_pydantic_ai_to_agentex_events` (only gained an optional kwarg), `stream_pydantic_ai_events` (same signature + return), `create_pydantic_ai_tracing_handler` (still importable, now warns). 
Grep the repo + templates for callers and confirm none broke. -- [ ] **Step 4:** If any fix was needed, **Commit** `chore(pydantic-ai): type/back-compat fixes`. - ---- - -## Self-Review checklist (run before opening the PR) - -- Every public symbol that existed before still exists with the same signature (additive-only): `convert_pydantic_ai_to_agentex_events`, `stream_pydantic_ai_events`, `create_pydantic_ai_tracing_handler`. -- The auto-send helper returns the same final text as before (characterization test passes, or the post-373 shape is asserted with a note). -- Tracing is now on by default for both channels and is overridable (emitter `tracer=False`). -- Usage normalization uses the REAL pydantic-ai usage field names (verified in Task 2 Step 1), with defensive `getattr`. -- Conformance fixtures register per-module and pass the cross-channel assertion from AGX1-373. -- 3 test agents exist and their CI rows are enabled. -- No `# type: ignore` added without justification. - -## Notes for the PR description - -- Link AGX1-373 (dependency) and AGX1-375 (import path); note AGX1-374 (reasoning/mixed-ordering auto_send tests) is foundation-level and orthogonal. -- State the diff size; if test agents pushed it over budget, note the PR 4b split. -- This is the template the langgraph (PR 5) and openai (PR 6) migrations follow. 
diff --git a/examples/tutorials/00_sync/030_langgraph/README.md b/examples/tutorials/00_sync/030_langgraph/README.md index e5b1db0f7..5a68792cc 100644 --- a/examples/tutorials/00_sync/030_langgraph/README.md +++ b/examples/tutorials/00_sync/030_langgraph/README.md @@ -1,43 +1,50 @@ -# Tutorial 030: Sync LangGraph Agent +# Tutorial: Sync LangGraph Agent -This tutorial demonstrates how to build a **synchronous** LangGraph agent on AgentEx with: -- Tool calling (ReAct pattern) -- Streaming token output -- Multi-turn conversation memory via AgentEx checkpointer -- Tracing integration +This tutorial demonstrates how to build a **synchronous** LangGraph agent on AgentEx +using the **unified harness surface**: -## Graph Structure +```python +turn = LangGraphTurn(stream, model=None) +emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...) +async for event in emitter.yield_turn(turn): + yield event +``` -![Graph](graph.png) +The `LangGraphTurn` + `UnifiedEmitter` path replaces calling the lower-level +``convert_langgraph_to_agentex_events`` helper directly. ## Key Concepts -### Sync ACP -The sync ACP model uses HTTP request/response for communication. The `@acp.on_message_send` handler receives a message and yields streaming events back to the client. +### Unified Harness + +`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw +LangGraph `astream()` generator and exposes `events` (an async generator of +`TaskMessageUpdate`) and `usage()` (token counts captured from the final +`AIMessage`). + +`UnifiedEmitter.yield_turn(turn)` iterates the turn's events and yields them +to the sync ACP handler unchanged. The same `LangGraphTurn` object can also be +passed to `UnifiedEmitter.auto_send_turn` in the async/temporal channels. 
-### LangGraph Integration -- **StateGraph**: Defines the agent's state machine with `AgentState` (message history) -- **ToolNode**: Automatically executes tool calls from the LLM -- **tools_condition**: Routes between tool execution and final response -- **Checkpointer**: Uses AgentEx's HTTP checkpointer for cross-request memory +### AGX1-377 Note -### Streaming -The agent streams tokens as they're generated using `convert_langgraph_to_agentex_events()`, which converts LangGraph's stream events into AgentEx `TaskMessageUpdate` events. +LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates" +node outputs). The `SpanDeriver` does not open tool spans from Full events +today; that gap is tracked in AGX1-373. ## Files | File | Description | |------|-------------| -| `project/acp.py` | ACP server and message handler | -| `project/graph.py` | LangGraph state graph definition | +| `project/acp.py` | ACP server using unified harness (LangGraphTurn + yield_turn) | +| `project/graph.py` | LangGraph state graph (weather example) | | `project/tools.py` | Tool definitions (weather example) | | `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration | +| `manifest.yaml` | Agent configuration (name: s030-langgraph) | ## Running Locally ```bash -# From this directory agentex agents run ``` diff --git a/examples/tutorials/00_sync/030_langgraph/graph.png b/examples/tutorials/00_sync/030_langgraph/graph.png deleted file mode 100644 index 16d22a1e7..000000000 Binary files a/examples/tutorials/00_sync/030_langgraph/graph.png and /dev/null differ diff --git a/examples/tutorials/00_sync/030_langgraph/manifest.yaml b/examples/tutorials/00_sync/030_langgraph/manifest.yaml index bfe005626..9a52a3dce 100644 --- a/examples/tutorials/00_sync/030_langgraph/manifest.yaml +++ b/examples/tutorials/00_sync/030_langgraph/manifest.yaml @@ -17,7 +17,7 @@ local_development: agent: acp_type: sync name: s030-langgraph - description: A sync LangGraph 
agent with tool calling and streaming + description: A sync LangGraph agent using the unified harness surface (LangGraphTurn + UnifiedEmitter.yield_turn) temporal: enabled: false @@ -47,7 +47,7 @@ deployment: global: agent: name: "s030-langgraph" - description: "A sync LangGraph agent with tool calling and streaming" + description: "A sync LangGraph agent using the unified harness surface" replicaCount: 1 resources: requests: diff --git a/examples/tutorials/00_sync/030_langgraph/project/acp.py b/examples/tutorials/00_sync/030_langgraph/project/acp.py index 517a00322..e42b0f4ea 100644 --- a/examples/tutorials/00_sync/030_langgraph/project/acp.py +++ b/examples/tutorials/00_sync/030_langgraph/project/acp.py @@ -1,8 +1,20 @@ -""" -ACP (Agent Communication Protocol) handler for Agentex. - -This is the API layer — it manages the graph lifecycle and streams -tokens and tool calls from the LangGraph graph to the Agentex frontend. +"""ACP handler for the sync LangGraph agent. + +Uses the unified harness surface: ``LangGraphTurn`` wraps the LangGraph +``astream()`` generator, and ``UnifiedEmitter.yield_turn`` converts it into +the AgentEx ``TaskMessageUpdate`` event stream expected by the sync ACP. + +Properties of the unified surface: +- Tracing is wired through the tracing manager (no bespoke handler boilerplate). +- Text deltas are still accumulated in the handler for the span's ``final_output``. +- Tool calls are emitted as ``StreamTaskMessageFull`` (not Start+Delta+Done) + via the same code path as the async/temporal channels. +- Usage data (token counts) is captured on the ``LangGraphTurn`` object and + can be read after the turn completes. + +AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` +events (from "updates"). The ``SpanDeriver`` does not open tool spans from +Full events today; that gap is tracked in AGX1-373. 
""" from __future__ import annotations @@ -16,29 +28,29 @@ import agentex.lib.adk as adk from project.graph import create_graph -from agentex.lib.adk import create_langgraph_tracing_handler, convert_langgraph_to_agentex_events from agentex.lib.types.acp import SendMessageParams from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.types.task_message_delta import TextDelta from agentex.types.task_message_update import TaskMessageUpdate from agentex.types.task_message_content import TaskMessageContent +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config logger = make_logger(__name__) -# Register the Agentex tracing processor so spans are shipped to the backend add_tracing_processor_config( SGPTracingProcessorConfig( sgp_api_key=os.environ.get("SGP_API_KEY", ""), sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - )) -# Create ACP server + ) +) + acp = FastACP.create(acp_type="sync") -# Compiled graph (lazy-initialized on first request) _graph = None @@ -54,41 +66,42 @@ async def get_graph(): async def handle_message_send( params: SendMessageParams, ) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]: - """Handle incoming messages from Agentex, streaming tokens and tool calls.""" + """Handle incoming messages, streaming tokens and tool calls via unified harness.""" graph = await get_graph() - thread_id = params.task.id + task_id = params.task.id user_message = params.content.content - logger.info(f"Processing message for thread {thread_id}") + logger.info(f"Processing message for task {task_id}") async with adk.tracing.span( - trace_id=thread_id, + trace_id=task_id, + 
task_id=task_id, name="message", input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - callback = create_langgraph_tracing_handler( - trace_id=thread_id, - parent_span_id=turn_span.id if turn_span else None, - ) - stream = graph.astream( {"messages": [{"role": "user", "content": user_message}]}, - config={ - "configurable": {"thread_id": thread_id}, - "callbacks": [callback], - }, + config={"configurable": {"thread_id": task_id}}, stream_mode=["messages", "updates"], ) + turn = LangGraphTurn(stream, model=None) + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + final_text = "" - async for event in convert_langgraph_to_agentex_events(stream): - # Accumulate text deltas for span output + async for event in emitter.yield_turn(turn): + # Accumulate text deltas so the span's final_output is the assistant + # text (matching the async tutorial), not the usage metrics. delta = getattr(event, "delta", None) if isinstance(delta, TextDelta) and delta.text_delta: final_text += delta.text_delta yield event if turn_span: - turn_span.output = {"final_output": final_text} + turn_span.output = {"final_output": final_text, "usage": turn.usage().model_dump()} diff --git a/examples/tutorials/00_sync/030_langgraph/project/graph.py b/examples/tutorials/00_sync/030_langgraph/project/graph.py index 53728cd58..6709719e5 100644 --- a/examples/tutorials/00_sync/030_langgraph/project/graph.py +++ b/examples/tutorials/00_sync/030_langgraph/project/graph.py @@ -1,8 +1,7 @@ -""" -LangGraph graph definition. +"""LangGraph graph definition for the 030_langgraph sync agent. -Defines the state, nodes, edges, and compiles the graph. -The compiled graph is the boundary between this module and the API layer. +The graph is the same weather example as before — the graph definition is not +affected by the harness migration. Only ``acp.py`` changes. 
""" from __future__ import annotations @@ -35,15 +34,12 @@ class AgentState(TypedDict): """State schema for the agent graph.""" + messages: Annotated[list[Any], add_messages] async def create_graph(): - """Create and compile the agent graph with checkpointer. - - Returns: - A compiled LangGraph StateGraph ready for invocation. - """ + """Create and compile the agent graph with checkpointer.""" llm = ChatOpenAI( model=MODEL_NAME, reasoning={"effort": "high", "summary": "auto"}, @@ -56,9 +52,7 @@ def agent_node(state: AgentState) -> dict[str, Any]: """Process the current state and generate a response.""" messages = state["messages"] if not messages or not isinstance(messages[0], SystemMessage): - system_content = SYSTEM_PROMPT.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ) + system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")) messages = [SystemMessage(content=system_content)] + messages response = llm_with_tools.invoke(messages) return {"messages": [response]} diff --git a/examples/tutorials/00_sync/030_langgraph/project/tools.py b/examples/tutorials/00_sync/030_langgraph/project/tools.py index 1b402a906..b3e5dba34 100644 --- a/examples/tutorials/00_sync/030_langgraph/project/tools.py +++ b/examples/tutorials/00_sync/030_langgraph/project/tools.py @@ -1,9 +1,4 @@ -""" -Tool definitions for the LangGraph agent. - -Add your custom tools here. Each tool should be a function decorated with @tool -or created using the Tool class. -""" +"""Tool definitions for the 030_langgraph sync agent.""" from langchain_core.tools import Tool @@ -17,16 +12,13 @@ def get_weather(city: str) -> str: Returns: A string describing the weather conditions. """ - # TODO: Replace with actual weather API call return f"The weather in {city} is sunny and 72°F" -# Define tools weather_tool = Tool( name="get_weather", func=get_weather, description="Get the current weather for a city. 
Input should be a city name.", ) -# Export all tools as a list TOOLS = [weather_tool] diff --git a/examples/tutorials/00_sync/030_langgraph/pyproject.toml b/examples/tutorials/00_sync/030_langgraph/pyproject.toml index fc9f99971..33bea16b5 100644 --- a/examples/tutorials/00_sync/030_langgraph/pyproject.toml +++ b/examples/tutorials/00_sync/030_langgraph/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "s030-langgraph" version = "0.1.0" -description = "A sync LangGraph agent with tool calling and streaming" +description = "A sync LangGraph agent using the unified harness surface" readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py b/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py index 36fcf418f..dabd83e76 100644 --- a/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py +++ b/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py @@ -1,14 +1,8 @@ """ -Tests for the sync LangGraph agent. +Tests for the sync harness LangGraph agent. -This test suite validates: -- Non-streaming message sending with tool-calling LangGraph agent -- Streaming message sending with token-by-token output - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. Run: pytest test_agent.py -v +Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.yield_turn) +end-to-end against a live AgentEx server. 
Configuration: - AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) @@ -25,26 +19,22 @@ from agentex.types.agent_rpc_params import ParamsCreateTaskRequest, ParamsSendMessageRequest from agentex.lib.sdk.fastacp.base.base_acp_server import uuid -# Configuration from environment variables AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") AGENT_NAME = os.environ.get("AGENT_NAME", "s030-langgraph") @pytest.fixture def client(): - """Create an AgentEx client instance for testing.""" return Agentex(base_url=AGENTEX_API_BASE_URL) @pytest.fixture def agent_name(): - """Return the agent name for testing.""" return AGENT_NAME @pytest.fixture def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" agents = client.agents.list() for agent in agents: if agent.name == agent_name: @@ -53,10 +43,7 @@ def agent_id(client, agent_name): class TestNonStreamingMessages: - """Test non-streaming message sending with LangGraph agent.""" - def test_send_simple_message(self, client: Agentex, agent_name: str): - """Test sending a simple message and receiving a response.""" response = client.agents.send_message( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -72,7 +59,6 @@ def test_send_simple_message(self, client: Agentex, agent_name: str): assert len(result) >= 1 def test_tool_calling(self, client: Agentex, agent_name: str): - """Test that the agent can use tools (e.g., weather tool).""" response = client.agents.send_message( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -88,12 +74,10 @@ def test_tool_calling(self, client: Agentex, agent_name: str): assert len(result) >= 1 def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id: str): - """Test multi-turn conversation with memory via LangGraph checkpointer.""" task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = 
task_response.result assert task is not None - # First message response1 = client.agents.send_message( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -107,7 +91,6 @@ def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id ) assert response1.result is not None - # Second message - agent should remember the name response2 = client.agents.send_message( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -126,10 +109,7 @@ def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id class TestStreamingMessages: - """Test streaming message sending with LangGraph agent.""" - def test_stream_simple_message(self, client: Agentex, agent_name: str): - """Test streaming a simple message response.""" stream = client.agents.send_message_stream( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -140,14 +120,11 @@ def test_stream_simple_message(self, client: Agentex, agent_name: str): ) ), ) - aggregated_content, chunks = collect_streaming_response(stream) - assert aggregated_content is not None assert len(chunks) > 1, "No chunks received in streaming response." def test_stream_tool_calling(self, client: Agentex, agent_name: str): - """Test streaming with tool calls.""" stream = client.agents.send_message_stream( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -158,9 +135,7 @@ def test_stream_tool_calling(self, client: Agentex, agent_name: str): ) ), ) - aggregated_content, chunks = collect_streaming_response(stream) - assert aggregated_content is not None assert len(chunks) > 0, "No chunks received in streaming response." 
diff --git a/examples/tutorials/00_sync/040_pydantic_ai/README.md b/examples/tutorials/00_sync/040_pydantic_ai/README.md index 02c3b57c7..ef52c7c77 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/README.md +++ b/examples/tutorials/00_sync/040_pydantic_ai/README.md @@ -1,46 +1,52 @@ -# Tutorial 040: Sync Pydantic AI Agent +# Sync Pydantic AI Agent -This tutorial demonstrates how to build a **synchronous** Pydantic AI agent on AgentEx with: -- Tool calling (Pydantic AI handles the tool loop internally) -- Streaming token output (including token-by-token tool-call argument streaming) +A minimal **synchronous** Pydantic AI agent that drives the **unified harness +surface** (`UnifiedEmitter.yield_turn` + `PydanticAITurn`) on the sync +(HTTP-yield) channel. -## Key Concepts +## Why this agent exists -### Sync ACP -The sync ACP model uses HTTP request/response for communication. The `@acp.on_message_send` handler receives a message and yields streaming events back to the client. +This agent is the sync coverage for the unified surface: it shows an agent +author wiring the sync channel through `UnifiedEmitter.yield_turn` and getting +automatic span derivation (tool spans nested under the per-turn span) for free, +exactly like the async/temporal channels. -### Pydantic AI Integration -- **Agent**: A single `pydantic_ai.Agent` that owns the model and tools. No graph required — Pydantic AI runs its own tool-call loop until the model is done. -- **`@agent.tool_plain`**: Registers a Python function as a tool. Pydantic AI infers the schema from type hints and docstring. -- **`agent.run_stream_events(...)`**: Yields `AgentStreamEvent`s (PartStartEvent / PartDeltaEvent / PartEndEvent / FunctionToolResultEvent) as the model produces them. 
+## How it wires the unified surface -### Streaming -The agent streams tokens and tool-call arguments as they're generated using `convert_pydantic_ai_to_agentex_events()`, which adapts Pydantic AI's stream into AgentEx `TaskMessageUpdate` events. Notably, **tool-call arguments stream as `ToolRequestDelta` tokens** rather than arriving as a single complete payload — a richer experience than what OpenAI Agents SDK currently exposes. +In `project/acp.py`: -## Files +```python +emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, +) +async with agent.run_stream_events(user_message) as stream: + turn = PydanticAITurn(stream, model=MODEL_NAME) # coalesce off: stream tool-call arg tokens + async for ev in emitter.yield_turn(turn): + yield ev +``` -| File | Description | -|------|-------------| -| `project/acp.py` | ACP server and message handler | -| `project/agent.py` | Pydantic AI agent + tool registration | -| `project/tools.py` | Tool definitions (weather example) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration | +- `coalesce_tool_requests=False` (the default) preserves token-by-token + tool-call argument streaming on the sync channel. +- The `UnifiedEmitter` is constructed from the ACP/streaming context + (`task_id` + `trace_id` + `parent_span_id`) so tool spans nest under the + per-turn `AGENT_WORKFLOW` span automatically. -## Running Locally +## Files -```bash -# From this directory -agentex agents run -``` +- `project/acp.py` — sync ACP handler using `emitter.yield_turn(...)`. +- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool. +- `project/tools.py` — `get_weather(city)` returning a constant. +- `tests/test_agent.py` — live integration test (requires a running agent). 
-## Running Tests +## Tools -```bash -pytest tests/test_agent.py -v -``` +- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string so a + run deterministically exercises text + a tool call + a tool response. -## Notes +## Offline coverage -- Multi-turn conversation memory is not wired in this tutorial. Pydantic AI does not ship a checkpointer like LangGraph; to add memory, load prior messages via `adk.messages.list(task_id=...)` and pass them to `agent.run_stream_events(..., message_history=...)`. -- Reasoning/thinking tokens are not exercised here because `gpt-4o-mini` does not emit `ThinkingPart`s. Swap to a reasoning-capable model (e.g. `openai:o1-mini` via Pydantic AI's appropriate provider) if you want to test that branch end-to-end. +Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake +streaming/tracing, no network) live in the SDK repo under +`tests/lib/core/harness/` (the pydantic-ai sync suite). diff --git a/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml b/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml index 68d3b4a00..9563de39c 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml +++ b/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml @@ -17,7 +17,7 @@ local_development: agent: acp_type: sync name: s040-pydantic-ai - description: A sync Pydantic AI agent with tool calling and streaming + description: A sync Pydantic AI harness test agent using the unified emitter surface temporal: enabled: false @@ -47,7 +47,7 @@ deployment: global: agent: name: "s040-pydantic-ai" - description: "A sync Pydantic AI agent with tool calling and streaming" + description: "A sync Pydantic AI harness test agent using the unified emitter surface" replicaCount: 1 resources: requests: diff --git a/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py b/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py index 0c096893f..f23cd7960 100644 --- 
a/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py +++ b/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py @@ -1,7 +1,17 @@ -"""ACP (Agent Communication Protocol) handler for Agentex. - -This is the API layer — it owns the agent lifecycle and streams tokens -and tool calls from the Pydantic AI agent to the Agentex frontend. +"""ACP handler for the sync harness Pydantic AI test agent. + +This agent exercises the UNIFIED HARNESS SURFACE on the sync (HTTP-yield) +channel — ``UnifiedEmitter.yield_turn(PydanticAITurn(...))`` — rather than the +bare ``convert_pydantic_ai_to_agentex_events`` converter this tutorial used +before the harness migration. The unified surface gives the sync channel the +same tracing (span derivation) the async/temporal channels get for free. + +Flow: +1. Open a per-turn AGENT_WORKFLOW span via ``adk.tracing.span``. +2. Construct a ``UnifiedEmitter`` from the ACP/streaming context (task_id + + trace_id + parent_span_id) so tool spans nest under the turn span. +3. Wrap ``agent.run_stream_events(...)`` in a ``PydanticAITurn`` and forward + events with ``emitter.yield_turn(turn)`` — yielding each to the client.
""" from __future__ import annotations @@ -14,17 +24,15 @@ load_dotenv() import agentex.lib.adk as adk -from project.agent import create_agent -from agentex.lib.adk import ( - create_pydantic_ai_tracing_handler, - convert_pydantic_ai_to_agentex_events, -) +from project.agent import MODEL_NAME, create_agent from agentex.lib.types.acp import SendMessageParams +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger from agentex.lib.sdk.fastacp.fastacp import FastACP from agentex.types.task_message_update import TaskMessageUpdate from agentex.types.task_message_content import TaskMessageContent +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config logger = make_logger(__name__) @@ -54,7 +62,7 @@ def get_agent(): async def handle_message_send( params: SendMessageParams, ) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]: - """Handle incoming messages from Agentex, streaming tokens and tool calls.""" + """Handle incoming messages, streaming events through the unified surface.""" agent = get_agent() task_id = params.task.id @@ -68,11 +76,17 @@ async def handle_message_send( input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - tracing_handler = create_pydantic_ai_tracing_handler( + # Construct the UnifiedEmitter from the ACP/streaming context so tracing + # is automatic: tool spans nest under this turn's span. 
+ emitter = UnifiedEmitter( + task_id=task_id, trace_id=task_id, parent_span_id=turn_span.id if turn_span else None, - task_id=task_id, ) + async with agent.run_stream_events(user_message) as stream: - async for event in convert_pydantic_ai_to_agentex_events(stream, tracing_handler=tracing_handler): - yield event + # PydanticAITurn preserves token-by-token tool-call argument + # streaming (Start+Delta+Done) on the sync/HTTP channel. + turn = PydanticAITurn(stream, model=MODEL_NAME) + async for ev in emitter.yield_turn(turn): + yield ev diff --git a/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py b/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py index 2c0f6f10c..72fd74173 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py +++ b/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py @@ -1,4 +1,4 @@ -"""Pydantic AI agent definition. +"""Pydantic AI agent definition for the sync harness test agent. The Agent is the boundary between this module and the API layer (acp.py). Pydantic AI handles its own tool-call loop internally — no graph required. @@ -12,6 +12,8 @@ from project.tools import get_weather +__all__ = ["create_agent", "MODEL_NAME"] + MODEL_NAME = "openai:gpt-4o-mini" SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. 
@@ -29,9 +31,7 @@ def create_agent() -> Agent: """Build and return the Pydantic AI agent with tools registered.""" agent = Agent( MODEL_NAME, - system_prompt=SYSTEM_PROMPT.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ), + system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), ) agent.tool_plain(get_weather) diff --git a/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py b/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py index bab87942a..d649c75f1 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py +++ b/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py @@ -1,8 +1,8 @@ -"""Tool definitions for the Pydantic AI agent. +"""Tool definitions for the sync harness Pydantic AI agent. Pydantic AI tools are registered directly on the Agent via decorators -(see project.agent). This module hosts the bare functions so they're -easy to unit-test in isolation. +(see project.agent). This module hosts the bare function so it is easy to +unit-test in isolation. 
""" from __future__ import annotations diff --git a/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml b/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml index 3e645fa15..748a9f3cb 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml +++ b/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "s040-pydantic-ai" version = "0.1.0" -description = "A sync Pydantic AI agent with tool calling and streaming" +description = "A sync Pydantic AI harness test agent using the unified emitter surface" readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py b/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py index d3deed1c7..4aad12a56 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py +++ b/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py @@ -1,8 +1,10 @@ -"""Tests for the sync Pydantic AI agent. +"""Live tests for the sync Pydantic AI agent. -This test suite validates: -- Non-streaming message sending with tool-calling Pydantic AI agent -- Streaming message sending with token-by-token output +These tests require a running agent (server + deployed agent) and exercise the +unified-surface sync handler end-to-end over the wire. + +Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives +in the SDK repo under ``tests/lib/core/harness/`` (the pydantic-ai sync suite). To run these tests: 1. 
Make sure the agent is running (via docker-compose or `agentex agents run`) @@ -50,7 +52,7 @@ def agent_id(client, agent_name): class TestNonStreamingMessages: - """Test non-streaming message sending with Pydantic AI agent.""" + """Test non-streaming message sending with the unified-surface sync agent.""" def test_send_simple_message(self, client: Agentex, agent_name: str): """Test sending a simple message and receiving a response.""" @@ -86,7 +88,7 @@ def test_tool_calling(self, client: Agentex, agent_name: str): class TestStreamingMessages: - """Test streaming message sending with Pydantic AI agent.""" + """Test streaming message sending through the unified yield_turn path.""" def test_stream_simple_message(self, client: Agentex, agent_name: str): """Test streaming a simple message response.""" @@ -107,10 +109,10 @@ def test_stream_simple_message(self, client: Agentex, agent_name: str): assert len(chunks) > 1, "No chunks received in streaming response." def test_stream_tool_calling(self, client: Agentex, agent_name: str): - """Test streaming with tool calls. + """Test streaming with tool calls through the unified surface. - This exercises the headline Pydantic AI converter feature: - tool-call argument tokens streaming through as ToolRequestDelta. + Exercises token-by-token tool-call argument streaming (coalesce off), + which the unified yield_turn path preserves on the sync channel. 
""" stream = client.agents.send_message_stream( agent_name=agent_name, diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/.dockerignore b/examples/tutorials/00_sync/050_openai_agents/.dockerignore similarity index 100% rename from examples/tutorials/00_sync/050_openai_agents_local_sandbox/.dockerignore rename to examples/tutorials/00_sync/050_openai_agents/.dockerignore diff --git a/examples/tutorials/00_sync/harness_langgraph/Dockerfile b/examples/tutorials/00_sync/050_openai_agents/Dockerfile similarity index 73% rename from examples/tutorials/00_sync/harness_langgraph/Dockerfile rename to examples/tutorials/00_sync/050_openai_agents/Dockerfile index 9d492198f..c9ccd6f54 100644 --- a/examples/tutorials/00_sync/harness_langgraph/Dockerfile +++ b/examples/tutorials/00_sync/050_openai_agents/Dockerfile @@ -23,16 +23,16 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 # Copy pyproject.toml and README.md to install dependencies -COPY 00_sync/harness_langgraph/pyproject.toml /app/harness_langgraph/pyproject.toml -COPY 00_sync/harness_langgraph/README.md /app/harness_langgraph/README.md +COPY 00_sync/050_openai_agents/pyproject.toml /app/050_openai_agents/pyproject.toml +COPY 00_sync/050_openai_agents/README.md /app/050_openai_agents/README.md -WORKDIR /app/harness_langgraph +WORKDIR /app/050_openai_agents # Copy the project code -COPY 00_sync/harness_langgraph/project /app/harness_langgraph/project +COPY 00_sync/050_openai_agents/project /app/050_openai_agents/project # Copy the test files -COPY 00_sync/harness_langgraph/tests /app/harness_langgraph/tests +COPY 00_sync/050_openai_agents/tests /app/050_openai_agents/tests # Copy shared test utilities COPY test_utils /app/test_utils @@ -44,7 +44,7 @@ RUN uv pip install --system .[dev] ENV PYTHONPATH=/app # Set test environment variables -ENV AGENT_NAME=s-harness-langgraph +ENV AGENT_NAME=s050-openai-agents # Run the agent using uvicorn CMD ["uvicorn", 
"project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/00_sync/060_harness_openai/README.md b/examples/tutorials/00_sync/050_openai_agents/README.md similarity index 85% rename from examples/tutorials/00_sync/060_harness_openai/README.md rename to examples/tutorials/00_sync/050_openai_agents/README.md index e22e9aa8b..98cec3f9a 100644 --- a/examples/tutorials/00_sync/060_harness_openai/README.md +++ b/examples/tutorials/00_sync/050_openai_agents/README.md @@ -9,8 +9,8 @@ The OpenAI Agents SDK produces native streaming events. This tutorial wraps a `Runner.run_streamed` result in an `OpenAITurn` — the provider -> canonical `StreamTaskMessage*` adapter — and forwards the canonical stream to the frontend via `UnifiedEmitter.yield_turn`. The same `OpenAITurn` flows unchanged through -`auto_send_turn` in the async (`130_harness_openai`) and temporal -(`140_harness_openai`) variants; only the delivery method differs. +`auto_send_turn` in the async (`10_async/00_base/120_openai_agents`) and temporal +(`10_async/10_temporal/120_openai_agents`) variants; only the delivery method differs. 
```python result = Runner.run_streamed(starting_agent=agent, input=user_message) diff --git a/examples/tutorials/00_sync/060_harness_openai/manifest.yaml b/examples/tutorials/00_sync/050_openai_agents/manifest.yaml similarity index 84% rename from examples/tutorials/00_sync/060_harness_openai/manifest.yaml rename to examples/tutorials/00_sync/050_openai_agents/manifest.yaml index 4967c1f8d..bdb47e8d8 100644 --- a/examples/tutorials/00_sync/060_harness_openai/manifest.yaml +++ b/examples/tutorials/00_sync/050_openai_agents/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../ include_paths: - - 00_sync/060_harness_openai + - 00_sync/050_openai_agents - test_utils - dockerfile: 00_sync/060_harness_openai/Dockerfile - dockerignore: 00_sync/060_harness_openai/.dockerignore + dockerfile: 00_sync/050_openai_agents/Dockerfile + dockerignore: 00_sync/050_openai_agents/.dockerignore local_development: agent: @@ -16,7 +16,7 @@ local_development: agent: acp_type: sync - name: s060-harness-openai + name: s050-openai-agents description: A sync OpenAI Agents SDK agent on the unified harness surface temporal: @@ -46,7 +46,7 @@ deployment: global: agent: - name: "s060-harness-openai" + name: "s050-openai-agents" description: "A sync OpenAI Agents SDK agent on the unified harness surface" replicaCount: 1 resources: diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/__init__.py b/examples/tutorials/00_sync/050_openai_agents/project/__init__.py similarity index 100% rename from examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/__init__.py rename to examples/tutorials/00_sync/050_openai_agents/project/__init__.py diff --git a/examples/tutorials/00_sync/060_harness_openai/project/acp.py b/examples/tutorials/00_sync/050_openai_agents/project/acp.py similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/project/acp.py rename to examples/tutorials/00_sync/050_openai_agents/project/acp.py diff --git 
a/examples/tutorials/00_sync/060_harness_openai/project/agent.py b/examples/tutorials/00_sync/050_openai_agents/project/agent.py similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/project/agent.py rename to examples/tutorials/00_sync/050_openai_agents/project/agent.py diff --git a/examples/tutorials/00_sync/060_harness_openai/project/tools.py b/examples/tutorials/00_sync/050_openai_agents/project/tools.py similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/project/tools.py rename to examples/tutorials/00_sync/050_openai_agents/project/tools.py diff --git a/examples/tutorials/00_sync/060_harness_openai/pyproject.toml b/examples/tutorials/00_sync/050_openai_agents/pyproject.toml similarity index 95% rename from examples/tutorials/00_sync/060_harness_openai/pyproject.toml rename to examples/tutorials/00_sync/050_openai_agents/pyproject.toml index 39cceb8f2..48d2481dd 100644 --- a/examples/tutorials/00_sync/060_harness_openai/pyproject.toml +++ b/examples/tutorials/00_sync/050_openai_agents/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "s060-harness-openai" +name = "s050-openai-agents" version = "0.1.0" description = "A sync OpenAI Agents SDK agent on the unified harness surface" readme = "README.md" diff --git a/examples/tutorials/00_sync/060_harness_openai/tests/test_agent.py b/examples/tutorials/00_sync/050_openai_agents/tests/test_agent.py similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/tests/test_agent.py rename to examples/tutorials/00_sync/050_openai_agents/tests/test_agent.py diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/Dockerfile b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/Dockerfile deleted file mode 100644 index 8e0ec22df..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# 
syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 00_sync/050_openai_agents_local_sandbox/pyproject.toml /app/050_openai_agents_local_sandbox/pyproject.toml -COPY 00_sync/050_openai_agents_local_sandbox/README.md /app/050_openai_agents_local_sandbox/README.md - -WORKDIR /app/050_openai_agents_local_sandbox - -# Copy the project code -COPY 00_sync/050_openai_agents_local_sandbox/project /app/050_openai_agents_local_sandbox/project - -# Copy the test files -COPY 00_sync/050_openai_agents_local_sandbox/tests /app/050_openai_agents_local_sandbox/tests - -# Copy shared test utilities -COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=s050-openai-agents-local-sandbox - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/README.md b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/README.md deleted file mode 100644 index 9c2c81d7d..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/README.md +++ /dev/null @@ -1,113 +0,0 @@ -# Tutorial 050: Sync OpenAI Agents SDK with a Local Sandbox - -This tutorial demonstrates how to build a **synchronous** agent on AgentEx using the -[OpenAI Agents SDK](https://developers.openai.com/api/docs/guides/agents) and its 
-**sandbox** runtime, running with the **local** (`unix_local`) backend. - -The agent is a "local sandbox assistant": it answers questions by actually running -real shell commands (e.g. `python3 --version`, `ls /tmp`, `python3 -c "..."`) -instead of guessing. - -## Key Concepts - -### Sync ACP -The sync ACP model uses HTTP request/response for communication. The -`@acp.on_message_send` handler receives a message, runs the agent, and returns the -agent's final answer as a `TextContent`. - -### OpenAI Agents SDK Sandbox -The OpenAI Agents SDK ships `agents.sandbox`, which lets you give an agent -**capabilities** (instead of hand-written tools) that the runtime turns into real -tools backed by a sandbox: - -- **`SandboxAgent`**: an `Agent` that is granted sandbox capabilities. -- **Capabilities** (`from agents.sandbox.capabilities import Shell, Filesystem, Memory`): - each capability expands into a set of real tools. This tutorial uses `Shell`, which - lets the model run real shell commands. -- **`SandboxRunConfig`** + a sandbox **client**: tells the runtime *where* the tools - actually execute. - -### The LOCAL sandbox (`UnixLocalSandboxClient`) -This tutorial uses the local backend -(`from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient, UnixLocalSandboxClientOptions`), -`backend_id="unix_local"`. The local sandbox runs shell commands **ON THE HOST** — -the agent's own container/process. There is **no Docker, no Temporal, and no remote -sandbox infrastructure** involved. This makes it the simplest way to give an agent a -real shell. 
- -The sandbox is wired up through the SDK's `RunConfig`: - -```python -from agents import Runner, set_tracing_disabled -from agents.run_config import RunConfig -from agents.sandbox import SandboxAgent, SandboxRunConfig -from agents.sandbox.capabilities import Shell -from agents.sandbox.sandboxes.unix_local import ( - UnixLocalSandboxClient, - UnixLocalSandboxClientOptions, -) - -set_tracing_disabled(True) # avoid api.openai.com tracing 401 behind a gateway - -agent = SandboxAgent( - name="Local Sandbox Assistant", - instructions="...use the shell tools to actually run commands...", - capabilities=[Shell()], -) -run_config = RunConfig( - sandbox=SandboxRunConfig( - client=UnixLocalSandboxClient(), - options=UnixLocalSandboxClientOptions(), - ) -) -result = await Runner.run(agent, input="what's the python version?", run_config=run_config) -print(result.final_output) -``` - -`Runner.run` drives the full tool-call loop internally: the model issues shell -commands, the local sandbox runs them on the host, the output is fed back, and the -loop continues until the model produces a final answer. - -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | ACP server and message handler (runs the sandbox agent) | -| `project/agent.py` | `SandboxAgent` + `RunConfig(sandbox=...)` wiring + `run_agent` | -| `project/tools.py` | Sandbox capability factory (`Shell`) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration | - -## Running Locally - -```bash -# From this directory -agentex agents run -``` - -Set `OPENAI_API_KEY` (or `LITELLM_API_KEY` if you're behind the Scale LiteLLM -gateway) in your environment or in a `.env` file in `project/` so the agent can call -the model. 
- -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` - -## Notes - -- **No infra required.** Because this uses the `unix_local` backend, the shell tools - run directly in the agent's process — no Docker daemon, no Temporal, no remote - sandbox. Swap the client for a remote/containerized backend to isolate execution. -- **Tracing.** `set_tracing_disabled(True)` turns off the OpenAI Agents SDK's native - tracer (which would otherwise try to ship traces to `api.openai.com`). The manifest - also sets `OPENAI_AGENTS_DISABLE_TRACING=1`. AgentEx/SGP tracing still runs via the - tracing manager configured in `acp.py` when SGP credentials are present. -- **Capabilities are the tools.** To let the agent do more, add capabilities in - `project/tools.py` (e.g. `Filesystem()`, `Memory()`). - -## Further Reading - -- OpenAI Agents SDK guide: https://developers.openai.com/api/docs/guides/agents -- The next evolution of the Agents SDK: https://openai.com/index/the-next-evolution-of-the-agents-sdk/ diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/manifest.yaml b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/manifest.yaml deleted file mode 100644 index 8ae5b98a1..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/manifest.yaml +++ /dev/null @@ -1,61 +0,0 @@ -build: - context: - root: ../../ - include_paths: - - 00_sync/050_openai_agents_local_sandbox - - test_utils - dockerfile: 00_sync/050_openai_agents_local_sandbox/Dockerfile - dockerignore: 00_sync/050_openai_agents_local_sandbox/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: sync - name: s050-openai-agents-local-sandbox - description: A sync OpenAI Agents SDK agent using a local (unix_local) sandbox - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: 
REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - - env: - OPENAI_AGENTS_DISABLE_TRACING: "1" - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "s050-openai-agents-local-sandbox" - description: "A sync OpenAI Agents SDK agent using a local (unix_local) sandbox" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/acp.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/acp.py deleted file mode 100644 index 005d679bf..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/acp.py +++ /dev/null @@ -1,77 +0,0 @@ -"""ACP (Agent Communication Protocol) handler for Agentex. - -This is the API layer — it owns the agent lifecycle and runs the OpenAI Agents -SDK *sandbox* agent for each incoming message, returning the agent's final -answer to the Agentex frontend. - -The agent uses the LOCAL sandbox backend (``UnixLocalSandboxClient``), which runs -shell commands on the host (this process/container). The OpenAI Agents SDK runs -its tool-call loop internally via ``Runner.run`` and returns the final output, so -this sync handler returns a single ``TextContent`` rather than streaming tokens. 
-""" - -from __future__ import annotations - -import os - -from dotenv import load_dotenv - -load_dotenv() - -from agentex.lib import adk -from project.agent import run_agent -from agentex.lib.types.acp import SendMessageParams -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.types.text_content import TextContent -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.types.task_message_content import TaskMessageContent -from agentex.lib.core.tracing.tracing_processor_manager import ( - add_tracing_processor_config, -) - -logger = make_logger(__name__) - -# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client -# compatibility, so the same example works behind the Scale LiteLLM gateway. -_litellm_key = os.environ.get("LITELLM_API_KEY") -if _litellm_key and not os.environ.get("OPENAI_API_KEY"): - os.environ["OPENAI_API_KEY"] = _litellm_key - -SGP_API_KEY = os.environ.get("SGP_API_KEY", "") -SGP_ACCOUNT_ID = os.environ.get("SGP_ACCOUNT_ID", "") -SGP_CLIENT_BASE_URL = os.environ.get("SGP_CLIENT_BASE_URL", "") - -if SGP_API_KEY and SGP_ACCOUNT_ID: - add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=SGP_API_KEY, - sgp_account_id=SGP_ACCOUNT_ID, - sgp_base_url=SGP_CLIENT_BASE_URL, - ) - ) - -acp = FastACP.create(acp_type="sync") - - -@acp.on_message_send -async def handle_message_send( - params: SendMessageParams, -) -> TaskMessageContent: - """Handle incoming messages by running the local-sandbox agent.""" - task_id = params.task.id - user_message = params.content.content - logger.info(f"Processing message for task {task_id}") - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name="message", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - final_output = await run_agent(user_message) - if turn_span: - turn_span.output = {"final_output": final_output} - - return 
TextContent(author="agent", content=final_output) diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/agent.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/agent.py deleted file mode 100644 index d674d14c9..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/agent.py +++ /dev/null @@ -1,92 +0,0 @@ -"""OpenAI Agents SDK local-sandbox agent definition. - -This mirrors the Pydantic AI tutorial (040): the agent is the boundary between -this module and the API layer (acp.py). The difference is the runtime — here we -use the OpenAI Agents SDK ``SandboxAgent`` together with the **local** sandbox -backend (``UnixLocalSandboxClient``). - -The local sandbox runs shell commands ON THE HOST — the agent's own -container/process. There is no Docker, no Temporal, and no remote sandbox -infrastructure. The OpenAI Agents SDK runs its own tool-call loop internally: -when the model decides to run a shell command, the sandbox executes it locally -and feeds the output back to the model until it produces a final answer. -""" - -from __future__ import annotations - -from datetime import datetime - -from agents import Runner, set_tracing_disabled -from agents.sandbox import SandboxAgent, SandboxRunConfig -from agents.run_config import RunConfig -from agents.sandbox.sandboxes.unix_local import ( - UnixLocalSandboxClient, - UnixLocalSandboxClientOptions, -) - -from project.tools import get_capabilities - -# Disable the openai-agents SDK's native tracer so it doesn't ship traces to -# api.openai.com using OPENAI_API_KEY (which may be a gateway/proxy key and would -# 401). Agentex tracing still runs via the tracing manager configured in acp.py. -set_tracing_disabled(True) - -MODEL_NAME = "gpt-4o-mini" -INSTRUCTIONS = """You are a local sandbox assistant. - -Current date and time: {timestamp} - -You have access to shell tools that run real commands on the local machine. 
- -Guidelines: -- ALWAYS use the shell tools to actually run commands — never guess or make up - output. If the user asks for the Python version, run `python3 --version`. If - they ask to list files, run `ls`. If they ask you to compute something, use - `python3 -c "..."`. -- Run the minimal command(s) needed to answer the question. -- Report the real command output back to the user, concisely. -""" - - -def create_agent() -> SandboxAgent: - """Build and return the OpenAI Agents SDK sandbox agent. - - The agent is granted shell capabilities (see ``project.tools``). The actual - sandbox backend (where the shell commands run) is supplied at run time via - the ``RunConfig`` returned by ``create_run_config``. - """ - return SandboxAgent( - name="Local Sandbox Assistant", - model=MODEL_NAME, - instructions=INSTRUCTIONS.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ), - capabilities=get_capabilities(), - ) - - -def create_run_config() -> RunConfig: - """Build the RunConfig that points the agent at the LOCAL sandbox backend. - - ``UnixLocalSandboxClient`` (backend_id="unix_local") runs shell commands on - the host — the agent's own process — so no Docker or remote infra is needed. - """ - return RunConfig( - sandbox=SandboxRunConfig( - client=UnixLocalSandboxClient(), - options=UnixLocalSandboxClientOptions(), - ) - ) - - -async def run_agent(user_message: str) -> str: - """Run the sandbox agent on a single user message and return the final text. - - The OpenAI Agents SDK handles the full tool-call loop internally: the model - issues shell commands, the local sandbox runs them on the host, and the - output is fed back until the model produces a final answer. 
- """ - agent = create_agent() - run_config = create_run_config() - result = await Runner.run(agent, input=user_message, run_config=run_config, max_turns=10) - return result.final_output diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/tools.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/tools.py deleted file mode 100644 index 0ad8f25ac..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/tools.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Sandbox capabilities for the OpenAI Agents SDK local-sandbox agent. - -Unlike the Pydantic AI tutorial (040), this agent does not register hand-written -Python functions as tools. Instead it is given *capabilities* — the OpenAI Agents -SDK sandbox runtime turns each capability into a real set of tools (run a shell -command, read a file, etc.) backed by an actual sandbox backend. - -Here we use the ``Shell`` capability, which lets the model run real shell commands. -With the local (``unix_local``) backend those commands execute ON THE HOST — the -agent's own process/container — so there is no Docker, Temporal, or remote infra -involved. This module hosts the capability factory so the agent wiring in -``project.agent`` stays readable and the capability set is easy to extend -(e.g. add ``Filesystem()`` or ``Memory()``). -""" - -from __future__ import annotations - -from agents.sandbox.capabilities import Shell - - -def get_capabilities() -> list: - """Return the sandbox capabilities the agent is allowed to use. - - Returns: - A list of OpenAI Agents SDK sandbox capabilities. We grant ``Shell`` so - the agent can run real shell commands on the local machine. Add - ``Filesystem()`` or ``Memory()`` here to expand what the agent can do. 
- """ - return [Shell()] diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/pyproject.toml b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/pyproject.toml deleted file mode 100644 index 472a6bef7..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "s050-openai-agents-local-sandbox" -version = "0.1.0" -description = "A sync OpenAI Agents SDK agent using a local (unix_local) sandbox" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "openai-agents>=0.14.3,<0.15", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/tests/test_agent.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/tests/test_agent.py deleted file mode 100644 index 52ed1bf2f..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/tests/test_agent.py +++ /dev/null @@ -1,148 +0,0 @@ -"""Tests for the sync OpenAI Agents SDK local-sandbox agent. - -This test suite validates: -- Sending a message that requires the agent to actually run a shell command in - the LOCAL sandbox (unix_local backend) and receiving a non-empty response. - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. 
Run: pytest test_agent.py -v - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: s050-openai-agents-local-sandbox) -""" - -import os - -import pytest -from test_utils.sync import validate_text_in_string - -from agentex import Agentex -from agentex.types import TextContentParam -from agentex.types.agent_rpc_params import ParamsSendMessageRequest - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "s050-openai-agents-local-sandbox") - - -@pytest.fixture -def client(): - """Create an AgentEx client instance for testing.""" - return Agentex(base_url=AGENTEX_API_BASE_URL) - - -@pytest.fixture -def agent_name(): - """Return the agent name for testing.""" - return AGENT_NAME - - -@pytest.fixture -def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" - agents = client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -def _response_text(result) -> str: - """Flatten a send_message result into a single string for assertions. - - Result items may be a bare string, a ``TextContent`` (``.content`` is the - string), or a ``TaskMessage`` wrapping a ``TextContent`` (``.content`` is the - ``TextContent``, whose ``.content`` is the string). Dig through ``.content`` - until we reach a string. 
- """ - - def _text_of(obj, _depth: int = 0) -> str: - if isinstance(obj, str): - return obj - if _depth > 5: - return "" - inner = getattr(obj, "content", None) - if inner is None: - return "" - return _text_of(inner, _depth + 1) - - parts = [t for t in (_text_of(item) for item in result) if t] - return "\n".join(parts) - - -class TestLocalSandboxMessages: - """Test the local-sandbox OpenAI Agents SDK agent.""" - - def test_send_simple_message(self, client: Agentex, agent_name: str): - """Test sending a simple message and receiving a response.""" - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="Hello! What can you help me with?", - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - def test_shell_python_version(self, client: Agentex, agent_name: str): - """Test that the agent uses its shell to run a real command. - - We ask it to print the Python version. The agent should run - `python3 --version` in the local sandbox and report the real output, - which always starts with "Python 3". - """ - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content=( - "Use your shell to print the Python version on this " - "machine, then tell me what it is." - ), - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - text = _response_text(result) - assert text, "Expected a non-empty response from the sandbox agent." - # The sandbox runs on Python 3.12, so the real output contains "Python 3". 
- validate_text_in_string("Python 3", text) - - def test_shell_compute(self, client: Agentex, agent_name: str): - """Test that the agent uses python3 in the sandbox to compute a value.""" - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content=( - "Use python3 in your shell to compute 21 * 2 and tell me " - "the result." - ), - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - text = _response_text(result) - assert text, "Expected a non-empty response from the sandbox agent." - validate_text_in_string("42", text) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/00_sync/060_harness_openai/Dockerfile b/examples/tutorials/00_sync/060_harness_openai/Dockerfile deleted file mode 100644 index 1bd4f4860..000000000 --- a/examples/tutorials/00_sync/060_harness_openai/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 00_sync/060_harness_openai/pyproject.toml /app/060_harness_openai/pyproject.toml -COPY 00_sync/060_harness_openai/README.md /app/060_harness_openai/README.md - -WORKDIR /app/060_harness_openai - -# Copy the project code -COPY 00_sync/060_harness_openai/project /app/060_harness_openai/project - -# Copy the test files -COPY 00_sync/060_harness_openai/tests /app/060_harness_openai/tests - -# Copy shared test utilities -COPY test_utils 
/app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=s060-harness-openai - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/00_sync/060_harness_openai/.dockerignore b/examples/tutorials/00_sync/070_codex/.dockerignore similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/.dockerignore rename to examples/tutorials/00_sync/070_codex/.dockerignore diff --git a/examples/tutorials/00_sync/harness_codex/Dockerfile b/examples/tutorials/00_sync/070_codex/Dockerfile similarity index 74% rename from examples/tutorials/00_sync/harness_codex/Dockerfile rename to examples/tutorials/00_sync/070_codex/Dockerfile index 72713b95d..75abf677d 100644 --- a/examples/tutorials/00_sync/harness_codex/Dockerfile +++ b/examples/tutorials/00_sync/070_codex/Dockerfile @@ -23,16 +23,16 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 # Copy pyproject.toml and README.md to install dependencies -COPY 00_sync/harness_codex/pyproject.toml /app/harness_codex/pyproject.toml -COPY 00_sync/harness_codex/README.md /app/harness_codex/README.md +COPY 00_sync/070_codex/pyproject.toml /app/070_codex/pyproject.toml +COPY 00_sync/070_codex/README.md /app/070_codex/README.md -WORKDIR /app/harness_codex +WORKDIR /app/070_codex # Copy the project code -COPY 00_sync/harness_codex/project /app/harness_codex/project +COPY 00_sync/070_codex/project /app/070_codex/project # Copy the test files -COPY 00_sync/harness_codex/tests /app/harness_codex/tests +COPY 00_sync/070_codex/tests /app/070_codex/tests # Copy shared test utilities COPY test_utils /app/test_utils @@ -44,7 +44,7 @@ RUN uv pip install --system .[dev] ENV PYTHONPATH=/app # Set test environment variables -ENV AGENT_NAME=s-harness-codex +ENV 
AGENT_NAME=s070-codex # Run the agent using uvicorn CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/00_sync/harness_codex/README.md b/examples/tutorials/00_sync/070_codex/README.md similarity index 95% rename from examples/tutorials/00_sync/harness_codex/README.md rename to examples/tutorials/00_sync/070_codex/README.md index 5f3396cfa..3abb2766f 100644 --- a/examples/tutorials/00_sync/harness_codex/README.md +++ b/examples/tutorials/00_sync/070_codex/README.md @@ -1,4 +1,4 @@ -# harness_codex (sync) +# 070_codex (sync) Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap, `CodexTurn`, and `UnifiedEmitter` for a **sync** (HTTP-yield) ACP agent. @@ -27,7 +27,7 @@ The offline tests inject a fake subprocess and never invoke the real CLI: ```bash cd /path/to/scale-agentex-python -uv run --all-packages --all-extras pytest examples/tutorials/00_sync/harness_codex/tests/test_agent.py -q +uv run --all-packages --all-extras pytest examples/tutorials/00_sync/070_codex/tests/test_agent.py -q ``` ## Running live integration tests diff --git a/examples/tutorials/00_sync/harness_codex/conftest.py b/examples/tutorials/00_sync/070_codex/conftest.py similarity index 100% rename from examples/tutorials/00_sync/harness_codex/conftest.py rename to examples/tutorials/00_sync/070_codex/conftest.py diff --git a/examples/tutorials/00_sync/harness_codex/manifest.yaml b/examples/tutorials/00_sync/070_codex/manifest.yaml similarity index 86% rename from examples/tutorials/00_sync/harness_codex/manifest.yaml rename to examples/tutorials/00_sync/070_codex/manifest.yaml index 52943f8f2..87dad2847 100644 --- a/examples/tutorials/00_sync/harness_codex/manifest.yaml +++ b/examples/tutorials/00_sync/070_codex/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../ include_paths: - - 00_sync/harness_codex + - 00_sync/070_codex - test_utils - dockerfile: 00_sync/harness_codex/Dockerfile - dockerignore: 
00_sync/harness_codex/.dockerignore + dockerfile: 00_sync/070_codex/Dockerfile + dockerignore: 00_sync/070_codex/.dockerignore local_development: agent: @@ -16,7 +16,7 @@ local_development: agent: acp_type: sync - name: s-harness-codex + name: s070-codex description: Sync tutorial agent driving the unified harness surface via local codex CLI subprocess temporal: @@ -46,7 +46,7 @@ deployment: global: agent: - name: "s-harness-codex" + name: "s070-codex" description: "Sync tutorial agent driving the unified harness surface via local codex CLI subprocess" replicaCount: 1 resources: diff --git a/examples/tutorials/00_sync/060_harness_openai/project/__init__.py b/examples/tutorials/00_sync/070_codex/project/__init__.py similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/project/__init__.py rename to examples/tutorials/00_sync/070_codex/project/__init__.py diff --git a/examples/tutorials/00_sync/harness_codex/project/acp.py b/examples/tutorials/00_sync/070_codex/project/acp.py similarity index 100% rename from examples/tutorials/00_sync/harness_codex/project/acp.py rename to examples/tutorials/00_sync/070_codex/project/acp.py diff --git a/examples/tutorials/00_sync/harness_codex/pyproject.toml b/examples/tutorials/00_sync/070_codex/pyproject.toml similarity index 96% rename from examples/tutorials/00_sync/harness_codex/pyproject.toml rename to examples/tutorials/00_sync/070_codex/pyproject.toml index ca7d8ac18..88bbb9cca 100644 --- a/examples/tutorials/00_sync/harness_codex/pyproject.toml +++ b/examples/tutorials/00_sync/070_codex/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "s-harness-codex" +name = "s070-codex" version = "0.1.0" description = "Sync tutorial agent driving the unified harness surface via local codex CLI subprocess" readme = "README.md" diff --git a/examples/tutorials/00_sync/harness_codex/tests/test_agent.py 
b/examples/tutorials/00_sync/070_codex/tests/test_agent.py similarity index 99% rename from examples/tutorials/00_sync/harness_codex/tests/test_agent.py rename to examples/tutorials/00_sync/070_codex/tests/test_agent.py index b2d5b6498..94aa2aaf2 100644 --- a/examples/tutorials/00_sync/harness_codex/tests/test_agent.py +++ b/examples/tutorials/00_sync/070_codex/tests/test_agent.py @@ -145,7 +145,7 @@ async def test_on_result_callback_receives_session_id(self): LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1" AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "s-harness-codex") +AGENT_NAME = os.environ.get("AGENT_NAME", "s070-codex") @pytest.mark.skipif(not LIVE, reason="Set CODEX_LIVE_TESTS=1 and ensure codex CLI + OPENAI_API_KEY are available") diff --git a/examples/tutorials/00_sync/harness_langgraph/README.md b/examples/tutorials/00_sync/harness_langgraph/README.md deleted file mode 100644 index 86367f162..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/README.md +++ /dev/null @@ -1,55 +0,0 @@ -# Tutorial: Sync Harness LangGraph Agent - -This tutorial demonstrates how to build a **synchronous** LangGraph agent on AgentEx -using the **unified harness surface**: - -```python -turn = LangGraphTurn(stream, model=None) -emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...) -async for event in emitter.yield_turn(turn): - yield event -``` - -Compare with ``030_langgraph``, which uses the bespoke -``convert_langgraph_to_agentex_events`` helper directly. - -## Key Concepts - -### Unified Harness - -`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw -LangGraph `astream()` generator and exposes `events` (an async generator of -`TaskMessageUpdate`) and `usage()` (token counts captured from the final -`AIMessage`). - -`UnifiedEmitter.yield_turn(turn)` iterates the turn's events and yields them -to the sync ACP handler unchanged. 
The same `LangGraphTurn` object can also be -passed to `UnifiedEmitter.auto_send_turn` in the async/temporal channels. - -### AGX1-377 Note - -LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates" -node outputs). The `SpanDeriver` does not open tool spans from Full events -today; that gap is tracked in AGX1-373. - -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | ACP server using unified harness (LangGraphTurn + yield_turn) | -| `project/graph.py` | LangGraph state graph (identical to 030_langgraph) | -| `project/tools.py` | Tool definitions (weather example) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration (name: s-harness-langgraph) | - -## Running Locally - -```bash -agentex agents run -``` - -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` diff --git a/examples/tutorials/00_sync/harness_langgraph/manifest.yaml b/examples/tutorials/00_sync/harness_langgraph/manifest.yaml deleted file mode 100644 index 1f57678f2..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/manifest.yaml +++ /dev/null @@ -1,58 +0,0 @@ -build: - context: - root: ../../ - include_paths: - - 00_sync/harness_langgraph - - test_utils - dockerfile: 00_sync/harness_langgraph/Dockerfile - dockerignore: 00_sync/harness_langgraph/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: sync - name: s-harness-langgraph - description: A sync LangGraph agent using the unified harness surface (LangGraphTurn + UnifiedEmitter.yield_turn) - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - 
secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "s-harness-langgraph" - description: "A sync LangGraph agent using the unified harness surface" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/00_sync/harness_langgraph/project/acp.py b/examples/tutorials/00_sync/harness_langgraph/project/acp.py deleted file mode 100644 index f609f1682..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/project/acp.py +++ /dev/null @@ -1,107 +0,0 @@ -"""ACP handler for sync harness LangGraph agent. - -Uses the unified harness surface: ``LangGraphTurn`` wraps the LangGraph -``astream()`` generator, and ``UnifiedEmitter.yield_turn`` converts it into -the AgentEx ``TaskMessageUpdate`` event stream expected by the sync ACP. - -Differences from ``030_langgraph`` (bespoke path): -- No ``create_langgraph_tracing_handler`` boilerplate. -- No manual text-delta accumulation for the span output. -- Tool calls are emitted as ``StreamTaskMessageFull`` (not Start+Delta+Done) - via the same code path as the async/temporal channels. -- Usage data (token counts) is captured on the ``LangGraphTurn`` object and - can be read after the turn completes. - -AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` -events (from "updates"). The ``SpanDeriver`` does not open tool spans from -Full events today; that gap is tracked in AGX1-373. 
-""" - -from __future__ import annotations - -import os -from typing import AsyncGenerator - -from dotenv import load_dotenv - -load_dotenv() - -import agentex.lib.adk as adk -from project.graph import create_graph -from agentex.lib.types.acp import SendMessageParams -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.lib.core.harness.emitter import UnifiedEmitter -from agentex.types.task_message_delta import TextDelta -from agentex.types.task_message_update import TaskMessageUpdate -from agentex.types.task_message_content import TaskMessageContent -from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn -from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config - -logger = make_logger(__name__) - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -acp = FastACP.create(acp_type="sync") - -_graph = None - - -async def get_graph(): - """Get or create the compiled graph instance.""" - global _graph - if _graph is None: - _graph = await create_graph() - return _graph - - -@acp.on_message_send -async def handle_message_send( - params: SendMessageParams, -) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]: - """Handle incoming messages, streaming tokens and tool calls via unified harness.""" - graph = await get_graph() - - task_id = params.task.id - user_message = params.content.content - - logger.info(f"Processing message for task {task_id}") - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name="message", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - stream = graph.astream( - {"messages": [{"role": 
"user", "content": user_message}]}, - config={"configurable": {"thread_id": task_id}}, - stream_mode=["messages", "updates"], - ) - - turn = LangGraphTurn(stream, model=None) - emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - - final_text = "" - async for event in emitter.yield_turn(turn): - # Accumulate text deltas so the span's final_output is the assistant - # text (matching the async tutorial), not the usage metrics. - delta = getattr(event, "delta", None) - if isinstance(delta, TextDelta) and delta.text_delta: - final_text += delta.text_delta - yield event - - if turn_span: - turn_span.output = {"final_output": final_text, "usage": turn.usage().model_dump()} diff --git a/examples/tutorials/00_sync/harness_langgraph/project/graph.py b/examples/tutorials/00_sync/harness_langgraph/project/graph.py deleted file mode 100644 index 4516087d2..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/project/graph.py +++ /dev/null @@ -1,67 +0,0 @@ -"""LangGraph graph definition for the harness_langgraph sync agent. - -Identical to ``030_langgraph/project/graph.py`` — the graph definition is not -affected by the harness migration. Only ``acp.py`` changes. -""" - -from __future__ import annotations - -from typing import Any, Annotated -from datetime import datetime -from typing_extensions import TypedDict - -from langgraph.graph import START, StateGraph -from langchain_openai import ChatOpenAI -from langgraph.prebuilt import ToolNode, tools_condition -from langchain_core.messages import SystemMessage -from langgraph.graph.message import add_messages - -from project.tools import TOOLS -from agentex.lib.adk import create_checkpointer - -MODEL_NAME = "gpt-5" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. 
- -Current date and time: {timestamp} - -Guidelines: -- Be concise and helpful -- Use tools when they would help answer the user's question -- If you're unsure, ask clarifying questions -- Always provide accurate information -""" - - -class AgentState(TypedDict): - """State schema for the agent graph.""" - - messages: Annotated[list[Any], add_messages] - - -async def create_graph(): - """Create and compile the agent graph with checkpointer.""" - llm = ChatOpenAI( - model=MODEL_NAME, - reasoning={"effort": "high", "summary": "auto"}, - ) - llm_with_tools = llm.bind_tools(TOOLS) - - checkpointer = await create_checkpointer() - - def agent_node(state: AgentState) -> dict[str, Any]: - """Process the current state and generate a response.""" - messages = state["messages"] - if not messages or not isinstance(messages[0], SystemMessage): - system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")) - messages = [SystemMessage(content=system_content)] + messages - response = llm_with_tools.invoke(messages) - return {"messages": [response]} - - builder = StateGraph(AgentState) - builder.add_node("agent", agent_node) - builder.add_node("tools", ToolNode(tools=TOOLS)) - builder.add_edge(START, "agent") - builder.add_conditional_edges("agent", tools_condition, "tools") - builder.add_edge("tools", "agent") - - return builder.compile(checkpointer=checkpointer) diff --git a/examples/tutorials/00_sync/harness_langgraph/project/tools.py b/examples/tutorials/00_sync/harness_langgraph/project/tools.py deleted file mode 100644 index f02587430..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/project/tools.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Tool definitions for the harness_langgraph sync agent.""" - -from langchain_core.tools import Tool - - -def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. - - Returns: - A string describing the weather conditions. 
- """ - return f"The weather in {city} is sunny and 72°F" - - -weather_tool = Tool( - name="get_weather", - func=get_weather, - description="Get the current weather for a city. Input should be a city name.", -) - -TOOLS = [weather_tool] diff --git a/examples/tutorials/00_sync/harness_langgraph/tests/test_agent.py b/examples/tutorials/00_sync/harness_langgraph/tests/test_agent.py deleted file mode 100644 index 2eb561cec..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/tests/test_agent.py +++ /dev/null @@ -1,144 +0,0 @@ -""" -Tests for the sync harness LangGraph agent. - -Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.yield_turn) -end-to-end against a live AgentEx server. - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: s-harness-langgraph) -""" - -import os - -import pytest -from test_utils.sync import validate_text_in_string, collect_streaming_response - -from agentex import Agentex -from agentex.types import TextContent, TextContentParam -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest, ParamsSendMessageRequest -from agentex.lib.sdk.fastacp.base.base_acp_server import uuid - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "s-harness-langgraph") - - -@pytest.fixture -def client(): - return Agentex(base_url=AGENTEX_API_BASE_URL) - - -@pytest.fixture -def agent_name(): - return AGENT_NAME - - -@pytest.fixture -def agent_id(client, agent_name): - agents = client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingMessages: - def test_send_simple_message(self, client: Agentex, agent_name: str): - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - 
content=TextContentParam( - author="user", - content="Hello! What can you help me with?", - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - def test_tool_calling(self, client: Agentex, agent_name: str): - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="What's the weather in San Francisco?", - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id: str): - task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - response1 = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="My name is Alice. 
Remember that.", - type="text", - ), - task_id=task.id, - ), - ) - assert response1.result is not None - - response2 = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="What is my name?", - type="text", - ), - task_id=task.id, - ), - ) - assert response2.result is not None - for message in response2.result: - if isinstance(message.content, TextContent): - validate_text_in_string("alice", message.content.content.lower()) - - -class TestStreamingMessages: - def test_stream_simple_message(self, client: Agentex, agent_name: str): - stream = client.agents.send_message_stream( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="Tell me a short joke.", - type="text", - ) - ), - ) - aggregated_content, chunks = collect_streaming_response(stream) - assert aggregated_content is not None - assert len(chunks) > 1, "No chunks received in streaming response." - - def test_stream_tool_calling(self, client: Agentex, agent_name: str): - stream = client.agents.send_message_stream( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="What's the weather in New York?", - type="text", - ) - ), - ) - aggregated_content, chunks = collect_streaming_response(stream) - assert aggregated_content is not None - assert len(chunks) > 0, "No chunks received in streaming response." 
- - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/Dockerfile b/examples/tutorials/00_sync/harness_pydantic_ai/Dockerfile deleted file mode 100644 index 3a9412fa9..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 00_sync/harness_pydantic_ai/pyproject.toml /app/harness_pydantic_ai/pyproject.toml -COPY 00_sync/harness_pydantic_ai/README.md /app/harness_pydantic_ai/README.md - -WORKDIR /app/harness_pydantic_ai - -# Copy the project code -COPY 00_sync/harness_pydantic_ai/project /app/harness_pydantic_ai/project - -# Copy the test files -COPY 00_sync/harness_pydantic_ai/tests /app/harness_pydantic_ai/tests - -# Copy shared test utilities -COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=s-harness-pydantic-ai - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/README.md b/examples/tutorials/00_sync/harness_pydantic_ai/README.md deleted file mode 100644 index 1466bc4e7..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# Sync Pydantic AI Harness Test Agent - -A 
minimal **synchronous** Pydantic AI agent that drives the **unified harness -surface** (`UnifiedEmitter.yield_turn` + `PydanticAITurn`) on the sync -(HTTP-yield) channel. - -## Why this agent exists - -The `00_sync/040_pydantic_ai` tutorial streams via the bare -`convert_pydantic_ai_to_agentex_events` converter and does **not** exercise the -unified `yield_turn` path. This harness test agent is the sync coverage for the -unified surface: it proves an agent author can wire the sync channel through -`UnifiedEmitter` and get automatic span derivation (tool spans nested under the -per-turn span) for free, exactly like the async/temporal channels. - -## How it wires the unified surface - -In `project/acp.py`: - -```python -emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, -) -async with agent.run_stream_events(user_message) as stream: - turn = PydanticAITurn(stream, model=MODEL_NAME) # coalesce off: stream tool-call arg tokens - async for ev in emitter.yield_turn(turn): - yield ev -``` - -- `coalesce_tool_requests=False` (the default) preserves token-by-token - tool-call argument streaming on the sync channel. -- The `UnifiedEmitter` is constructed from the ACP/streaming context - (`task_id` + `trace_id` + `parent_span_id`) so tool spans nest under the - per-turn `AGENT_WORKFLOW` span automatically. - -## Files - -- `project/acp.py` — sync ACP handler using `emitter.yield_turn(...)`. -- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool. -- `project/tools.py` — `get_weather(city)` returning a constant. -- `tests/test_agent.py` — live integration test (requires a running agent). - -## Tools - -- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string so a - run deterministically exercises text + a tool call + a tool response. 
- -## Offline coverage - -Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake -streaming/tracing, no network) live in the SDK repo at -`tests/lib/core/harness/test_harness_pydantic_ai_sync.py`. diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/manifest.yaml b/examples/tutorials/00_sync/harness_pydantic_ai/manifest.yaml deleted file mode 100644 index 55d8f5d2b..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/manifest.yaml +++ /dev/null @@ -1,58 +0,0 @@ -build: - context: - root: ../../ - include_paths: - - 00_sync/harness_pydantic_ai - - test_utils - dockerfile: 00_sync/harness_pydantic_ai/Dockerfile - dockerignore: 00_sync/harness_pydantic_ai/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: sync - name: s-harness-pydantic-ai - description: A sync Pydantic AI harness test agent using the unified emitter surface - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "s-harness-pydantic-ai" - description: "A sync Pydantic AI harness test agent using the unified emitter surface" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/acp.py b/examples/tutorials/00_sync/harness_pydantic_ai/project/acp.py deleted file mode 100644 index f23cd7960..000000000 --- 
a/examples/tutorials/00_sync/harness_pydantic_ai/project/acp.py +++ /dev/null @@ -1,92 +0,0 @@ -"""ACP handler for the sync harness Pydantic AI test agent. - -This agent exercises the UNIFIED HARNESS SURFACE on the sync (HTTP-yield) -channel — ``UnifiedEmitter.yield_turn(PydanticAITurn(...))`` — rather than the -bare ``convert_pydantic_ai_to_agentex_events`` converter used by the -``040_pydantic_ai`` tutorial. The unified surface gives the sync channel the -same tracing (span derivation) the async/temporal channels get for free. - -Flow: -1. Open a per-turn AGENT_WORKFLOW span via ``adk.tracing.span``. -2. Construct a ``UnifiedEmitter`` from the ACP/streaming context (task_id + - trace_id + parent_span_id) so tool spans nest under the turn span. -3. Wrap ``agent.run_stream_events(...)`` in a ``PydanticAITurn`` and forward - events with ``emitter.yield_turn(turn)`` — yielding each to the client. -""" - -from __future__ import annotations - -import os -from typing import AsyncGenerator - -from dotenv import load_dotenv - -load_dotenv() - -import agentex.lib.adk as adk -from project.agent import MODEL_NAME, create_agent -from agentex.lib.types.acp import SendMessageParams -from agentex.lib.core.harness import UnifiedEmitter -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.types.task_message_update import TaskMessageUpdate -from agentex.types.task_message_content import TaskMessageContent -from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn -from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config - -logger = make_logger(__name__) - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -acp = 
FastACP.create(acp_type="sync") - -_agent = None - - -def get_agent(): - """Get or create the Pydantic AI agent instance.""" - global _agent - if _agent is None: - _agent = create_agent() - return _agent - - -@acp.on_message_send -async def handle_message_send( - params: SendMessageParams, -) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]: - """Handle incoming messages, streaming events through the unified surface.""" - agent = get_agent() - task_id = params.task.id - - user_message = params.content.content - logger.info(f"Processing message for task {task_id}") - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name="message", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - # Construct the UnifiedEmitter from the ACP/streaming context so tracing - # is automatic: tool spans nest under this turn's span. - emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - - async with agent.run_stream_events(user_message) as stream: - # PydanticAITurn preserves token-by-token tool-call argument - # streaming (Start+Delta+Done) on the sync/HTTP channel. - turn = PydanticAITurn(stream, model=MODEL_NAME) - async for ev in emitter.yield_turn(turn): - yield ev diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/agent.py b/examples/tutorials/00_sync/harness_pydantic_ai/project/agent.py deleted file mode 100644 index 72fd74173..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/project/agent.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Pydantic AI agent definition for the sync harness test agent. - -The Agent is the boundary between this module and the API layer (acp.py). -Pydantic AI handles its own tool-call loop internally — no graph required. 
-""" - -from __future__ import annotations - -from datetime import datetime - -from pydantic_ai import Agent - -from project.tools import get_weather - -__all__ = ["create_agent", "MODEL_NAME"] - -MODEL_NAME = "openai:gpt-4o-mini" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. - -Current date and time: {timestamp} - -Guidelines: -- Be concise and helpful -- Use tools when they would help answer the user's question -- If you're unsure, ask clarifying questions -- Always provide accurate information -""" - - -def create_agent() -> Agent: - """Build and return the Pydantic AI agent with tools registered.""" - agent = Agent( - MODEL_NAME, - system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), - ) - - agent.tool_plain(get_weather) - - return agent diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/tools.py b/examples/tutorials/00_sync/harness_pydantic_ai/project/tools.py deleted file mode 100644 index d649c75f1..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/project/tools.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Tool definitions for the sync harness Pydantic AI agent. - -Pydantic AI tools are registered directly on the Agent via decorators -(see project.agent). This module hosts the bare function so it is easy to -unit-test in isolation. -""" - -from __future__ import annotations - - -def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. - - Returns: - A string describing the weather conditions. 
- """ - return f"The weather in {city} is sunny and 72°F" diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/pyproject.toml b/examples/tutorials/00_sync/harness_pydantic_ai/pyproject.toml deleted file mode 100644 index 08f709a4a..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "s-harness-pydantic-ai" -version = "0.1.0" -description = "A sync Pydantic AI harness test agent using the unified emitter surface" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "pydantic-ai-slim[openai]>=1.0,<2", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/tests/test_agent.py b/examples/tutorials/00_sync/harness_pydantic_ai/tests/test_agent.py deleted file mode 100644 index 96da95fdc..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/tests/test_agent.py +++ /dev/null @@ -1,138 +0,0 @@ -"""Live tests for the sync harness Pydantic AI agent. - -These tests require a running agent (server + deployed agent) and exercise the -unified-surface sync handler end-to-end over the wire. They mirror the -``040_pydantic_ai`` tutorial tests but target this harness agent. - -Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives -in ``tests/lib/core/harness/test_harness_pydantic_ai_sync.py`` in the SDK repo. - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. 
Run: pytest test_agent.py -v - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: s-harness-pydantic-ai) -""" - -import os - -import pytest -from test_utils.sync import validate_text_in_string, collect_streaming_response - -from agentex import Agentex -from agentex.types import TextContentParam -from agentex.types.agent_rpc_params import ParamsSendMessageRequest - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "s-harness-pydantic-ai") - - -@pytest.fixture -def client(): - """Create an AgentEx client instance for testing.""" - return Agentex(base_url=AGENTEX_API_BASE_URL) - - -@pytest.fixture -def agent_name(): - """Return the agent name for testing.""" - return AGENT_NAME - - -@pytest.fixture -def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" - agents = client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingMessages: - """Test non-streaming message sending with the unified-surface sync agent.""" - - def test_send_simple_message(self, client: Agentex, agent_name: str): - """Test sending a simple message and receiving a response.""" - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="Hello! 
What can you help me with?", - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - def test_tool_calling(self, client: Agentex, agent_name: str): - """Test that the agent can use tools (e.g., weather tool).""" - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="What's the weather in San Francisco?", - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - -class TestStreamingMessages: - """Test streaming message sending through the unified yield_turn path.""" - - def test_stream_simple_message(self, client: Agentex, agent_name: str): - """Test streaming a simple message response.""" - stream = client.agents.send_message_stream( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="Tell me a short joke.", - type="text", - ) - ), - ) - - aggregated_content, chunks = collect_streaming_response(stream) - - assert aggregated_content is not None - assert len(chunks) > 1, "No chunks received in streaming response." - - def test_stream_tool_calling(self, client: Agentex, agent_name: str): - """Test streaming with tool calls through the unified surface. - - Exercises token-by-token tool-call argument streaming (coalesce off), - which the unified yield_turn path preserves on the sync channel. - """ - stream = client.agents.send_message_stream( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="What's the weather in New York? Respond with the temperature.", - type="text", - ) - ), - ) - - aggregated_content, chunks = collect_streaming_response(stream) - - assert aggregated_content is not None - assert len(chunks) > 0, "No chunks received in streaming response." 
- # The weather tool always returns "72°F", so the agent's reply should mention it. - validate_text_in_string("72", aggregated_content) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/00_base/100_langgraph/README.md b/examples/tutorials/10_async/00_base/100_langgraph/README.md index 6f6c6a36b..cd2fa6dd6 100644 --- a/examples/tutorials/10_async/00_base/100_langgraph/README.md +++ b/examples/tutorials/10_async/00_base/100_langgraph/README.md @@ -1,46 +1,52 @@ -# Tutorial 100: Async LangGraph Agent +# Tutorial: Async LangGraph Agent -This tutorial demonstrates how to build an **asynchronous** LangGraph agent on AgentEx with: -- Task-based event handling via Redis -- Tool calling (ReAct pattern) -- Multi-turn conversation memory via AgentEx checkpointer -- Tracing integration +This tutorial demonstrates how to build an **async** LangGraph agent on AgentEx +using the **unified harness surface**: -## Graph Structure +```python +turn = LangGraphTurn(stream, model=None) +emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...) +result = await emitter.auto_send_turn(turn) +``` + +The `LangGraphTurn` + `UnifiedEmitter.auto_send_turn` path replaces calling the +lower-level ``stream_langgraph_events`` helper directly. + +## Key Concepts + +### Unified Harness + +`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw +LangGraph `astream()` generator and exposes `events` (an async generator of +`TaskMessageUpdate`) and `usage()` (token counts captured from the final +`AIMessage`). -![Graph](graph.png) +`UnifiedEmitter.auto_send_turn(turn)` pushes each event to Redis via +`streaming_task_message_context`, accumulates the final text, and returns a +`TurnResult(final_text=..., usage=...)`. -## Sync vs Async: Key Differences +The same `LangGraphTurn` object can also be passed to +`UnifiedEmitter.yield_turn` in the sync channel. 
-| Aspect | Sync (Tutorial 030) | Async (This Tutorial) | -|--------|--------------------|-----------------------| -| **ACP Type** | `sync` | `async` | -| **Handler** | `@acp.on_message_send` | `@acp.on_task_event_send` | -| **Response** | HTTP streaming (yields) | Redis streaming | -| **Message Echo** | Implicit | Explicit (`adk.messages.create`) | -| **Streaming Helper** | `convert_langgraph_to_agentex_events()` | `stream_langgraph_events()` | -| **Extra Handlers** | None | `on_task_create`, `on_task_cancel` | +### AGX1-377 Note -### When to use Async? -- Long-running tasks that may exceed HTTP timeout -- Agents that need to push updates asynchronously -- Multi-step workflows where the client polls for results -- Production agents that need reliable message delivery via Redis +LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates" +node outputs). The `SpanDeriver` does not open tool spans from Full events +today; that gap is tracked in AGX1-373. ## Files | File | Description | |------|-------------| -| `project/acp.py` | ACP server with async event handlers | -| `project/graph.py` | LangGraph state graph definition | +| `project/acp.py` | ACP server using unified harness (LangGraphTurn + auto_send_turn) | +| `project/graph.py` | LangGraph state graph (weather example) | | `project/tools.py` | Tool definitions (weather example) | | `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration | +| `manifest.yaml` | Agent configuration (name: ab100-langgraph) | ## Running Locally ```bash -# From this directory agentex agents run ``` diff --git a/examples/tutorials/10_async/00_base/100_langgraph/graph.png b/examples/tutorials/10_async/00_base/100_langgraph/graph.png deleted file mode 100644 index 16d22a1e7..000000000 Binary files a/examples/tutorials/10_async/00_base/100_langgraph/graph.png and /dev/null differ diff --git a/examples/tutorials/10_async/00_base/100_langgraph/manifest.yaml 
b/examples/tutorials/10_async/00_base/100_langgraph/manifest.yaml index 1b0b5d490..13d64f524 100644 --- a/examples/tutorials/10_async/00_base/100_langgraph/manifest.yaml +++ b/examples/tutorials/10_async/00_base/100_langgraph/manifest.yaml @@ -17,7 +17,7 @@ local_development: agent: acp_type: async name: ab100-langgraph - description: An async LangGraph agent with tool calling and Redis streaming + description: An async LangGraph agent using the unified harness surface (LangGraphTurn + UnifiedEmitter.auto_send_turn) temporal: enabled: false @@ -47,7 +47,7 @@ deployment: global: agent: name: "ab100-langgraph" - description: "An async LangGraph agent with tool calling and Redis streaming" + description: "An async LangGraph agent using the unified harness surface" replicaCount: 1 resources: requests: diff --git a/examples/tutorials/10_async/00_base/100_langgraph/project/acp.py b/examples/tutorials/10_async/00_base/100_langgraph/project/acp.py index 2585fefd6..198446607 100644 --- a/examples/tutorials/10_async/00_base/100_langgraph/project/acp.py +++ b/examples/tutorials/10_async/00_base/100_langgraph/project/acp.py @@ -1,7 +1,21 @@ -""" -ACP handler for async LangGraph agent. - -Uses the async ACP model with Redis streaming instead of HTTP yields. +"""ACP handler for the async LangGraph agent. + +Uses the unified harness surface: ``LangGraphTurn`` wraps the LangGraph +``astream()`` generator, and ``UnifiedEmitter.auto_send_turn`` streams events +to Redis and returns a ``TurnResult`` with the accumulated final text. + +Properties of the unified surface: +- Tracing is wired through the tracing manager (no bespoke handler boilerplate). +- A single ``UnifiedEmitter.auto_send_turn(LangGraphTurn(stream))`` call + replaces bespoke event-streaming helpers. +- Tool calls/responses go through ``streaming_task_message_context`` + (same code path as text deltas), making the event stream channel-agnostic. 
+- Usage data (token counts) is captured on ``LangGraphTurn.usage()`` after + ``auto_send_turn`` returns. + +AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` +events (from "updates"). The ``SpanDeriver`` does not open tool spans from +Full events today; that gap is tracked in AGX1-373. """ from __future__ import annotations @@ -14,12 +28,13 @@ import agentex.lib.adk as adk from project.graph import create_graph -from agentex.lib.adk import stream_langgraph_events, create_langgraph_tracing_handler from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams from agentex.lib.types.fastacp import AsyncACPConfig from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config logger = make_logger(__name__) @@ -29,7 +44,8 @@ sgp_api_key=os.environ.get("SGP_API_KEY", ""), sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - )) + ) +) acp = FastACP.create( acp_type="async", @@ -48,40 +64,39 @@ async def get_graph(): @acp.on_task_event_send async def handle_task_event_send(params: SendEventParams): - """Handle incoming events, streaming tokens and tool calls via Redis.""" + """Handle incoming events, streaming tokens and tool calls via unified harness.""" graph = await get_graph() task_id = params.task.id user_message = params.event.content.content logger.info(f"Processing message for thread {task_id}") - # Echo the user's message await adk.messages.create(task_id=task_id, content=params.event.content) async with adk.tracing.span( trace_id=task_id, + task_id=task_id, name="message", input={"message": user_message}, data={"__span_type__": 
"AGENT_WORKFLOW"}, ) as turn_span: - callback = create_langgraph_tracing_handler( - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - stream = graph.astream( {"messages": [{"role": "user", "content": user_message}]}, - config={ - "configurable": {"thread_id": task_id}, - "callbacks": [callback], - }, + config={"configurable": {"thread_id": task_id}}, stream_mode=["messages", "updates"], ) - final_output = await stream_langgraph_events(stream, task_id) + turn = LangGraphTurn(stream, model=None) + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + + result = await emitter.auto_send_turn(turn) if turn_span: - turn_span.output = {"final_output": final_output} + turn_span.output = {"final_output": result.final_text} @acp.on_task_create diff --git a/examples/tutorials/10_async/00_base/100_langgraph/project/graph.py b/examples/tutorials/10_async/00_base/100_langgraph/project/graph.py index af6e31313..d63f28390 100644 --- a/examples/tutorials/10_async/00_base/100_langgraph/project/graph.py +++ b/examples/tutorials/10_async/00_base/100_langgraph/project/graph.py @@ -1,7 +1,7 @@ -""" -LangGraph graph definition. +"""LangGraph graph definition for the 100_langgraph async agent. -Defines the state, nodes, edges, and compiles the graph. +The graph definition is not affected by the harness migration; only +``acp.py`` changes. 
""" from __future__ import annotations @@ -34,6 +34,7 @@ class AgentState(TypedDict): """State schema for the agent graph.""" + messages: Annotated[list[Any], add_messages] @@ -51,9 +52,7 @@ def agent_node(state: AgentState) -> dict[str, Any]: """Process the current state and generate a response.""" messages = state["messages"] if not messages or not isinstance(messages[0], SystemMessage): - system_content = SYSTEM_PROMPT.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ) + system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")) messages = [SystemMessage(content=system_content)] + messages response = llm_with_tools.invoke(messages) return {"messages": [response]} diff --git a/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py b/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py index 1b402a906..e421528fc 100644 --- a/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py +++ b/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py @@ -1,9 +1,4 @@ -""" -Tool definitions for the LangGraph agent. - -Add your custom tools here. Each tool should be a function decorated with @tool -or created using the Tool class. -""" +"""Tool definitions for the 100_langgraph async agent.""" from langchain_core.tools import Tool @@ -17,16 +12,13 @@ def get_weather(city: str) -> str: Returns: A string describing the weather conditions. """ - # TODO: Replace with actual weather API call return f"The weather in {city} is sunny and 72°F" -# Define tools weather_tool = Tool( name="get_weather", func=get_weather, description="Get the current weather for a city. 
Input should be a city name.", ) -# Export all tools as a list TOOLS = [weather_tool] diff --git a/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml b/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml index fecbc6149..715477bac 100644 --- a/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml +++ b/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "ab100-langgraph" version = "0.1.0" -description = "An async LangGraph agent with tool calling and Redis streaming" +description = "An async LangGraph agent using the unified harness surface" readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py b/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py index 948db1558..b80d7a8f9 100644 --- a/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py +++ b/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py @@ -1,14 +1,8 @@ """ -Tests for the async LangGraph agent. +Tests for the async harness LangGraph agent. -This test suite validates: -- Non-streaming event sending and polling -- Streaming event sending - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. Run: pytest test_agent.py -v +Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.auto_send_turn) +end-to-end against a live AgentEx server. 
Configuration: - AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) @@ -25,14 +19,12 @@ from agentex.types.agent_rpc_params import ParamsCreateTaskRequest from agentex.lib.sdk.fastacp.base.base_acp_server import uuid -# Configuration from environment variables AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") AGENT_NAME = os.environ.get("AGENT_NAME", "ab100-langgraph") @pytest_asyncio.fixture async def client(): - """Create an AsyncAgentex client instance for testing.""" client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) yield client await client.close() @@ -40,13 +32,11 @@ async def client(): @pytest.fixture def agent_name(): - """Return the agent name for testing.""" return AGENT_NAME @pytest_asyncio.fixture async def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" agents = await client.agents.list() for agent in agents: if agent.name == agent_name: @@ -55,14 +45,9 @@ async def agent_id(client, agent_name): class TestNonStreamingEvents: - """Test non-streaming event sending and polling.""" - @pytest.mark.asyncio async def test_send_event(self, client: AsyncAgentex, agent_id: str): - """Test sending an event to the async LangGraph agent.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None @@ -78,10 +63,7 @@ async def test_send_event(self, client: AsyncAgentex, agent_id: str): @pytest.mark.asyncio async def test_tool_calling(self, client: AsyncAgentex, agent_id: str): - """Test that the agent can use tools (e.g., weather tool).""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, 
params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None @@ -97,14 +79,9 @@ async def test_tool_calling(self, client: AsyncAgentex, agent_id: str): class TestStreamingEvents: - """Test streaming event sending.""" - @pytest.mark.asyncio async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str): - """Test sending an event and streaming the response.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md b/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md index 6046b579a..db56979cc 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md @@ -1,63 +1,52 @@ -# Tutorial 110 (async/base): Pydantic AI Agent +# Async Pydantic AI Agent -This tutorial demonstrates how to build an **async** Pydantic AI agent on AgentEx with: -- Tool calling (Pydantic AI handles the tool loop internally) -- Streaming token output via Redis (text + reasoning tokens stream as deltas) -- Task lifecycle hooks (create / event-send / cancel) +A minimal **async** (Redis-streaming) Pydantic AI agent that drives the +**unified harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`) +directly. -This is the async counterpart to the sync tutorial at [`00_sync/040_pydantic_ai`](../../../00_sync/040_pydantic_ai/). +## Why this agent exists -## Key Concepts +This agent calls `emitter.auto_send_turn(...)` **explicitly** at the +agent-author level, making the unified-surface wiring visible and giving the +async channel direct coverage. 
-### Async ACP -Unlike sync ACP (HTTP request/response with chunked streaming back), async ACP uses **Redis** for streaming. The HTTP call returns immediately when an event is acknowledged; the agent then pushes updates to Redis on its own schedule. The UI subscribes to Redis to receive deltas. +## How it wires the unified surface -### Pydantic AI Integration -- **Agent**: A single `pydantic_ai.Agent` that owns the model and tools. No graph required. -- **`@agent.tool_plain`**: Registers a Python function as a tool. Pydantic AI infers the schema from type hints and docstring. -- **`agent.run_stream_events(...)`**: Yields `AgentStreamEvent`s (`PartStartEvent` / `PartDeltaEvent` / `PartEndEvent` / `FunctionToolResultEvent`) as the model produces them. +In `project/acp.py`: -### Streaming -The helper `stream_pydantic_ai_events(stream, task_id)` consumes the Pydantic AI event stream and writes Agentex updates to Redis via `adk.streaming.streaming_task_message_context(...)`: -- **Text and thinking tokens** stream as Redis deltas inside coalesced contexts. -- **Tool requests and tool responses** are emitted as **discrete full messages** (no token-level arg streaming). To stream tool-call argument tokens, use the sync converter — see [`00_sync/040_pydantic_ai`](../../../00_sync/040_pydantic_ai/). 
- -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | Async ACP server with task lifecycle handlers | -| `project/agent.py` | Pydantic AI agent + tool registration | -| `project/tools.py` | Tool definitions (weather example) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration | - -## Running Locally - -```bash -# From this directory -agentex agents run +```python +emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, +) +async with agent.run_stream_events(user_message, message_history=previous_messages) as stream: + turn = PydanticAITurn(tee_messages(stream), model=MODEL_NAME) + result = await emitter.auto_send_turn(turn) ``` -## Running Tests +- `coalesce_tool_requests` is not set: per `project/acp.py`, the unified auto_send + path delivers streamed tool requests natively (`Start + Delta + Done`), so the + AGX1-377 coalescing workaround (a single `Full(tool_request)`) is not needed. +- The `UnifiedEmitter` is constructed from the ACP context (`task_id` + + `trace_id` + `parent_span_id`) so messages auto-send to the task stream + (Redis) and tracing is automatic. +- Multi-turn memory is persisted via `adk.state` (pydantic-ai message history + round-tripped through `ModelMessagesTypeAdapter`). -```bash -pytest tests/test_agent.py -v -``` +## Files -## Sync vs Async — How the Code Differs +- `project/acp.py` — async ACP handler using `emitter.auto_send_turn(...)`. +- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool. +- `project/tools.py` — `get_weather(city)` returning a constant. +- `tests/test_agent.py` — live integration test (requires a running agent). -This tutorial uses the same `project/agent.py` and `project/tools.py` as the sync version.
The only meaningful differences live in `project/acp.py`: +## Tools -| Concern | Sync (`s040-pydantic-ai`) | Async (`ab110-pydantic-ai`) | -|---|---|---| -| ACP type | `FastACP.create(acp_type="sync")` | `FastACP.create(acp_type="async", config=AsyncACPConfig(type="base"))` | -| Handler hook | `@acp.on_message_send` returns/yields events | `@acp.on_task_event_send` returns nothing | -| Stream output | `yield event` (chunked HTTP) | `await context.stream_update(...)` (Redis) | -| Tool calls | Args stream as `ToolRequestDelta` tokens | Args arrive in one full message | -| Lifecycle | Ephemeral (no task hooks) | `on_task_create` + `on_task_cancel` form a durable task contract | +- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string. -## Notes +## Offline coverage -- Multi-turn conversation memory is not wired here. Pydantic AI does not ship a checkpointer; to add memory, load prior messages via `adk.messages.list(task_id=...)` and pass them to `agent.run_stream_events(..., message_history=...)`. -- Reasoning/thinking tokens are not exercised by `gpt-4o-mini`. Swap to a reasoning-capable model if you want to test that branch end-to-end. +Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake +streaming/tracing, no network) live in the SDK repo under +`tests/lib/core/harness/` (the pydantic-ai async suite). 
diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml index 583b07251..4aca13d44 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml @@ -17,7 +17,7 @@ local_development: agent: acp_type: async name: ab110-pydantic-ai - description: An async Pydantic AI agent with tool calling and Redis streaming + description: An async Pydantic AI harness test agent using the unified emitter surface temporal: enabled: false @@ -38,7 +38,7 @@ agent: - env_var_name: SGP_CLIENT_BASE_URL secret_name: sgp-client-base-url secret_key: url - + deployment: image: repository: "" @@ -47,7 +47,7 @@ deployment: global: agent: name: "ab110-pydantic-ai" - description: "An async Pydantic AI agent with tool calling and Redis streaming" + description: "An async Pydantic AI harness test agent using the unified emitter surface" replicaCount: 1 resources: requests: diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py index dc8a2de21..95b638f8b 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py @@ -1,13 +1,14 @@ -"""ACP handler for async Pydantic AI agent. +"""ACP handler for the async harness Pydantic AI test agent. -Uses the async ACP model with Redis streaming instead of HTTP yields. -Text and reasoning tokens stream as Redis deltas; tool requests and -responses are persisted as discrete full messages. +This agent exercises the UNIFIED HARNESS SURFACE on the async (Redis-streaming) +channel — ``UnifiedEmitter.auto_send_turn(PydanticAITurn(...))`` +— calling it directly rather than via the ``stream_pydantic_ai_events`` helper +(which this ``110_pydantic_ai`` tutorial used before the harness migration).
This makes the unified-surface +wiring explicit at the agent-author level. Multi-turn memory is persisted via ``adk.state``: on each turn we load the previous pydantic-ai ``message_history`` from state, run the agent with it, -then save the updated history back. Without this, every turn would be a -fresh stateless run and the agent would forget the prior conversation. +then save the updated history back. """ from __future__ import annotations @@ -23,17 +24,15 @@ from pydantic_ai.messages import ModelMessagesTypeAdapter import agentex.lib.adk as adk -from project.agent import create_agent -from agentex.lib.adk import ( - stream_pydantic_ai_events, - create_pydantic_ai_tracing_handler, -) +from project.agent import MODEL_NAME, create_agent from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.types.fastacp import AsyncACPConfig from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger from agentex.lib.utils.model_utils import BaseModel from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config logger = make_logger(__name__) @@ -66,9 +65,7 @@ class ConversationState(BaseModel): ``history_json`` holds the pydantic-ai message history serialized by ``ModelMessagesTypeAdapter`` — pydantic-ai's official way to round-trip - ``ModelMessage`` objects through JSON. We can't use a plain - ``list[ModelMessage]`` field because ``ModelMessage`` is a discriminated - union of runtime types, not a stable Pydantic schema. + ``ModelMessage`` objects through JSON. """ history_json: str = "[]" @@ -77,11 +74,7 @@ class ConversationState(BaseModel): @acp.on_task_create async def handle_task_create(params: CreateTaskParams): - """Initialize per-task state on task creation. 
- - A fresh task starts with no message history; the conversation is built - up by ``handle_task_event_send`` on each subsequent user message. - """ + """Initialize per-task state on task creation.""" logger.info(f"Task created: {params.task.id}") await adk.state.create( task_id=params.task.id, @@ -92,7 +85,7 @@ async def handle_task_create(params: CreateTaskParams): @acp.on_task_event_send async def handle_task_event_send(params: SendEventParams): - """Handle each user message: load prior history, run the agent, save updated history.""" + """Handle each user message through the unified auto_send_turn path.""" agent = get_agent() task_id = params.task.id agent_id = params.agent.id @@ -103,9 +96,7 @@ async def handle_task_event_send(params: SendEventParams): # Echo the user's message into the task history. await adk.messages.create(task_id=task_id, content=params.event.content) - # Load the previous conversation history from state. If state is missing - # (e.g. task wasn't initialised via on_task_create), fall back to a fresh - # one so the agent still responds — just without memory of prior turns. + # Load the previous conversation history from state (fall back to fresh). task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id) if task_state is None: state = ConversationState() @@ -123,15 +114,15 @@ async def handle_task_event_send(params: SendEventParams): input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - tracing_handler = create_pydantic_ai_tracing_handler( + # Construct the UnifiedEmitter from the ACP context so tracing is + # automatic and messages are auto-sent to the task stream (Redis). 
+ emitter = UnifiedEmitter( + task_id=task_id, trace_id=task_id, parent_span_id=turn_span.id if turn_span else None, - task_id=task_id, ) - # Wrap the pydantic-ai event stream so we can capture the final - # AgentRunResultEvent (which carries the full message list for the - # next turn) without changing the streaming-helper's signature. + # Capture the terminal AgentRunResultEvent to persist message history. captured_messages: list[Any] = [] async def tee_messages(upstream) -> AsyncIterator[Any]: @@ -141,9 +132,13 @@ async def tee_messages(upstream) -> AsyncIterator[Any]: yield event async with agent.run_stream_events(user_message, message_history=previous_messages) as stream: - final_output = await stream_pydantic_ai_events( - tee_messages(stream), task_id, tracing_handler=tracing_handler + # The unified auto_send path delivers streamed tool requests natively + # (Start+Delta+Done), so no coalescing workaround is needed. + turn = PydanticAITurn( + tee_messages(stream), + model=MODEL_NAME, ) + result = await emitter.auto_send_turn(turn) # Save the updated message history so the next turn picks up here. if captured_messages: @@ -156,7 +151,7 @@ async def tee_messages(upstream) -> AsyncIterator[Any]: ) if turn_span: - turn_span.output = {"final_output": final_output} + turn_span.output = {"final_output": result.final_text} @acp.on_task_cancel diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py index 2c0f6f10c..e7b764d82 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py @@ -1,4 +1,4 @@ -"""Pydantic AI agent definition. +"""Pydantic AI agent definition for the async harness test agent. The Agent is the boundary between this module and the API layer (acp.py). Pydantic AI handles its own tool-call loop internally — no graph required. 
@@ -12,6 +12,8 @@ from project.tools import get_weather +__all__ = ["create_agent", "MODEL_NAME"] + MODEL_NAME = "openai:gpt-4o-mini" SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. @@ -29,9 +31,7 @@ def create_agent() -> Agent: """Build and return the Pydantic AI agent with tools registered.""" agent = Agent( MODEL_NAME, - system_prompt=SYSTEM_PROMPT.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ), + system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), ) agent.tool_plain(get_weather) diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py index 98f65d509..0f16a7cb0 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py @@ -1,8 +1,8 @@ -"""Tool definitions for the async Pydantic AI agent. +"""Tool definitions for the async harness Pydantic AI agent. Pydantic AI tools are registered directly on the Agent via decorators -(see project.agent). This module hosts the bare functions so they're -easy to unit-test in isolation. +(see project.agent). This module hosts the bare function so it is easy to +unit-test in isolation. 
""" from __future__ import annotations diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml index f5cd32e0a..257918014 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "ab110-pydantic-ai" version = "0.1.0" -description = "An async Pydantic AI agent with tool calling and Redis streaming" +description = "An async Pydantic AI harness test agent using the unified emitter surface" readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py index a31322d30..ce573a697 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py @@ -1,8 +1,10 @@ -"""Tests for the async Pydantic AI agent. +"""Live tests for the async Pydantic AI agent. -This test suite validates: -- Non-streaming event sending and polling -- Streaming event sending +These tests require a running agent (server + deployed agent) and exercise the +unified-surface async handler end-to-end over the wire. + +Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives +in the SDK repo under ``tests/lib/core/harness/`` (the pydantic-ai async suite). To run these tests: 1. 
Make sure the agent is running (via docker-compose or `agentex agents run`) @@ -53,14 +55,12 @@ async def agent_id(client, agent_name): class TestNonStreamingEvents: - """Test non-streaming event sending and polling.""" + """Test non-streaming event sending through the unified auto_send_turn path.""" @pytest.mark.asyncio async def test_send_event(self, client: AsyncAgentex, agent_id: str): - """Test sending an event to the async Pydantic AI agent.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + """Test sending an event to the async harness Pydantic AI agent.""" + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None @@ -77,9 +77,7 @@ async def test_send_event(self, client: AsyncAgentex, agent_id: str): @pytest.mark.asyncio async def test_tool_calling(self, client: AsyncAgentex, agent_id: str): """Test that the agent can use tools (e.g., weather tool).""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None @@ -100,9 +98,7 @@ class TestStreamingEvents: @pytest.mark.asyncio async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str): """Test sending an event and streaming the response.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/.dockerignore b/examples/tutorials/10_async/00_base/120_openai_agents/.dockerignore similarity index 
100% rename from examples/tutorials/00_sync/harness_pydantic_ai/.dockerignore rename to examples/tutorials/10_async/00_base/120_openai_agents/.dockerignore diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/Dockerfile b/examples/tutorials/10_async/00_base/120_openai_agents/Dockerfile similarity index 70% rename from examples/tutorials/10_async/00_base/harness_langgraph/Dockerfile rename to examples/tutorials/10_async/00_base/120_openai_agents/Dockerfile index 3e0bd696a..76fe0fdef 100644 --- a/examples/tutorials/10_async/00_base/harness_langgraph/Dockerfile +++ b/examples/tutorials/10_async/00_base/120_openai_agents/Dockerfile @@ -23,16 +23,16 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 # Copy pyproject.toml and README.md to install dependencies -COPY 10_async/00_base/harness_langgraph/pyproject.toml /app/harness_langgraph/pyproject.toml -COPY 10_async/00_base/harness_langgraph/README.md /app/harness_langgraph/README.md +COPY 10_async/00_base/120_openai_agents/pyproject.toml /app/120_openai_agents/pyproject.toml +COPY 10_async/00_base/120_openai_agents/README.md /app/120_openai_agents/README.md -WORKDIR /app/harness_langgraph +WORKDIR /app/120_openai_agents # Copy the project code -COPY 10_async/00_base/harness_langgraph/project /app/harness_langgraph/project +COPY 10_async/00_base/120_openai_agents/project /app/120_openai_agents/project # Copy the test files -COPY 10_async/00_base/harness_langgraph/tests /app/harness_langgraph/tests +COPY 10_async/00_base/120_openai_agents/tests /app/120_openai_agents/tests # Copy shared test utilities COPY test_utils /app/test_utils @@ -44,7 +44,7 @@ RUN uv pip install --system .[dev] pytest-asyncio httpx ENV PYTHONPATH=/app # Set test environment variables -ENV AGENT_NAME=a-harness-langgraph +ENV AGENT_NAME=ab120-openai-agents # Run the agent using uvicorn CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git 
a/examples/tutorials/10_async/00_base/130_harness_openai/README.md b/examples/tutorials/10_async/00_base/120_openai_agents/README.md similarity index 92% rename from examples/tutorials/10_async/00_base/130_harness_openai/README.md rename to examples/tutorials/10_async/00_base/120_openai_agents/README.md index ac439e4ed..0b55b00a2 100644 --- a/examples/tutorials/10_async/00_base/130_harness_openai/README.md +++ b/examples/tutorials/10_async/00_base/120_openai_agents/README.md @@ -5,7 +5,7 @@ delivers its output through the **unified harness surface**. ## What this demonstrates -Same `OpenAITurn` adapter as the sync tutorial (`060_harness_openai`), but the +Same `OpenAITurn` adapter as the sync tutorial (`050_openai_agents`), but the async ACP pushes the turn to the task stream via `UnifiedEmitter.auto_send_turn` instead of yielding over HTTP. `auto_send_turn` returns a `TurnResult` with the accumulated final text and normalized usage. diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/manifest.yaml b/examples/tutorials/10_async/00_base/120_openai_agents/manifest.yaml similarity index 82% rename from examples/tutorials/10_async/00_base/130_harness_openai/manifest.yaml rename to examples/tutorials/10_async/00_base/120_openai_agents/manifest.yaml index 7e67675fa..bd8d5cce5 100644 --- a/examples/tutorials/10_async/00_base/130_harness_openai/manifest.yaml +++ b/examples/tutorials/10_async/00_base/120_openai_agents/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../../ include_paths: - - 10_async/00_base/130_harness_openai + - 10_async/00_base/120_openai_agents - test_utils - dockerfile: 10_async/00_base/130_harness_openai/Dockerfile - dockerignore: 10_async/00_base/130_harness_openai/.dockerignore + dockerfile: 10_async/00_base/120_openai_agents/Dockerfile + dockerignore: 10_async/00_base/120_openai_agents/.dockerignore local_development: agent: @@ -16,7 +16,7 @@ local_development: agent: acp_type: async - name: ab130-harness-openai + name: 
ab120-openai-agents description: An async OpenAI Agents SDK agent on the unified harness surface temporal: @@ -46,7 +46,7 @@ deployment: global: agent: - name: "ab130-harness-openai" + name: "ab120-openai-agents" description: "An async OpenAI Agents SDK agent on the unified harness surface" replicaCount: 1 resources: diff --git a/examples/tutorials/00_sync/harness_codex/project/__init__.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/__init__.py similarity index 100% rename from examples/tutorials/00_sync/harness_codex/project/__init__.py rename to examples/tutorials/10_async/00_base/120_openai_agents/project/__init__.py diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/acp.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/acp.py similarity index 100% rename from examples/tutorials/10_async/00_base/130_harness_openai/project/acp.py rename to examples/tutorials/10_async/00_base/120_openai_agents/project/acp.py diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/agent.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/agent.py similarity index 100% rename from examples/tutorials/10_async/00_base/130_harness_openai/project/agent.py rename to examples/tutorials/10_async/00_base/120_openai_agents/project/agent.py diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/tools.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/tools.py similarity index 100% rename from examples/tutorials/10_async/00_base/130_harness_openai/project/tools.py rename to examples/tutorials/10_async/00_base/120_openai_agents/project/tools.py diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/pyproject.toml b/examples/tutorials/10_async/00_base/120_openai_agents/pyproject.toml similarity index 95% rename from examples/tutorials/10_async/00_base/130_harness_openai/pyproject.toml rename to 
examples/tutorials/10_async/00_base/120_openai_agents/pyproject.toml index c05e8c1c6..f48fab49f 100644 --- a/examples/tutorials/10_async/00_base/130_harness_openai/pyproject.toml +++ b/examples/tutorials/10_async/00_base/120_openai_agents/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "ab130-harness-openai" +name = "ab120-openai-agents" version = "0.1.0" description = "An async OpenAI Agents SDK agent on the unified harness surface" readme = "README.md" diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/tests/test_agent.py b/examples/tutorials/10_async/00_base/120_openai_agents/tests/test_agent.py similarity index 100% rename from examples/tutorials/10_async/00_base/130_harness_openai/tests/test_agent.py rename to examples/tutorials/10_async/00_base/120_openai_agents/tests/test_agent.py diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/Dockerfile b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/Dockerfile deleted file mode 100644 index 1272027cf..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml /app/120_openai_agents_local_sandbox/pyproject.toml -COPY 10_async/00_base/120_openai_agents_local_sandbox/README.md /app/120_openai_agents_local_sandbox/README.md - -WORKDIR 
/app/120_openai_agents_local_sandbox - -# Copy the project code -COPY 10_async/00_base/120_openai_agents_local_sandbox/project /app/120_openai_agents_local_sandbox/project - -# Copy the test files -COPY 10_async/00_base/120_openai_agents_local_sandbox/tests /app/120_openai_agents_local_sandbox/tests - -# Copy shared test utilities -COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] pytest-asyncio httpx - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=ab120-openai-agents-local-sandbox - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/README.md b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/README.md deleted file mode 100644 index 58d422b39..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/README.md +++ /dev/null @@ -1,119 +0,0 @@ -# Tutorial 120: Async OpenAI Agents SDK with a Local Sandbox - -This tutorial demonstrates how to build an **async (non-Temporal)** agent on AgentEx -using the [OpenAI Agents SDK](https://developers.openai.com/api/docs/guides/agents) -and its **sandbox** runtime, running with the **local** (`unix_local`) backend. - -The agent is a "local sandbox assistant": it answers questions by actually running -real shell commands (e.g. `python3 --version`, `ls /tmp`, `python3 -c "..."`) -instead of guessing. - -This mirrors the Pydantic AI async tutorial (`110_pydantic_ai`): same async ACP -model (`acp_type: async`, `temporal.enabled: false`), same per-task `adk.state` -multi-turn memory pattern. The difference is the runtime — here we use the OpenAI -Agents SDK `SandboxAgent` with the local sandbox backend. 
- -## Key Concepts - -### Async ACP (base) -The async ACP model is event-driven: `on_task_create` initializes per-task state, -and `on_task_event_send` handles each user message. Conversation history is -persisted across turns via `adk.state`. - -### OpenAI Agents SDK Sandbox -The OpenAI Agents SDK ships `agents.sandbox`, which lets you give an agent -**capabilities** (instead of hand-written tools) that the runtime turns into real -tools backed by a sandbox: - -- **`SandboxAgent`**: an `Agent` that is granted sandbox capabilities. -- **Capabilities** (`from agents.sandbox.capabilities import Shell, Filesystem, Memory`): - each capability expands into a set of real tools. This tutorial uses `Shell`, which - lets the model run real shell commands. -- **`SandboxRunConfig`** + a sandbox **client**: tells the runtime *where* the tools - actually execute. - -### The LOCAL sandbox (`UnixLocalSandboxClient`) -This tutorial uses the local backend -(`from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient, UnixLocalSandboxClientOptions`), -`backend_id="unix_local"`. The local sandbox runs shell commands **ON THE HOST** — -the agent's own container/process. There is **no Docker, no Temporal, and no remote -sandbox infrastructure** involved. 
- -The sandbox is wired up through the SDK's `RunConfig`: - -```python -from agents import Runner, set_tracing_disabled -from agents.run_config import RunConfig -from agents.sandbox import SandboxAgent, SandboxRunConfig -from agents.sandbox.capabilities import Shell -from agents.sandbox.sandboxes.unix_local import ( - UnixLocalSandboxClient, - UnixLocalSandboxClientOptions, -) - -set_tracing_disabled(True) # avoid api.openai.com tracing 401 behind a gateway - -agent = SandboxAgent( - name="Local Sandbox Assistant", - instructions="...use the shell tools to actually run commands...", - capabilities=[Shell()], -) -run_config = RunConfig( - sandbox=SandboxRunConfig( - client=UnixLocalSandboxClient(), - options=UnixLocalSandboxClientOptions(), - ) -) -result = await Runner.run(agent, input=input_list, run_config=run_config) -print(result.final_output) -``` - -`Runner.run` drives the full tool-call loop internally: the model issues shell -commands, the local sandbox runs them on the host, the output is fed back, and the -loop continues until the model produces a final answer. Because the loop is -self-contained, the async handler runs the agent and persists a single final -`TextContent` rather than streaming tokens. - -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | Async ACP server + handlers (`adk.state` multi-turn, runs the sandbox agent) | -| `project/agent.py` | `SandboxAgent` + `RunConfig(sandbox=...)` wiring + `run_agent` | -| `project/tools.py` | Sandbox capability factory (`Shell`) | -| `tests/test_agent.py` | Integration tests (polling pattern) | -| `manifest.yaml` | Agent configuration | - -## Running Locally - -```bash -# From this directory -agentex agents run -``` - -Set `OPENAI_API_KEY` (or `LITELLM_API_KEY` if you're behind the Scale LiteLLM -gateway) in your environment or in a `.env` file in `project/` so the agent can call -the model. 
- -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` - -## Notes - -- **No infra required.** Because this uses the `unix_local` backend, the shell tools - run directly in the agent's process — no Docker daemon, no Temporal, no remote - sandbox. Swap the client for a remote/containerized backend to isolate execution. -- **Tracing.** `set_tracing_disabled(True)` turns off the OpenAI Agents SDK's native - tracer (which would otherwise try to ship traces to `api.openai.com`). The manifest - also sets `OPENAI_AGENTS_DISABLE_TRACING=1`. AgentEx/SGP tracing still runs via the - tracing manager configured in `acp.py` when SGP credentials are present. -- **Capabilities are the tools.** To let the agent do more, add capabilities in - `project/tools.py` (e.g. `Filesystem()`, `Memory()`). - -## Further Reading - -- OpenAI Agents SDK guide: https://developers.openai.com/api/docs/guides/agents -- The Temporal variant of this tutorial: `10_async/10_temporal/120_openai_agents_local_sandbox` diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/manifest.yaml b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/manifest.yaml deleted file mode 100644 index e0c3c0596..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/manifest.yaml +++ /dev/null @@ -1,61 +0,0 @@ -build: - context: - root: ../../../ - include_paths: - - 10_async/00_base/120_openai_agents_local_sandbox - - test_utils - dockerfile: 10_async/00_base/120_openai_agents_local_sandbox/Dockerfile - dockerignore: 10_async/00_base/120_openai_agents_local_sandbox/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: async - name: ab120-openai-agents-local-sandbox - description: An async OpenAI Agents SDK agent using a local (unix_local) sandbox - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: 
openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - - env: - OPENAI_AGENTS_DISABLE_TRACING: "1" - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "ab120-openai-agents-local-sandbox" - description: "An async OpenAI Agents SDK agent using a local (unix_local) sandbox" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/acp.py b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/acp.py deleted file mode 100644 index 6ff475873..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/acp.py +++ /dev/null @@ -1,149 +0,0 @@ -"""ACP handler for the async OpenAI Agents SDK local-sandbox agent. - -Uses the async ACP model (``acp_type: async``, ``temporal.enabled: false``), -mirroring the Pydantic AI tutorial (110). The difference is the runtime: here we -run an OpenAI Agents SDK ``SandboxAgent`` against the **local** sandbox backend -(``UnixLocalSandboxClient``), which executes real shell commands on the host. - -The OpenAI Agents SDK sandbox runtime drives the full tool-call loop internally -inside ``Runner.run`` (model -> shell command -> output -> model -> ... -> final -answer), so this handler runs the agent and persists a single final -``TextContent`` rather than streaming tokens itself. - -Multi-turn memory is persisted via ``adk.state``: on each turn we load the prior -OpenAI Agents SDK input list from state, run the agent with it, then save the -updated list (``result.to_input_list()``) back. 
Without this, every turn would be -a fresh stateless run and the agent would forget the prior conversation. -""" - -from __future__ import annotations - -import os -from typing import Any - -from dotenv import load_dotenv - -load_dotenv() - -import agentex.lib.adk as adk -from project.agent import run_agent -from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams -from agentex.lib.types.fastacp import AsyncACPConfig -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.types.text_content import TextContent -from agentex.lib.utils.model_utils import BaseModel -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config - -logger = make_logger(__name__) - -# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client -# compatibility, so the same example works behind the Scale LiteLLM gateway. -_litellm_key = os.environ.get("LITELLM_API_KEY") -if _litellm_key and not os.environ.get("OPENAI_API_KEY"): - os.environ["OPENAI_API_KEY"] = _litellm_key - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -acp = FastACP.create( - acp_type="async", - config=AsyncACPConfig(type="base"), -) - - -class ConversationState(BaseModel): - """Per-task conversation state persisted via ``adk.state``. - - ``input_list`` holds the OpenAI Agents SDK conversation history — the same - structure ``Runner.run`` accepts as input and ``result.to_input_list()`` - returns. Persisting it between turns gives the agent multi-turn memory. 
- """ - - input_list: list[dict[str, Any]] = [] - turn_number: int = 0 - - -@acp.on_task_create -async def handle_task_create(params: CreateTaskParams): - """Initialize per-task state on task creation. - - A fresh task starts with no message history; the conversation is built up by - ``handle_task_event_send`` on each subsequent user message. - """ - logger.info(f"Task created: {params.task.id}") - await adk.state.create( - task_id=params.task.id, - agent_id=params.agent.id, - state=ConversationState(), - ) - - -@acp.on_task_event_send -async def handle_task_event_send(params: SendEventParams): - """Handle each user message: load prior history, run the agent, save updated history.""" - task_id = params.task.id - agent_id = params.agent.id - user_message = params.event.content.content - - logger.info(f"Processing message for thread {task_id}") - - # Echo the user's message into the task history so it shows up in the UI. - await adk.messages.create(task_id=task_id, content=params.event.content) - - # Load the previous conversation history from state. If state is missing - # (e.g. task wasn't initialised via on_task_create), fall back to a fresh - # one so the agent still responds — just without memory of prior turns. - task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id) - if task_state is None: - state = ConversationState() - task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state) - else: - state = ConversationState.model_validate(task_state.state) - - state.turn_number += 1 - state.input_list.append({"role": "user", "content": user_message}) - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name=f"Turn {state.turn_number}", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - # The OpenAI Agents SDK sandbox runtime runs the full tool-call loop - # internally (model -> shell command on the local host -> output -> - # model -> ... 
-> final answer), so we get a single final result. - result = await run_agent(state.input_list) - final_output = result.final_output - - # Persist the assistant's final answer as a TaskMessage so it shows up - # in the UI. (Unlike the streaming Pydantic AI tutorial, the sandbox run - # is non-streaming, so we post the final text ourselves.) - await adk.messages.create( - task_id=task_id, - content=TextContent(author="agent", content=final_output), - ) - - # Save the updated message history so the next turn picks up here. - state.input_list = result.to_input_list() - await adk.state.update( - state_id=task_state.id, - task_id=task_id, - agent_id=agent_id, - state=state, - ) - - if turn_span: - turn_span.output = {"final_output": final_output} - - -@acp.on_task_cancel -async def handle_task_canceled(params: CancelTaskParams): - logger.info(f"Task canceled: {params.task.id}") diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/agent.py b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/agent.py deleted file mode 100644 index 177bb287d..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/agent.py +++ /dev/null @@ -1,95 +0,0 @@ -"""OpenAI Agents SDK local-sandbox agent definition (async, non-Temporal). - -This mirrors the Pydantic AI tutorial (110): the agent is the boundary between -this module and the API layer (acp.py). The difference is the runtime — here we -use the OpenAI Agents SDK ``SandboxAgent`` together with the **local** sandbox -backend (``UnixLocalSandboxClient``). - -The local sandbox runs shell commands ON THE HOST — the agent's own -container/process. There is no Docker, no Temporal, and no remote sandbox -infrastructure. The OpenAI Agents SDK runs its own tool-call loop internally: -when the model decides to run a shell command, the sandbox executes it locally -and feeds the output back to the model until it produces a final answer. 
-""" - -from __future__ import annotations - -from datetime import datetime - -from agents import Runner, set_tracing_disabled -from agents.sandbox import SandboxAgent, SandboxRunConfig -from agents.run_config import RunConfig -from agents.sandbox.sandboxes.unix_local import ( - UnixLocalSandboxClient, - UnixLocalSandboxClientOptions, -) - -from project.tools import get_capabilities - -# Disable the openai-agents SDK's native tracer so it doesn't ship traces to -# api.openai.com using OPENAI_API_KEY (which may be a gateway/proxy key and would -# 401). Agentex tracing still runs via the tracing manager configured in acp.py. -set_tracing_disabled(True) - -MODEL_NAME = "gpt-4o-mini" -INSTRUCTIONS = """You are a local sandbox assistant. - -Current date and time: {timestamp} - -You have access to shell tools that run real commands on the local machine. - -Guidelines: -- ALWAYS use the shell tools to actually run commands — never guess or make up - output. If the user asks for the Python version, run `python3 --version`. If - they ask to list files, run `ls`. If they ask you to compute something, use - `python3 -c "..."`. -- Run the minimal command(s) needed to answer the question. -- Report the real command output back to the user, concisely. -""" - - -def create_agent() -> SandboxAgent: - """Build and return the OpenAI Agents SDK sandbox agent. - - The agent is granted shell capabilities (see ``project.tools``). The actual - sandbox backend (where the shell commands run) is supplied at run time via - the ``RunConfig`` returned by ``create_run_config``. - """ - return SandboxAgent( - name="Local Sandbox Assistant", - model=MODEL_NAME, - instructions=INSTRUCTIONS.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ), - capabilities=get_capabilities(), - ) - - -def create_run_config() -> RunConfig: - """Build the RunConfig that points the agent at the LOCAL sandbox backend. 
- - ``UnixLocalSandboxClient`` (backend_id="unix_local") runs shell commands on - the host — the agent's own process — so no Docker or remote infra is needed. - """ - return RunConfig( - sandbox=SandboxRunConfig( - client=UnixLocalSandboxClient(), - options=UnixLocalSandboxClientOptions(), - ) - ) - - -async def run_agent(input_list: list) -> "Runner": - """Run the sandbox agent over the conversation so far and return the result. - - The OpenAI Agents SDK handles the full tool-call loop internally: the model - issues shell commands, the local sandbox runs them on the host, and the - output is fed back until the model produces a final answer. - - We pass the full ``input_list`` (prior turns + the new user message) so the - agent has conversation memory across turns; the caller persists - ``result.to_input_list()`` back into ``adk.state`` for the next turn. - """ - agent = create_agent() - run_config = create_run_config() - return await Runner.run(agent, input=input_list, run_config=run_config, max_turns=10) diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/tools.py b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/tools.py deleted file mode 100644 index a931fa273..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/tools.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Sandbox capabilities for the async OpenAI Agents SDK local-sandbox agent. - -Unlike the Pydantic AI tutorial (110), this agent does not register hand-written -Python functions as tools. Instead it is given *capabilities* — the OpenAI Agents -SDK sandbox runtime turns each capability into a real set of tools (run a shell -command, read a file, etc.) backed by an actual sandbox backend. - -Here we use the ``Shell`` capability, which lets the model run real shell commands. 
-With the local (``unix_local``) backend those commands execute ON THE HOST — the -agent's own process/container — so there is no Docker, Temporal, or remote infra -involved. This module hosts the capability factory so the agent wiring in -``project.agent`` stays readable and the capability set is easy to extend -(e.g. add ``Filesystem()`` or ``Memory()``). -""" - -from __future__ import annotations - -from agents.sandbox.capabilities import Shell - - -def get_capabilities() -> list: - """Return the sandbox capabilities the agent is allowed to use. - - Returns: - A list of OpenAI Agents SDK sandbox capabilities. We grant ``Shell`` so - the agent can run real shell commands on the local machine. Add - ``Filesystem()`` or ``Memory()`` here to expand what the agent can do. - """ - return [Shell()] diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml deleted file mode 100644 index 75c6254f3..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "ab120-openai-agents-local-sandbox" -version = "0.1.0" -description = "An async OpenAI Agents SDK agent using a local (unix_local) sandbox" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "openai-agents>=0.14.3,<0.15", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/tests/test_agent.py 
b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/tests/test_agent.py deleted file mode 100644 index 0c7904eac..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/tests/test_agent.py +++ /dev/null @@ -1,122 +0,0 @@ -"""Tests for the async OpenAI Agents SDK local-sandbox agent. - -This test suite validates that the agent actually runs shell commands in the -LOCAL sandbox (unix_local backend) by polling for the agent's response: -- Ask for the Python version -> response contains "Python 3" -- Ask it to compute 21 * 2 with python3 -> response contains "42" - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. Run: pytest test_agent.py -v - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: ab120-openai-agents-local-sandbox) -""" - -import os -import uuid - -import pytest -import pytest_asyncio -from test_utils.async_utils import send_event_and_poll_yielding - -from agentex import AsyncAgentex -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "ab120-openai-agents-local-sandbox") - - -@pytest_asyncio.fixture -async def client(): - """Create an AsyncAgentex client instance for testing.""" - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - """Return the agent name for testing.""" - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent 
with name {agent_name} not found.") - - -async def _send_and_collect_agent_text( - client: AsyncAgentex, agent_id: str, task_id: str, user_message: str -) -> str: - """Send a user message and accumulate all agent text responses into a string.""" - parts: list[str] = [] - async for message in send_event_and_poll_yielding( - client=client, - agent_id=agent_id, - task_id=task_id, - user_message=user_message, - timeout=60, - sleep_interval=1.0, - yield_updates=True, - ): - content = message.content - if content and content.type == "text" and content.author == "agent": - if content.content and content.content not in parts: - parts.append(content.content) - return "\n".join(parts) - - -class TestLocalSandboxEvents: - """Test the async local-sandbox OpenAI Agents SDK agent.""" - - @pytest.mark.asyncio - async def test_shell_python_version(self, client: AsyncAgentex, agent_id: str): - """The agent should run `python3 --version` in the local sandbox. - - The sandbox runs on Python 3.12, so the real output contains "Python 3". - """ - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) - task = task_response.result - assert task is not None - - text = await _send_and_collect_agent_text( - client, - agent_id, - task.id, - "Use your shell to print the Python version on this machine, then " - "tell me what it is.", - ) - assert text, "Expected a non-empty response from the sandbox agent." 
- assert "Python 3" in text - - @pytest.mark.asyncio - async def test_shell_compute(self, client: AsyncAgentex, agent_id: str): - """The agent should use python3 in the sandbox to compute 21 * 2 == 42.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) - task = task_response.result - assert task is not None - - text = await _send_and_collect_agent_text( - client, - agent_id, - task.id, - "Use python3 in your shell to compute 21 * 2 and tell me the result.", - ) - assert text, "Expected a non-empty response from the sandbox agent." - assert "42" in text - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/Dockerfile b/examples/tutorials/10_async/00_base/130_harness_openai/Dockerfile deleted file mode 100644 index a31c89a31..000000000 --- a/examples/tutorials/10_async/00_base/130_harness_openai/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 10_async/00_base/130_harness_openai/pyproject.toml /app/130_harness_openai/pyproject.toml -COPY 10_async/00_base/130_harness_openai/README.md /app/130_harness_openai/README.md - -WORKDIR /app/130_harness_openai - -# Copy the project code -COPY 10_async/00_base/130_harness_openai/project /app/130_harness_openai/project - -# Copy the test files -COPY 10_async/00_base/130_harness_openai/tests /app/130_harness_openai/tests - -# Copy shared test utilities 
-COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] pytest-asyncio httpx - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=ab130-harness-openai - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/__init__.py b/examples/tutorials/10_async/00_base/130_harness_openai/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/.dockerignore b/examples/tutorials/10_async/00_base/140_codex/.dockerignore similarity index 100% rename from examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/.dockerignore rename to examples/tutorials/10_async/00_base/140_codex/.dockerignore diff --git a/examples/tutorials/10_async/00_base/harness_codex/Dockerfile b/examples/tutorials/10_async/00_base/140_codex/Dockerfile similarity index 64% rename from examples/tutorials/10_async/00_base/harness_codex/Dockerfile rename to examples/tutorials/10_async/00_base/140_codex/Dockerfile index 06b76aae2..ca5b99ffe 100644 --- a/examples/tutorials/10_async/00_base/harness_codex/Dockerfile +++ b/examples/tutorials/10_async/00_base/140_codex/Dockerfile @@ -22,18 +22,18 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 -COPY 10_async/00_base/harness_codex/pyproject.toml /app/harness_codex/pyproject.toml -COPY 10_async/00_base/harness_codex/README.md /app/harness_codex/README.md +COPY 10_async/00_base/140_codex/pyproject.toml /app/140_codex/pyproject.toml +COPY 10_async/00_base/140_codex/README.md /app/140_codex/README.md -WORKDIR /app/harness_codex +WORKDIR /app/140_codex -COPY 10_async/00_base/harness_codex/project /app/harness_codex/project -COPY 10_async/00_base/harness_codex/tests 
/app/harness_codex/tests +COPY 10_async/00_base/140_codex/project /app/140_codex/project +COPY 10_async/00_base/140_codex/tests /app/140_codex/tests COPY test_utils /app/test_utils RUN uv pip install --system .[dev] ENV PYTHONPATH=/app -ENV AGENT_NAME=ab-harness-codex +ENV AGENT_NAME=ab140-codex CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/00_base/harness_codex/README.md b/examples/tutorials/10_async/00_base/140_codex/README.md similarity index 94% rename from examples/tutorials/10_async/00_base/harness_codex/README.md rename to examples/tutorials/10_async/00_base/140_codex/README.md index 9bbcd927a..a00ddb562 100644 --- a/examples/tutorials/10_async/00_base/harness_codex/README.md +++ b/examples/tutorials/10_async/00_base/140_codex/README.md @@ -1,4 +1,4 @@ -# harness_codex (async base) +# 140_codex (async base) Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap, `CodexTurn`, and `UnifiedEmitter` for an **async** (Redis-streaming, no Temporal) @@ -28,7 +28,7 @@ Live runs require: ```bash cd /path/to/scale-agentex-python -uv run --all-packages --all-extras pytest examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py -q +uv run --all-packages --all-extras pytest examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py -q ``` ## Running live integration tests diff --git a/examples/tutorials/10_async/00_base/harness_codex/conftest.py b/examples/tutorials/10_async/00_base/140_codex/conftest.py similarity index 100% rename from examples/tutorials/10_async/00_base/harness_codex/conftest.py rename to examples/tutorials/10_async/00_base/140_codex/conftest.py diff --git a/examples/tutorials/10_async/00_base/harness_codex/manifest.yaml b/examples/tutorials/10_async/00_base/140_codex/manifest.yaml similarity index 84% rename from examples/tutorials/10_async/00_base/harness_codex/manifest.yaml rename to examples/tutorials/10_async/00_base/140_codex/manifest.yaml 
index e88e2029d..be020b141 100644 --- a/examples/tutorials/10_async/00_base/harness_codex/manifest.yaml +++ b/examples/tutorials/10_async/00_base/140_codex/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../../ include_paths: - - 10_async/00_base/harness_codex + - 10_async/00_base/140_codex - test_utils - dockerfile: 10_async/00_base/harness_codex/Dockerfile - dockerignore: 10_async/00_base/harness_codex/.dockerignore + dockerfile: 10_async/00_base/140_codex/Dockerfile + dockerignore: 10_async/00_base/140_codex/.dockerignore local_development: agent: @@ -16,7 +16,7 @@ local_development: agent: acp_type: async - name: ab-harness-codex + name: ab140-codex description: Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess temporal: @@ -46,7 +46,7 @@ deployment: global: agent: - name: "ab-harness-codex" + name: "ab140-codex" description: "Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess" replicaCount: 1 resources: diff --git a/examples/tutorials/00_sync/harness_langgraph/project/__init__.py b/examples/tutorials/10_async/00_base/140_codex/project/__init__.py similarity index 100% rename from examples/tutorials/00_sync/harness_langgraph/project/__init__.py rename to examples/tutorials/10_async/00_base/140_codex/project/__init__.py diff --git a/examples/tutorials/10_async/00_base/harness_codex/project/acp.py b/examples/tutorials/10_async/00_base/140_codex/project/acp.py similarity index 100% rename from examples/tutorials/10_async/00_base/harness_codex/project/acp.py rename to examples/tutorials/10_async/00_base/140_codex/project/acp.py diff --git a/examples/tutorials/10_async/00_base/harness_codex/pyproject.toml b/examples/tutorials/10_async/00_base/140_codex/pyproject.toml similarity index 96% rename from examples/tutorials/10_async/00_base/harness_codex/pyproject.toml rename to examples/tutorials/10_async/00_base/140_codex/pyproject.toml index c25a65c47..bdf7c462f 100644 --- 
a/examples/tutorials/10_async/00_base/harness_codex/pyproject.toml +++ b/examples/tutorials/10_async/00_base/140_codex/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "ab-harness-codex" +name = "ab140-codex" version = "0.1.0" description = "Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess" readme = "README.md" diff --git a/examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py b/examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py similarity index 99% rename from examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py rename to examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py index b50ee9116..68ca5aded 100644 --- a/examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py +++ b/examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py @@ -129,7 +129,7 @@ async def test_yield_turn_is_passthrough(self): LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1" AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "ab-harness-codex") +AGENT_NAME = os.environ.get("AGENT_NAME", "ab140-codex") @pytest.mark.skipif( diff --git a/examples/tutorials/10_async/00_base/harness_codex/project/__init__.py b/examples/tutorials/10_async/00_base/harness_codex/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/README.md b/examples/tutorials/10_async/00_base/harness_langgraph/README.md deleted file mode 100644 index 7efe28207..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# Tutorial: Async Harness LangGraph Agent - -This tutorial demonstrates how to build an **async** LangGraph agent on AgentEx -using the **unified harness surface**: - -```python -turn = LangGraphTurn(stream, 
model=None) -emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...) -result = await emitter.auto_send_turn(turn) -``` - -Compare with ``100_langgraph``, which uses the bespoke -``stream_langgraph_events`` helper directly. - -## Key Concepts - -### Unified Harness - -`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw -LangGraph `astream()` generator and exposes `events` (an async generator of -`TaskMessageUpdate`) and `usage()` (token counts captured from the final -`AIMessage`). - -`UnifiedEmitter.auto_send_turn(turn)` pushes each event to Redis via -`streaming_task_message_context`, accumulates the final text, and returns a -`TurnResult(final_text=..., usage=...)`. - -The same `LangGraphTurn` object can also be passed to -`UnifiedEmitter.yield_turn` in the sync channel. - -### AGX1-377 Note - -LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates" -node outputs). The `SpanDeriver` does not open tool spans from Full events -today; that gap is tracked in AGX1-373. 
- -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | ACP server using unified harness (LangGraphTurn + auto_send_turn) | -| `project/graph.py` | LangGraph state graph (identical to 100_langgraph) | -| `project/tools.py` | Tool definitions (weather example) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration (name: a-harness-langgraph) | - -## Running Locally - -```bash -agentex agents run -``` - -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/manifest.yaml b/examples/tutorials/10_async/00_base/harness_langgraph/manifest.yaml deleted file mode 100644 index bb19e25b3..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/manifest.yaml +++ /dev/null @@ -1,58 +0,0 @@ -build: - context: - root: ../../../ - include_paths: - - 10_async/00_base/harness_langgraph - - test_utils - dockerfile: 10_async/00_base/harness_langgraph/Dockerfile - dockerignore: 10_async/00_base/harness_langgraph/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: async - name: a-harness-langgraph - description: An async LangGraph agent using the unified harness surface (LangGraphTurn + UnifiedEmitter.auto_send_turn) - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "a-harness-langgraph" - description: "An async LangGraph agent using the unified 
harness surface" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/__init__.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/acp.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/acp.py deleted file mode 100644 index a99395424..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/project/acp.py +++ /dev/null @@ -1,109 +0,0 @@ -"""ACP handler for async harness LangGraph agent. - -Uses the unified harness surface: ``LangGraphTurn`` wraps the LangGraph -``astream()`` generator, and ``UnifiedEmitter.auto_send_turn`` streams events -to Redis and returns a ``TurnResult`` with the accumulated final text. - -Differences from ``100_langgraph`` (bespoke path): -- No ``create_langgraph_tracing_handler`` boilerplate. -- ``stream_langgraph_events`` is replaced by - ``UnifiedEmitter.auto_send_turn(LangGraphTurn(stream))``. -- Tool calls/responses go through ``streaming_task_message_context`` - (same code path as text deltas), making the event stream channel-agnostic. -- Usage data (token counts) is captured on ``LangGraphTurn.usage()`` after - ``auto_send_turn`` returns. - -AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` -events (from "updates"). The ``SpanDeriver`` does not open tool spans from -Full events today; that gap is tracked in AGX1-373. 
-""" - -from __future__ import annotations - -import os - -from dotenv import load_dotenv - -load_dotenv() - -import agentex.lib.adk as adk -from project.graph import create_graph -from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams -from agentex.lib.types.fastacp import AsyncACPConfig -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.lib.core.harness.emitter import UnifiedEmitter -from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn -from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config - -logger = make_logger(__name__) - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -acp = FastACP.create( - acp_type="async", - config=AsyncACPConfig(type="base"), -) - -_graph = None - - -async def get_graph(): - global _graph - if _graph is None: - _graph = await create_graph() - return _graph - - -@acp.on_task_event_send -async def handle_task_event_send(params: SendEventParams): - """Handle incoming events, streaming tokens and tool calls via unified harness.""" - graph = await get_graph() - task_id = params.task.id - user_message = params.event.content.content - - logger.info(f"Processing message for thread {task_id}") - - await adk.messages.create(task_id=task_id, content=params.event.content) - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name="message", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - stream = graph.astream( - {"messages": [{"role": "user", "content": user_message}]}, - config={"configurable": {"thread_id": task_id}}, - stream_mode=["messages", "updates"], - ) - - turn = 
LangGraphTurn(stream, model=None) - emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - - result = await emitter.auto_send_turn(turn) - - if turn_span: - turn_span.output = {"final_output": result.final_text} - - -@acp.on_task_create -async def handle_task_create(params: CreateTaskParams): - logger.info(f"Task created: {params.task.id}") - - -@acp.on_task_cancel -async def handle_task_canceled(params: CancelTaskParams): - logger.info(f"Task canceled: {params.task.id}") diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/graph.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/graph.py deleted file mode 100644 index 4aeac3b3c..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/project/graph.py +++ /dev/null @@ -1,67 +0,0 @@ -"""LangGraph graph definition for the harness_langgraph async agent. - -Identical to ``100_langgraph/project/graph.py`` — the graph definition is not -affected by the harness migration. Only ``acp.py`` changes. -""" - -from __future__ import annotations - -from typing import Any, Annotated -from datetime import datetime -from typing_extensions import TypedDict - -from langgraph.graph import START, StateGraph -from langchain_openai import ChatOpenAI -from langgraph.prebuilt import ToolNode, tools_condition -from langchain_core.messages import SystemMessage -from langgraph.graph.message import add_messages - -from project.tools import TOOLS -from agentex.lib.adk import create_checkpointer - -MODEL_NAME = "gpt-5" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. 
- -Current date and time: {timestamp} - -Guidelines: -- Be concise and helpful -- Use tools when they would help answer the user's question -- If you're unsure, ask clarifying questions -- Always provide accurate information -""" - - -class AgentState(TypedDict): - """State schema for the agent graph.""" - - messages: Annotated[list[Any], add_messages] - - -async def create_graph(): - """Create and compile the agent graph with checkpointer.""" - llm = ChatOpenAI( - model=MODEL_NAME, - reasoning={"effort": "high", "summary": "auto"}, - ) - llm_with_tools = llm.bind_tools(TOOLS) - - checkpointer = await create_checkpointer() - - def agent_node(state: AgentState) -> dict[str, Any]: - """Process the current state and generate a response.""" - messages = state["messages"] - if not messages or not isinstance(messages[0], SystemMessage): - system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")) - messages = [SystemMessage(content=system_content)] + messages - response = llm_with_tools.invoke(messages) - return {"messages": [response]} - - builder = StateGraph(AgentState) - builder.add_node("agent", agent_node) - builder.add_node("tools", ToolNode(tools=TOOLS)) - builder.add_edge(START, "agent") - builder.add_conditional_edges("agent", tools_condition, "tools") - builder.add_edge("tools", "agent") - - return builder.compile(checkpointer=checkpointer) diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/tools.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/tools.py deleted file mode 100644 index 6e7614300..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/project/tools.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Tool definitions for the harness_langgraph async agent.""" - -from langchain_core.tools import Tool - - -def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. 
- - Returns: - A string describing the weather conditions. - """ - return f"The weather in {city} is sunny and 72°F" - - -weather_tool = Tool( - name="get_weather", - func=get_weather, - description="Get the current weather for a city. Input should be a city name.", -) - -TOOLS = [weather_tool] diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/tests/test_agent.py b/examples/tutorials/10_async/00_base/harness_langgraph/tests/test_agent.py deleted file mode 100644 index 762b2b90c..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/tests/test_agent.py +++ /dev/null @@ -1,100 +0,0 @@ -""" -Tests for the async harness LangGraph agent. - -Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.auto_send_turn) -end-to-end against a live AgentEx server. - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: a-harness-langgraph) -""" - -import os - -import pytest -import pytest_asyncio - -from agentex import AsyncAgentex -from agentex.types import TextContentParam -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest -from agentex.lib.sdk.fastacp.base.base_acp_server import uuid - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "a-harness-langgraph") - - -@pytest_asyncio.fixture -async def client(): - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingEvents: - @pytest.mark.asyncio - async def test_send_event(self, client: AsyncAgentex, agent_id: str): - 
task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="Hello! What can you help me with?", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - @pytest.mark.asyncio - async def test_tool_calling(self, client: AsyncAgentex, agent_id: str): - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="What's the weather in San Francisco?", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - -class TestStreamingEvents: - @pytest.mark.asyncio - async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str): - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="Tell me a short joke.", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/Dockerfile b/examples/tutorials/10_async/00_base/harness_pydantic_ai/Dockerfile deleted file mode 100644 index 3c1b9dfea..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get 
install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 10_async/00_base/harness_pydantic_ai/pyproject.toml /app/harness_pydantic_ai/pyproject.toml -COPY 10_async/00_base/harness_pydantic_ai/README.md /app/harness_pydantic_ai/README.md - -WORKDIR /app/harness_pydantic_ai - -# Copy the project code -COPY 10_async/00_base/harness_pydantic_ai/project /app/harness_pydantic_ai/project - -# Copy the test files -COPY 10_async/00_base/harness_pydantic_ai/tests /app/harness_pydantic_ai/tests - -# Copy shared test utilities -COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] pytest-asyncio httpx - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=ab-harness-pydantic-ai - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/README.md b/examples/tutorials/10_async/00_base/harness_pydantic_ai/README.md deleted file mode 100644 index 51acb62bd..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# Async Pydantic AI Harness Test Agent - -A minimal **async** (Redis-streaming) Pydantic AI agent that drives the -**unified harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`) -directly. - -## Why this agent exists - -The `10_async/00_base/110_pydantic_ai` tutorial streams via the -`stream_pydantic_ai_events` helper (which uses the unified surface internally). 
-This harness test agent calls `emitter.auto_send_turn(...)` **explicitly** at the -agent-author level, making the unified-surface wiring visible and giving the -async channel direct coverage. - -## How it wires the unified surface - -In `project/acp.py`: - -```python -emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, -) -async with agent.run_stream_events(user_message, message_history=previous_messages) as stream: - turn = PydanticAITurn(tee_messages(stream), model=MODEL_NAME, coalesce_tool_requests=True) - result = await emitter.auto_send_turn(turn) -``` - -- `coalesce_tool_requests=True` is required on the async/auto_send path until - AGX1-377 lands: tool requests are delivered as a single `Full(tool_request)` - rather than streamed `Start + Delta + Done`. -- The `UnifiedEmitter` is constructed from the ACP context (`task_id` + - `trace_id` + `parent_span_id`) so messages auto-send to the task stream - (Redis) and tracing is automatic. -- Multi-turn memory is persisted via `adk.state` (pydantic-ai message history - round-tripped through `ModelMessagesTypeAdapter`). - -## Files - -- `project/acp.py` — async ACP handler using `emitter.auto_send_turn(...)`. -- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool. -- `project/tools.py` — `get_weather(city)` returning a constant. -- `tests/test_agent.py` — live integration test (requires a running agent). - -## Tools - -- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string. - -## Offline coverage - -Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake -streaming/tracing, no network) live in the SDK repo at -`tests/lib/core/harness/test_harness_pydantic_ai_async.py`. 
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/00_base/harness_pydantic_ai/manifest.yaml deleted file mode 100644 index f9e50f329..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/manifest.yaml +++ /dev/null @@ -1,58 +0,0 @@ -build: - context: - root: ../../../ - include_paths: - - 10_async/00_base/harness_pydantic_ai - - test_utils - dockerfile: 10_async/00_base/harness_pydantic_ai/Dockerfile - dockerignore: 10_async/00_base/harness_pydantic_ai/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: async - name: ab-harness-pydantic-ai - description: An async Pydantic AI harness test agent using the unified emitter surface - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "ab-harness-pydantic-ai" - description: "An async Pydantic AI harness test agent using the unified emitter surface" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/__init__.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/acp.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/acp.py deleted file mode 100644 
index 95b638f8b..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/acp.py +++ /dev/null @@ -1,159 +0,0 @@ -"""ACP handler for the async harness Pydantic AI test agent. - -This agent exercises the UNIFIED HARNESS SURFACE on the async (Redis-streaming) -channel — ``UnifiedEmitter.auto_send_turn(PydanticAITurn(...))`` -— calling it directly rather than via the ``stream_pydantic_ai_events`` helper -(which the ``110_pydantic_ai`` tutorial uses). This makes the unified-surface -wiring explicit at the agent-author level. - -Multi-turn memory is persisted via ``adk.state``: on each turn we load the -previous pydantic-ai ``message_history`` from state, run the agent with it, -then save the updated history back. -""" - -from __future__ import annotations - -import os -from typing import Any, AsyncIterator - -from dotenv import load_dotenv - -load_dotenv() - -from pydantic_ai.run import AgentRunResultEvent -from pydantic_ai.messages import ModelMessagesTypeAdapter - -import agentex.lib.adk as adk -from project.agent import MODEL_NAME, create_agent -from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams -from agentex.lib.core.harness import UnifiedEmitter -from agentex.lib.types.fastacp import AsyncACPConfig -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.lib.utils.model_utils import BaseModel -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn -from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config - -logger = make_logger(__name__) - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -acp = FastACP.create( - acp_type="async", - 
config=AsyncACPConfig(type="base"), -) - -_agent = None - - -def get_agent(): - global _agent - if _agent is None: - _agent = create_agent() - return _agent - - -class ConversationState(BaseModel): - """Per-task conversation state persisted via ``adk.state``. - - ``history_json`` holds the pydantic-ai message history serialized by - ``ModelMessagesTypeAdapter`` — pydantic-ai's official way to round-trip - ``ModelMessage`` objects through JSON. - """ - - history_json: str = "[]" - turn_number: int = 0 - - -@acp.on_task_create -async def handle_task_create(params: CreateTaskParams): - """Initialize per-task state on task creation.""" - logger.info(f"Task created: {params.task.id}") - await adk.state.create( - task_id=params.task.id, - agent_id=params.agent.id, - state=ConversationState(), - ) - - -@acp.on_task_event_send -async def handle_task_event_send(params: SendEventParams): - """Handle each user message through the unified auto_send_turn path.""" - agent = get_agent() - task_id = params.task.id - agent_id = params.agent.id - user_message = params.event.content.content - - logger.info(f"Processing message for thread {task_id}") - - # Echo the user's message into the task history. - await adk.messages.create(task_id=task_id, content=params.event.content) - - # Load the previous conversation history from state (fall back to fresh). 
- task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id) - if task_state is None: - state = ConversationState() - task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state) - else: - state = ConversationState.model_validate(task_state.state) - - state.turn_number += 1 - previous_messages = ModelMessagesTypeAdapter.validate_json(state.history_json) - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name=f"Turn {state.turn_number}", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - # Construct the UnifiedEmitter from the ACP context so tracing is - # automatic and messages are auto-sent to the task stream (Redis). - emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - - # Capture the terminal AgentRunResultEvent to persist message history. - captured_messages: list[Any] = [] - - async def tee_messages(upstream) -> AsyncIterator[Any]: - async for event in upstream: - if isinstance(event, AgentRunResultEvent): - captured_messages[:] = list(event.result.all_messages()) - yield event - - async with agent.run_stream_events(user_message, message_history=previous_messages) as stream: - # The unified auto_send path delivers streamed tool requests natively - # (Start+Delta+Done), so no coalescing workaround is needed. - turn = PydanticAITurn( - tee_messages(stream), - model=MODEL_NAME, - ) - result = await emitter.auto_send_turn(turn) - - # Save the updated message history so the next turn picks up here. 
- if captured_messages: - state.history_json = ModelMessagesTypeAdapter.dump_json(captured_messages).decode() - await adk.state.update( - state_id=task_state.id, - task_id=task_id, - agent_id=agent_id, - state=state, - ) - - if turn_span: - turn_span.output = {"final_output": result.final_text} - - -@acp.on_task_cancel -async def handle_task_canceled(params: CancelTaskParams): - logger.info(f"Task canceled: {params.task.id}") diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/agent.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/agent.py deleted file mode 100644 index e7b764d82..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/agent.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Pydantic AI agent definition for the async harness test agent. - -The Agent is the boundary between this module and the API layer (acp.py). -Pydantic AI handles its own tool-call loop internally — no graph required. -""" - -from __future__ import annotations - -from datetime import datetime - -from pydantic_ai import Agent - -from project.tools import get_weather - -__all__ = ["create_agent", "MODEL_NAME"] - -MODEL_NAME = "openai:gpt-4o-mini" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. 
- -Current date and time: {timestamp} - -Guidelines: -- Be concise and helpful -- Use tools when they would help answer the user's question -- If you're unsure, ask clarifying questions -- Always provide accurate information -""" - - -def create_agent() -> Agent: - """Build and return the Pydantic AI agent with tools registered.""" - agent = Agent( - MODEL_NAME, - system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), - ) - - agent.tool_plain(get_weather) - - return agent diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/tools.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/tools.py deleted file mode 100644 index 0f16a7cb0..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/tools.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Tool definitions for the async harness Pydantic AI agent. - -Pydantic AI tools are registered directly on the Agent via decorators -(see project.agent). This module hosts the bare function so it is easy to -unit-test in isolation. -""" - -from __future__ import annotations - - -def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. - - Returns: - A string describing the weather conditions. 
- """ - return f"The weather in {city} is sunny and 72°F" diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/00_base/harness_pydantic_ai/pyproject.toml deleted file mode 100644 index 3dc1e0e41..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "ab-harness-pydantic-ai" -version = "0.1.0" -description = "An async Pydantic AI harness test agent using the unified emitter surface" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "pydantic-ai-slim[openai]>=1.0,<2", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/tests/test_agent.py deleted file mode 100644 index 11098c7d5..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/tests/test_agent.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Live tests for the async harness Pydantic AI agent. - -These tests require a running agent (server + deployed agent) and exercise the -unified-surface async handler end-to-end over the wire. They mirror the -``110_pydantic_ai`` async tutorial tests but target this harness agent. - -Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives -in ``tests/lib/core/harness/test_harness_pydantic_ai_async.py`` in the SDK repo. - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. 
Set the AGENTEX_API_BASE_URL environment variable if not using default -3. Run: pytest test_agent.py -v - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: ab-harness-pydantic-ai) -""" - -import os - -import pytest -import pytest_asyncio - -from agentex import AsyncAgentex -from agentex.types import TextContentParam -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest -from agentex.lib.sdk.fastacp.base.base_acp_server import uuid - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "ab-harness-pydantic-ai") - - -@pytest_asyncio.fixture -async def client(): - """Create an AsyncAgentex client instance for testing.""" - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - """Return the agent name for testing.""" - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingEvents: - """Test non-streaming event sending through the unified auto_send_turn path.""" - - @pytest.mark.asyncio - async def test_send_event(self, client: AsyncAgentex, agent_id: str): - """Test sending an event to the async harness Pydantic AI agent.""" - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="Hello! 
What can you help me with?", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - @pytest.mark.asyncio - async def test_tool_calling(self, client: AsyncAgentex, agent_id: str): - """Test that the agent can use tools (e.g., weather tool).""" - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="What's the weather in San Francisco?", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - -class TestStreamingEvents: - """Test streaming event sending.""" - - @pytest.mark.asyncio - async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str): - """Test sending an event and streaming the response.""" - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="Tell me a short joke.", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md index b221c1238..66466693b 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md @@ -1,153 +1,59 @@ -# Tutorial 110 (temporal): Pydantic AI Agent +# Temporal Pydantic AI Agent -This tutorial demonstrates a **durable** Pydantic AI agent on AgentEx, backed by Temporal: -- Workflow state survives crashes mid-conversation (Temporal 
replay) -- Every LLM call and every tool call becomes its own Temporal activity (independent retries + observability) -- Streaming via Redis still works — token-by-token deltas appear in the UI in real time +A minimal **Temporal-backed** Pydantic AI agent that drives the **unified +harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`) from +inside the model activity's `event_stream_handler`. -This is the Temporal counterpart to the async base tutorial at [`10_async/00_base/110_pydantic_ai/`](../../00_base/110_pydantic_ai/). +## Why this agent exists -## Why Temporal? Why not just async? +This agent calls `emitter.auto_send_turn(...)` **explicitly** inside +the `event_stream_handler`, making the unified-surface wiring visible and giving +the temporal channel direct coverage. -In async base 110, the agent state lives in memory inside the ACP process. If that process dies mid-LLM-call, the in-flight turn is lost. Temporal fixes this by: +## How it wires the unified surface -1. Recording every external interaction (LLM call, tool call) to a durable event log. -2. On worker restart, **replaying** the workflow code, using cached activity results to skip work that already finished. -3. Letting workflows live forever — multi-day conversations or human-in-the-loop flows just work. - -## Architecture at a glance - -Two long-running processes plus shared infrastructure: - -``` -┌──────────────────────────┐ ┌──────────────────────────┐ -│ uvicorn project.acp:acp │ │ python -m run_worker │ -│ (HTTP shim, forwards │ │ (executes workflows + │ -│ signals to Temporal) │ │ activities) │ -└──────────────────────────┘ └──────────────────────────┘ - │ │ - └────► Temporal server ◄───────────┘ - (event log + queue) - - Redis ◄─── activities push deltas - │ - └─── Agentex API tails ──► UI client -``` - -The HTTP server is a thin shim that translates `task/event/send` into Temporal signals. The worker is where your agent code actually runs. 
Temporal sits in between, recording everything. - -## Key code patterns - -### `project/agent.py` — wrap the base agent in `TemporalAgent` - -```python -base_agent = Agent(MODEL_NAME, deps_type=TaskDeps, system_prompt=...) -base_agent.tool_plain(get_weather) - -temporal_agent = TemporalAgent( - base_agent, - name="at110_pydantic_ai_agent", - event_stream_handler=event_handler, # streams to Redis from inside the model activity -) -``` - -`TemporalAgent` (from `pydantic_ai.durable_exec.temporal`) wraps a normal Pydantic AI Agent so that: -- Each LLM call runs in its own activity -- Each tool call runs in its own activity -- The wrapping is invisible to the workflow code that calls `temporal_agent.run(...)` - -### `project/workflow.py` — declare `__pydantic_ai_agents__` +In `project/agent.py`, the `event_stream_handler` runs inside the model activity +and constructs a `UnifiedEmitter` from `RunContext.deps`: ```python -@workflow.defn(name=environment_variables.WORKFLOW_NAME) -class At110PydanticAiWorkflow(BaseWorkflow): - __pydantic_ai_agents__ = [temporal_agent] # ← discovered by PydanticAIPlugin - - @workflow.signal(name=SignalName.RECEIVE_EVENT) - async def on_task_event_send(self, params): - await adk.messages.create(task_id=params.task.id, content=params.event.content) - result = await temporal_agent.run( - params.event.content.content, - deps=TaskDeps(task_id=params.task.id), - ) +async def event_handler(run_context, events): + emitter = UnifiedEmitter( + task_id=run_context.deps.task_id, + trace_id=run_context.deps.task_id, + parent_span_id=run_context.deps.parent_span_id, + ) + turn = PydanticAITurn(events, model=MODEL_NAME, coalesce_tool_requests=True) + await emitter.auto_send_turn(turn) ``` -The `__pydantic_ai_agents__` attribute is how `PydanticAIPlugin` discovers which activities to register on the worker — no manual activity list needed. 
- -### `project/acp.py` — no handlers, just plugin wiring - -```python -acp = FastACP.create( - acp_type="async", - config=TemporalACPConfig( - type="temporal", - temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), - plugins=[PydanticAIPlugin()], - ), -) -``` - -When `type="temporal"`, FastACP auto-wires HTTP → workflow signals. You don't define `@acp.on_task_event_send` anywhere — Temporal handles it. - -### `project/run_worker.py` — boot the worker with the plugin - -```python -worker = AgentexWorker( - task_queue=task_queue_name, - plugins=[PydanticAIPlugin()], -) -await worker.run( - activities=get_all_activities(), - workflow=At110PydanticAiWorkflow, -) -``` - -`get_all_activities()` returns the built-in Agentex activities (state, messages, streaming, tracing). Pydantic AI's per-agent activities are auto-added by the plugin. - -## Files - -| File | Purpose | -|------|---------| -| `project/acp.py` | Thin HTTP shim — `FastACP.create(type="temporal", ...)` | -| `project/workflow.py` | `@workflow.defn` class with the signal handler | -| `project/agent.py` | Base Pydantic AI Agent wrapped in `TemporalAgent` | -| `project/tools.py` | Tool functions (must be `async` for Temporal compatibility) | -| `project/run_worker.py` | Worker boot script (separate process) | -| `tests/test_agent.py` | End-to-end test verifying tool round-trips | -| `manifest.yaml` | Sets `temporal.enabled: true` and declares workflow + queue name | - -## Running Locally - -You'll need three terminals open (this is the price of Temporal): - -```bash -# Terminal 1 — backend services (separate repo) -cd ~/scale-agentex/agentex -make dev # brings up Temporal, Redis, Postgres, Agentex API - -# Terminal 2 — this tutorial (ACP server + Temporal worker) -cd ~/scale-agentex-python/examples/tutorials/10_async/10_temporal/110_pydantic_ai -agentex agents run # this also launches the worker process - -# Terminal 3 — tests -cd 
~/scale-agentex-python/examples/tutorials/10_async/10_temporal/110_pydantic_ai -uv run pytest tests/test_agent.py -v -``` - -Watch the Temporal UI at http://localhost:8233 — you'll see workflow executions, signal events, and one activity per LLM call + one per tool call. - -## Sync vs Async vs Temporal — How the code differs - -| Concern | Sync (040) | Async base (110) | Temporal (this one) | -|---|---|---|---| -| `project/acp.py` | `@acp.on_message_send` yields events | `@acp.on_task_event_send` pushes to Redis | **No handlers** — `FastACP.create(type="temporal", ...)` | -| Where the agent runs | In the ACP HTTP process | In the ACP HTTP process | In a separate worker process | -| Durability | Ephemeral — request-scoped | Ephemeral — process-scoped | **Durable** — survives worker restarts via Temporal replay | -| Per-call retries | None | None | Each model + tool call automatically retried by Temporal | -| Code we add | — | `acp.py` handler | `workflow.py`, `run_worker.py`, wrap agent in `TemporalAgent` | - -## Notes - -- Multi-turn conversation memory is not wired here. Workflow state (`self._turn_number`) is durable, but message history isn't currently threaded into `temporal_agent.run(..., message_history=...)`. To add: load via `adk.messages.list(task_id=...)` inside the signal handler and pass through. -- Reasoning/thinking tokens are not exercised by `gpt-4o-mini`. Swap to a reasoning-capable model to exercise that branch end-to-end. -- Tools must be `async` (Pydantic AI's Temporal integration requires it — sync tools would run in threads, breaking Temporal's determinism guarantees). +- The handler runs inside a Temporal activity, so it can freely make + non-deterministic Redis + tracing writes. +- `coalesce_tool_requests=True` is optional here: `project/agent.py` omits it + because the auto_send path delivers streamed tool requests natively. +- `deps` (set by `project/workflow.py`) threads the `task_id` and the per-turn + `parent_span_id` into the handler so tool spans nest under the workflow's turn + span.
+ +## Structure + +- `project/acp.py` — thin ACP server; FastACP auto-wires HTTP routes to the + workflow when `TemporalACPConfig` is used. +- `project/agent.py` — base `Agent` + `TemporalAgent` + the unified-surface + `event_stream_handler`. +- `project/workflow.py` — durable workflow; each turn delegates to + `temporal_agent.run(...)`. +- `project/run_worker.py` — Temporal worker entry point. +- `project/tools.py` — async `get_weather(city)` returning a constant. +- `tests/test_agent.py` — live integration test (requires Temporal + Redis + + ACP server + worker). + +## Tools + +- `get_weather(city: str) -> str` (async): returns a fixed "sunny and 72°F" + string. Each tool call becomes its own Temporal activity. + +## Offline coverage + +Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake +streaming/tracing, no Temporal server) live in the SDK repo under +`tests/lib/core/harness/` (the pydantic-ai temporal suite). diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml index 15d00076f..7ca454b05 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml @@ -18,7 +18,7 @@ local_development: agent: acp_type: async name: at110-pydantic-ai - description: A Temporal-backed Pydantic AI agent with tool calling and Redis streaming + description: A Temporal-backed Pydantic AI harness test agent using the unified emitter surface temporal: enabled: true @@ -42,8 +42,6 @@ agent: - env_var_name: SGP_CLIENT_BASE_URL secret_name: sgp-client-base-url secret_key: url - # env: - # OPENAI_BASE_URL: "https://your-litellm-proxy/v1" deployment: image: @@ -53,7 +51,7 @@ deployment: global: agent: name: "at110-pydantic-ai" - description: "A Temporal-backed Pydantic AI agent" + description: "A Temporal-backed Pydantic AI harness test agent using the unified emitter 
surface" replicaCount: 1 resources: requests: diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py index dacb45ad6..c142dcf70 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py @@ -1,7 +1,7 @@ -"""ACP server for the Temporal Pydantic AI tutorial. +"""ACP server for the Temporal harness Pydantic AI test agent. -This file is intentionally thin. When ``acp_type="async"`` is combined -with ``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires: +This file is intentionally thin. When ``acp_type="async"`` is combined with +``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires: HTTP task/create → @workflow.run on the workflow class HTTP task/event/send → @workflow.signal(SignalName.RECEIVE_EVENT) diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py index a33a317cc..4e59688ce 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py @@ -1,18 +1,20 @@ -"""Pydantic AI agent definition for the Temporal tutorial. +"""Pydantic AI agent definition for the Temporal harness test agent. This module constructs the base ``pydantic_ai.Agent`` once at import time, registers tools on it, and wraps it in ``TemporalAgent`` from ``pydantic_ai.durable_exec.temporal``. -The ``TemporalAgent`` wrapper makes every model call and every tool call -run as a Temporal activity automatically. The workflow code stays -deterministic; the non-deterministic work (LLM HTTP calls, tool execution) -moves into recorded activities. 
- -Streaming back to Agentex happens via ``event_stream_handler``, which -receives Pydantic AI ``AgentStreamEvent``s from inside the model activity -and forwards them to Redis using our existing ``stream_pydantic_ai_events`` -helper. The ``task_id`` is threaded into the handler via ``deps``. +The ``TemporalAgent`` wrapper makes every model call and every tool call run as +a Temporal activity automatically. The workflow stays deterministic; the +non-deterministic work (LLM HTTP calls, tool execution) moves into recorded +activities. + +Streaming back to Agentex happens via ``event_stream_handler``, which receives +Pydantic AI ``AgentStreamEvent``s from inside the model activity and forwards +them through the UNIFIED HARNESS SURFACE (``UnifiedEmitter.auto_send_turn`` + +``PydanticAITurn``) — called directly rather than via ``stream_pydantic_ai_events``. +The ``task_id`` and per-turn ``parent_span_id`` are threaded into the handler +via ``deps``. """ from __future__ import annotations @@ -26,10 +28,10 @@ from pydantic_ai.durable_exec.temporal import TemporalAgent from project.tools import get_weather -from agentex.lib.adk import ( - stream_pydantic_ai_events, - create_pydantic_ai_tracing_handler, -) +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn + +__all__ = ["TaskDeps", "temporal_agent", "base_agent", "MODEL_NAME"] MODEL_NAME = "openai:gpt-4o-mini" SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. @@ -48,13 +50,13 @@ class TaskDeps(BaseModel): """Per-run dependencies passed into the agent via ``deps=``. Pydantic AI's ``RunContext.deps`` is the canonical place to thread - request-scoped data (like the Agentex task_id) into tools and - event handlers — including code that runs inside Temporal activities. + request-scoped data (like the Agentex task_id) into tools and event + handlers — including code that runs inside Temporal activities. 
""" task_id: str - # When set, the event handler nests per-tool-call spans under this - # span. Typically the ID of the per-turn span opened by the workflow. + # When set, the event handler nests per-tool-call spans under this span. + # Typically the ID of the per-turn span opened by the workflow. parent_span_id: str | None = None @@ -77,32 +79,33 @@ async def event_handler( run_context: RunContext[TaskDeps], events: AsyncIterable[AgentStreamEvent], ) -> None: - """Stream Pydantic AI events to Agentex via Redis from inside the model activity. + """Stream Pydantic AI events to Agentex via the unified surface. Pydantic AI calls this with the live event stream as soon as the model - activity begins emitting parts. Because the handler runs inside the - activity (not the workflow), it can freely make non-deterministic - Redis writes — including the tracing HTTP calls that record per-tool-call - spans under the workflow's per-turn span (when ``parent_span_id`` is set). + activity begins emitting parts. Because the handler runs inside the activity + (not the workflow), it can freely make non-deterministic Redis + tracing + writes. + + The UnifiedEmitter is constructed from ``deps`` (task_id + parent_span_id), + so tool spans nest under the workflow's per-turn span and messages auto-send + to the task stream. The auto_send path delivers streamed tool requests + natively, so no coalescing workaround is needed. 
""" - tracing_handler = create_pydantic_ai_tracing_handler( + emitter = UnifiedEmitter( + task_id=run_context.deps.task_id, trace_id=run_context.deps.task_id, parent_span_id=run_context.deps.parent_span_id, - task_id=run_context.deps.task_id, - ) - await stream_pydantic_ai_events( - events, - run_context.deps.task_id, - tracing_handler=tracing_handler, ) + turn = PydanticAITurn(events, model=MODEL_NAME) + await emitter.auto_send_turn(turn) -# Construct the durable agent at module load time so that the -# PydanticAIPlugin can auto-discover its activities via the workflow's -# ``__pydantic_ai_agents__`` attribute. +# Construct the durable agent at module load time so that the PydanticAIPlugin +# can auto-discover its activities via the workflow's ``__pydantic_ai_agents__`` +# attribute. base_agent = _build_base_agent() temporal_agent: TemporalAgent[TaskDeps, str] = TemporalAgent( base_agent, - name="at110_pydantic_ai_agent", + name="pydantic_ai_agent", event_stream_handler=event_handler, ) diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py index e54c9d1dc..4b4d43d19 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py @@ -1,18 +1,18 @@ -"""Temporal worker for the Pydantic AI tutorial. +"""Temporal worker for the harness Pydantic AI test agent. -Run as a separate long-lived process alongside the ACP HTTP server. The -worker polls Temporal for workflow + activity tasks and executes them. +Run as a separate long-lived process alongside the ACP HTTP server. The worker +polls Temporal for workflow + activity tasks and executes them. -The ``PydanticAIPlugin`` reads ``__pydantic_ai_agents__`` off the workflow -class and registers every model/tool activity the TemporalAgent needs — -so we don't have to enumerate activities by hand here. 
+The ``PydanticAIPlugin`` reads ``__pydantic_ai_agents__`` off the workflow class +and registers every model/tool activity the TemporalAgent needs — so we don't +have to enumerate activities by hand here. """ import asyncio from pydantic_ai.durable_exec.temporal import PydanticAIPlugin -from project.workflow import At110PydanticAiWorkflow +from project.workflow import HarnessPydanticAiWorkflow from agentex.lib.utils.debug import setup_debug_if_enabled from agentex.lib.utils.logging import make_logger from agentex.lib.environment_variables import EnvironmentVariables @@ -31,8 +31,8 @@ async def main(): raise ValueError("WORKFLOW_TASK_QUEUE is not set") # get_all_activities() returns the built-in Agentex activities (state, - # messages, streaming, tracing). Pydantic AI's TemporalAgent activities - # are auto-registered by PydanticAIPlugin via __pydantic_ai_agents__. + # messages, streaming, tracing). Pydantic AI's TemporalAgent activities are + # auto-registered by PydanticAIPlugin via __pydantic_ai_agents__. worker = AgentexWorker( task_queue=task_queue_name, plugins=[PydanticAIPlugin()], @@ -40,7 +40,7 @@ async def main(): await worker.run( activities=get_all_activities(), - workflow=At110PydanticAiWorkflow, + workflow=HarnessPydanticAiWorkflow, ) diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py index 75640fcb7..bbd6c5200 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py @@ -1,9 +1,8 @@ -"""Tool definitions for the Temporal Pydantic AI agent. +"""Tool definitions for the Temporal harness Pydantic AI agent. These functions are registered on the base Pydantic AI agent. When the agent is wrapped in ``TemporalAgent``, each tool call becomes its own Temporal -activity automatically — independently retryable and observable in the -Temporal UI. 
+activity automatically — independently retryable and observable. Tools must be ``async`` because Pydantic AI's Temporal integration requires it: non-async tools would run in threads, which is non-deterministic and diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py index bb07ac818..9a01be7de 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py @@ -1,16 +1,16 @@ -"""Temporal workflow for the Pydantic AI tutorial. +"""Temporal workflow for the harness Pydantic AI test agent. The workflow holds task state durably across crashes. Its signal handler -delegates the actual agent run to ``temporal_agent.run(...)`` — which -internally schedules model and tool activities, each independently -durable. The ``event_stream_handler`` registered on ``temporal_agent`` -pushes streaming deltas to Redis while the model activity runs. +delegates the actual agent run to ``temporal_agent.run(...)`` — which internally +schedules model and tool activities, each independently durable. The +``event_stream_handler`` registered on ``temporal_agent`` (see project.agent) +pushes streaming deltas through the unified harness surface while the model +activity runs. Multi-turn memory is kept on the workflow instance itself -(``self._message_history``). Temporal's workflow state is already durable -and replay-safe, so unlike the async-base tutorial we don't need an -external ``adk.state`` round-trip — the message list survives crashes -because Temporal replays activity results that produced it. +(``self._message_history``). Temporal's workflow state is already durable and +replay-safe, so unlike the async-base agent we don't need an external +``adk.state`` round-trip. 
""" from __future__ import annotations @@ -56,14 +56,14 @@ @workflow.defn(name=environment_variables.WORKFLOW_NAME) -class At110PydanticAiWorkflow(BaseWorkflow): +class HarnessPydanticAiWorkflow(BaseWorkflow): """Long-running Temporal workflow that delegates each turn to a Pydantic AI TemporalAgent. The ``__pydantic_ai_agents__`` attribute is the marker the ``PydanticAIPlugin`` looks for at worker startup: it pulls - ``temporal_agent.temporal_activities`` off this list and registers them - on the worker automatically — so we don't have to list activities by - hand in ``run_worker.py``. + ``temporal_agent.temporal_activities`` off this list and registers them on + the worker automatically — so we don't have to list activities by hand in + ``run_worker.py``. """ __pydantic_ai_agents__ = [temporal_agent] @@ -74,8 +74,8 @@ def __init__(self): self._turn_number = 0 # Conversation history accumulated across turns. Each entry is a # pydantic-ai ``ModelMessage``. Temporal replays the activity that - # produced these messages, so the list is rebuilt deterministically - # if the workflow ever recovers from a crash. + # produced these messages, so the list is rebuilt deterministically if + # the workflow ever recovers from a crash. self._message_history: list["ModelMessage"] = [] @workflow.signal(name=SignalName.RECEIVE_EVENT) @@ -93,17 +93,10 @@ async def on_task_event_send(self, params: SendEventParams) -> None: name=f"Turn {self._turn_number}", input={"message": params.event.content.content}, ) as span: - # temporal_agent.run() is the magic line. From the outside it - # looks like a regular async call. Internally it schedules: - # 1. A model activity (LLM HTTP call recorded by Temporal) - # 2. For each tool the model invokes, a tool activity - # 3. Each activity is retried, observable, and durable - # While the model activity runs, the event_stream_handler on - # temporal_agent pushes deltas to Redis so the UI sees tokens. 
- # - # Passing ``message_history`` makes the run remember prior turns: - # without it the agent would respond to each user message as if - # it had never seen the conversation before. + # temporal_agent.run() schedules a model activity, per-tool + # activities, and the event_stream_handler activity (which pushes + # deltas through the unified surface). Passing ``message_history`` + # makes the run remember prior turns. result = await temporal_agent.run( params.event.content.content, message_history=self._message_history, @@ -112,8 +105,8 @@ async def on_task_event_send(self, params: SendEventParams) -> None: parent_span_id=span.id if span else None, ), ) - # Persist the new full history (user + assistant + any tool - # rounds) so the next turn picks up from here. + # Persist the new full history (user + assistant + any tool rounds) + # so the next turn picks up from here. self._message_history = list(result.all_messages()) if span: span.output = {"final_output": result.output} diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml index 9f47733c0..2f308f2a1 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "at110-pydantic-ai" version = "0.1.0" -description = "A Temporal-backed Pydantic AI agent with tool calling and Redis streaming" +description = "A Temporal-backed Pydantic AI harness test agent using the unified emitter surface" readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py index d01276ab8..974cddcc0 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py +++ 
b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py @@ -1,9 +1,10 @@ -"""Tests for the Temporal Pydantic AI agent. +"""Live tests for the Temporal Pydantic AI agent. -This test suite validates: -- The agent responds to a basic message -- Tool calls are visible in the message history (proving each tool call - ran as its own Temporal activity) +These tests require a running agent (Temporal + Redis + ACP server + worker) and +exercise the unified-surface event_stream_handler end-to-end over the wire. + +Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives +in the SDK repo under ``tests/lib/core/harness/`` (the pydantic-ai temporal suite). To run these tests: 1. Make sure the agent is running (worker + ACP server) @@ -16,10 +17,7 @@ import pytest import pytest_asyncio -from test_utils.async_utils import ( - poll_messages, - send_event_and_poll_yielding, -) +from test_utils.async_utils import poll_messages, send_event_and_poll_yielding from agentex import AsyncAgentex from agentex.types.task_message import TaskMessage @@ -51,14 +49,12 @@ async def agent_id(client, agent_name): class TestNonStreamingEvents: - """Test that the Temporal-backed Pydantic AI agent responds and uses tools.""" + """Test that the Temporal-backed harness agent responds and uses tools.""" @pytest.mark.asyncio async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): """Drive a full turn: create task, send a weather question, verify tool round-trip.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None @@ -71,11 +67,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): sleep_interval=1.0, ): assert isinstance(message, TaskMessage) - if ( - message.content - and 
message.content.type == "text" - and message.content.author == "agent" - ): + if message.content and message.content.type == "text" and message.content.author == "agent": task_creation_found = True break assert task_creation_found, "Task creation welcome message not found" @@ -101,11 +93,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): if final_message and getattr(final_message, "streaming_status", None) == "DONE": break - if ( - message.content - and message.content.type == "text" - and message.content.author == "agent" - ): + if message.content and message.content.type == "text" and message.content.author == "agent": final_message = message content_length = len(getattr(message.content, "content", "") or "") if message.streaming_status == "DONE" and content_length > 0: @@ -115,9 +103,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): assert seen_tool_request, "Expected a tool_request (agent calling get_weather)" assert seen_tool_response, "Expected a tool_response (get_weather result)" assert final_message is not None, "Expected a final agent text message" - final_text = ( - getattr(final_message.content, "content", None) if final_message.content else None - ) + final_text = getattr(final_message.content, "content", None) if final_message.content else None assert isinstance(final_text, str) and len(final_text) > 0 # The get_weather tool always returns "72°F" — the response should mention it. 
assert "72" in final_text, "Expected weather response to mention 72°F" diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/.dockerignore b/examples/tutorials/10_async/10_temporal/120_openai_agents/.dockerignore similarity index 100% rename from examples/tutorials/10_async/00_base/130_harness_openai/.dockerignore rename to examples/tutorials/10_async/10_temporal/120_openai_agents/.dockerignore diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/Dockerfile b/examples/tutorials/10_async/10_temporal/120_openai_agents/Dockerfile similarity index 65% rename from examples/tutorials/10_async/10_temporal/harness_langgraph/Dockerfile rename to examples/tutorials/10_async/10_temporal/120_openai_agents/Dockerfile index f6c9fb59b..700f56cea 100644 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/Dockerfile +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/Dockerfile @@ -22,20 +22,20 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 -COPY 10_async/10_temporal/harness_langgraph/pyproject.toml /app/harness_langgraph/pyproject.toml -COPY 10_async/10_temporal/harness_langgraph/README.md /app/harness_langgraph/README.md +COPY 10_async/10_temporal/120_openai_agents/pyproject.toml /app/120_openai_agents/pyproject.toml +COPY 10_async/10_temporal/120_openai_agents/README.md /app/120_openai_agents/README.md -WORKDIR /app/harness_langgraph +WORKDIR /app/120_openai_agents -COPY 10_async/10_temporal/harness_langgraph/project /app/harness_langgraph/project -COPY 10_async/10_temporal/harness_langgraph/tests /app/harness_langgraph/tests +COPY 10_async/10_temporal/120_openai_agents/project /app/120_openai_agents/project +COPY 10_async/10_temporal/120_openai_agents/tests /app/120_openai_agents/tests COPY test_utils /app/test_utils RUN uv pip install --system .[dev] ENV PYTHONPATH=/app -ENV AGENT_NAME=at-harness-langgraph +ENV AGENT_NAME=at120-openai-agents CMD ["uvicorn", "project.acp:acp", 
"--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/README.md b/examples/tutorials/10_async/10_temporal/120_openai_agents/README.md similarity index 94% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/README.md rename to examples/tutorials/10_async/10_temporal/120_openai_agents/README.md index 0415ae225..4db26d0a1 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/README.md +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/README.md @@ -9,7 +9,7 @@ LLM calls are non-deterministic, so they can't run directly in a Temporal workflow. This tutorial keeps the workflow (`project/workflow.py`) deterministic and delegates each turn to a custom activity (`project/activities.py`). The activity uses the SAME `OpenAITurn` adapter as -the sync (`060_harness_openai`) and async (`130_harness_openai`) variants, and +the sync (`050_openai_agents`) and async (`120_openai_agents`) variants, and delivers via `UnifiedEmitter.auto_send_turn` — which is designed to run inside an activity (it writes streaming side effects to Redis and returns the final text + usage). 
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/environments.yaml b/examples/tutorials/10_async/10_temporal/120_openai_agents/environments.yaml similarity index 100% rename from examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/environments.yaml rename to examples/tutorials/10_async/10_temporal/120_openai_agents/environments.yaml diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/manifest.yaml b/examples/tutorials/10_async/10_temporal/120_openai_agents/manifest.yaml similarity index 78% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/manifest.yaml rename to examples/tutorials/10_async/10_temporal/120_openai_agents/manifest.yaml index 64a943438..4b59db442 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/manifest.yaml +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../../ include_paths: - - 10_async/10_temporal/140_harness_openai + - 10_async/10_temporal/120_openai_agents - test_utils - dockerfile: 10_async/10_temporal/140_harness_openai/Dockerfile - dockerignore: 10_async/10_temporal/140_harness_openai/.dockerignore + dockerfile: 10_async/10_temporal/120_openai_agents/Dockerfile + dockerignore: 10_async/10_temporal/120_openai_agents/.dockerignore local_development: agent: @@ -17,14 +17,14 @@ local_development: agent: acp_type: async - name: at140-harness-openai + name: at120-openai-agents description: A Temporal-backed OpenAI Agents SDK agent on the unified harness surface temporal: enabled: true workflows: - - name: at140-harness-openai - queue_name: at140_harness_openai_queue + - name: at120-openai-agents + queue_name: at120_openai_agents_queue credentials: - env_var_name: REDIS_URL @@ -50,7 +50,7 @@ deployment: global: agent: - name: "at140-harness-openai" + name: "at120-openai-agents" description: "A Temporal-backed OpenAI Agents SDK agent on the unified harness 
surface" replicaCount: 1 resources: diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/__init__.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/__init__.py similarity index 100% rename from examples/tutorials/00_sync/harness_pydantic_ai/project/__init__.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/__init__.py diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/acp.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/acp.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/acp.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/acp.py diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/activities.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/activities.py similarity index 92% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/activities.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/activities.py index a70ee0c5d..2a8a773c4 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/activities.py +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/activities.py @@ -25,7 +25,7 @@ logger = make_logger(__name__) -RUN_HARNESS_AGENT_ACTIVITY = "run_harness_openai_agent" +RUN_AGENT_ACTIVITY = "run_openai_agent" class RunHarnessAgentParams(BaseModel): @@ -51,8 +51,8 @@ class RunHarnessAgentResult(BaseModel): class HarnessActivities: """Hosts the harness-backed OpenAI agent activity.""" - @activity.defn(name=RUN_HARNESS_AGENT_ACTIVITY) - async def run_harness_openai_agent(self, params: RunHarnessAgentParams) -> RunHarnessAgentResult: + @activity.defn(name=RUN_AGENT_ACTIVITY) + async def run_openai_agent(self, params: RunHarnessAgentParams) -> RunHarnessAgentResult: """Run the agent for one turn and auto-send its output. 
Threads the running conversation through ``input_list`` so multi-turn diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/agent.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/agent.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/agent.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/agent.py diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/run_worker.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/run_worker.py similarity index 91% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/run_worker.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/run_worker.py index 69586a395..b82ee0f50 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/run_worker.py +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/run_worker.py @@ -2,7 +2,7 @@ Runs as a separate long-lived process alongside the ACP HTTP server. Registers the built-in Agentex activities plus the custom harness agent activity -(``HarnessActivities.run_harness_openai_agent``), and the workflow. +(``HarnessActivities.run_openai_agent``), and the workflow. 
""" import asyncio @@ -28,7 +28,7 @@ async def main(): harness_activities = HarnessActivities() all_activities = [ - harness_activities.run_harness_openai_agent, + harness_activities.run_openai_agent, *get_all_activities(), ] diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/tools.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/tools.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/tools.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/tools.py diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/workflow.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/workflow.py similarity index 97% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/workflow.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/workflow.py index 69ad7b365..566bd93b6 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/workflow.py +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/workflow.py @@ -1,7 +1,7 @@ """Temporal workflow for the OpenAI Agents harness tutorial. The workflow stays deterministic: it echoes the user message and delegates the -non-deterministic LLM run to ``run_harness_openai_agent`` (see +non-deterministic LLM run to ``run_openai_agent`` (see ``project.activities``). That activity runs the OpenAI Agents SDK and delivers the turn through the unified harness surface (``OpenAITurn`` + ``UnifiedEmitter.auto_send_turn``). 
@@ -18,7 +18,7 @@ from agentex.lib import adk from project.activities import ( - RUN_HARNESS_AGENT_ACTIVITY, + RUN_AGENT_ACTIVITY, RunHarnessAgentParams, RunHarnessAgentResult, ) @@ -77,7 +77,7 @@ async def on_task_event_send(self, params: SendEventParams) -> None: input={"message": params.event.content.content}, ) as span: turn_result = await workflow.execute_activity( - RUN_HARNESS_AGENT_ACTIVITY, + RUN_AGENT_ACTIVITY, RunHarnessAgentParams( task_id=params.task.id, user_message=params.event.content.content, diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/pyproject.toml b/examples/tutorials/10_async/10_temporal/120_openai_agents/pyproject.toml similarity index 95% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/pyproject.toml rename to examples/tutorials/10_async/10_temporal/120_openai_agents/pyproject.toml index 5bf53f6be..e6c77fae3 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/pyproject.toml +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "at140-harness-openai" +name = "at120-openai-agents" version = "0.1.0" description = "A Temporal-backed OpenAI Agents SDK agent on the unified harness surface" readme = "README.md" diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/tests/test_agent.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/tests/test_agent.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/tests/test_agent.py diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile deleted file mode 100644 index d4927d0ce..000000000 --- 
a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile +++ /dev/null @@ -1,62 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - nodejs \ - npm \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/** - -# Install tctl (Temporal CLI) -RUN curl -L https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_arm64.tar.gz -o /tmp/tctl.tar.gz && \ - tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ - chmod +x /usr/local/bin/tctl && \ - rm /tmp/tctl.tar.gz - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml /app/120_openai_agents_local_sandbox/pyproject.toml -COPY 10_async/10_temporal/120_openai_agents_local_sandbox/README.md /app/120_openai_agents_local_sandbox/README.md - -WORKDIR /app/120_openai_agents_local_sandbox - -# Copy the project code -COPY 10_async/10_temporal/120_openai_agents_local_sandbox/project /app/120_openai_agents_local_sandbox/project - -# Copy the test files -COPY 10_async/10_temporal/120_openai_agents_local_sandbox/tests /app/120_openai_agents_local_sandbox/tests - -# Copy shared test utilities -COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] - -WORKDIR /app/120_openai_agents_local_sandbox - -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=at120-openai-agents-local-sandbox - -# Run the ACP server using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] - -# When we deploy the worker, we will replace the CMD 
with the following -# CMD ["python", "-m", "run_worker"] diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/README.md b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/README.md deleted file mode 100644 index 161bc43da..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/README.md +++ /dev/null @@ -1,130 +0,0 @@ -# Tutorial 120: Temporal OpenAI Agents SDK with a Local Sandbox - -This tutorial demonstrates running an [OpenAI Agents SDK](https://developers.openai.com/api/docs/guides/agents) -`SandboxAgent` inside a **Temporal** workflow, backed by the **local** -(`unix_local`) sandbox. - -The agent is a "local sandbox assistant": it answers questions by actually running -real shell commands (e.g. `python3 --version`, `ls`, `python3 -c "..."`) instead of -guessing. Because it runs inside Temporal, the sandbox tool calls become durable, -retried, and observable activities. - -This mirrors the canonical OpenAI Agents SDK Temporal example -(`060_open_ai_agents_sdk_hello_world`) and the tools example -(`070_open_ai_agents_sdk_tools`). The new piece is the **Temporal sandbox bridge**. - -## Key Concepts - -### Temporal ACP -The Temporal ACP model (`acp_type: async`, `temporal.enabled: true`) maps task -lifecycle to a Temporal workflow: -- `@workflow.run` (`on_task_create`) keeps the conversation alive. -- `@workflow.signal(name=SignalName.RECEIVE_EVENT)` (`on_task_event_send`) handles - each user message. - -No ACP handlers are registered by hand — the `TemporalACPConfig` wires them to the -workflow automatically. - -### Streaming (Interceptor + Model Provider + Hooks) -Real-time streaming uses STANDARD Temporal components — no forked plugin: -- **`ContextInterceptor`** threads `task_id` through activity headers. The workflow - sets `self._task_id` so the interceptor can read it. 
-- **`TemporalStreamingModelProvider`** returns a model that streams tokens to Redis - in real time while still returning the complete response to Temporal for - determinism / replay safety. -- **`TemporalStreamingHooks`** creates the lifecycle messages (tool request / - response, etc.) in the database. - -The `stream_lifecycle_content` activity must be registered on the worker alongside -`get_all_activities()`. - -### The Temporal sandbox bridge (`UnixLocalSandboxClient`) -The sandbox client is registered ON THE WORKER (and the ACP) via the standard -plugin: - -```python -from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient -from temporalio.contrib.openai_agents import OpenAIAgentsPlugin, SandboxClientProvider - -OpenAIAgentsPlugin( - model_provider=TemporalStreamingModelProvider(), - sandbox_clients=[SandboxClientProvider("local", UnixLocalSandboxClient())], -) -``` - -Inside the workflow, the run is pointed at that backend by name: - -```python -from temporalio.contrib.openai_agents.workflow import temporal_sandbox_client -from agents.sandbox import SandboxAgent, SandboxRunConfig -from agents.run_config import RunConfig -from agents.sandbox.snapshot import NoopSnapshotSpec -from agents.sandbox.capabilities import Shell -from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClientOptions - -agent = SandboxAgent( - name="Local Sandbox Assistant", - model="gpt-4o-mini", - instructions="...use the shell tools to actually run commands...", - capabilities=[Shell()], -) -run_config = RunConfig( - sandbox=SandboxRunConfig( - client=temporal_sandbox_client("local"), - options=UnixLocalSandboxClientOptions(), - snapshot=NoopSnapshotSpec(), # skip the per-turn workspace snapshot - ) -) -result = await Runner.run( - agent, self._state.input_list, run_config=run_config, - hooks=TemporalStreamingHooks(task_id=params.task.id), -) -``` - -`temporal_sandbox_client("local")` resolves the worker-registered client, so the -sandbox shell tool calls run as 
Temporal activities (durable + observable in the -Temporal UI). - -## Two important lessons - -1. **Don't double-post the assistant message.** The `TemporalStreamingModelProvider` - already streams AND persists the assistant's response. If you also call - `adk.messages.create(...)` after `Runner.run`, the answer shows up twice. We only - persist conversation state for the next turn via `result.to_input_list()`. -2. **Use `NoopSnapshotSpec()`.** Without it, the sandbox tries to take a per-turn - workspace snapshot, and stopping the sandbox can raise - `WorkspaceArchiveReadError`. `NoopSnapshotSpec()` skips that snapshot. - -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | Temporal ACP server (plugin + sandbox client + interceptor) | -| `project/run_worker.py` | Temporal worker (registers workflow, activities, plugin, sandbox client) | -| `project/workflow.py` | `BaseWorkflow` that runs the `SandboxAgent` against the local sandbox | -| `tests/test_agent.py` | Integration tests (polling pattern) | -| `manifest.yaml` | Agent configuration (temporal enabled) | -| `environments.yaml` | Per-environment deployment overrides | - -## Running Locally - -```bash -# From this directory -agentex agents run -``` - -Set `OPENAI_API_KEY` (or `LITELLM_API_KEY` if you're behind the Scale LiteLLM -gateway) in your environment or in a `.env` file in `project/` so the agent can call -the model. 
- -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` - -## Further Reading - -- OpenAI Agents SDK guide: https://developers.openai.com/api/docs/guides/agents -- The async (non-Temporal) variant: `10_async/00_base/120_openai_agents_local_sandbox` -- The canonical OpenAI Agents SDK Temporal example: `10_async/10_temporal/060_open_ai_agents_sdk_hello_world` diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/manifest.yaml b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/manifest.yaml deleted file mode 100644 index 86ac89288..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/manifest.yaml +++ /dev/null @@ -1,111 +0,0 @@ -# Agent Manifest Configuration -# --------------------------- -# This file defines how your agent should be built and deployed. - -# Build Configuration -# ------------------ -build: - context: - # Root directory for the build context - root: ../../../ # Up to tutorials level to include test_utils - - # Paths to include in the Docker build context - include_paths: - - 10_async/10_temporal/120_openai_agents_local_sandbox - - test_utils - - # Path to your agent's Dockerfile (relative to the root directory) - dockerfile: 10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile - - # Path to your agent's .dockerignore - dockerignore: 10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore - - -# Local Development Configuration -# ----------------------------- -local_development: - agent: - port: 8000 # Port where your local ACP server is running - host_address: host.docker.internal # Host address for Docker networking - - # File paths for local development (relative to this manifest.yaml) - paths: - # Path to ACP server file - acp: project/acp.py - # Path to temporal worker file - worker: project/run_worker.py - - -# Agent Configuration -# ----------------- -agent: - # Type of agent - either sync or async - acp_type: async - - # 
Unique name for your agent - name: at120-openai-agents-local-sandbox - - # Description of what your agent does - description: A Temporal OpenAI Agents SDK agent using a local (unix_local) sandbox - - # Temporal workflow configuration - temporal: - enabled: true - workflows: - # Name of the workflow class (must match the @workflow.defn name in workflow.py) - - name: at120-openai-agents-local-sandbox - - # Queue name for task distribution - queue_name: at120_openai_agents_local_sandbox_queue - - # Credentials mapping (maps Kubernetes secrets to environment variables) - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - - # Environment variables for running locally and for deployment - env: - OPENAI_AGENTS_DISABLE_TRACING: "1" - - -# Deployment Configuration -# ----------------------- -deployment: - # Container image configuration - image: - repository: "" # Update with your container registry - tag: "latest" # Default tag, should be versioned in production - - imagePullSecrets: - - name: my-registry-secret # Update with your image pull secret name - - # Global deployment settings that apply to all clusters - global: - agent: - name: "at120-openai-agents-local-sandbox" - description: "A Temporal OpenAI Agents SDK agent using a local (unix_local) sandbox" - - # Default replica count - replicaCount: 1 - - # Default resource requirements - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/__init__.py 
b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/acp.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/acp.py deleted file mode 100644 index 196e1e7cd..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/acp.py +++ /dev/null @@ -1,83 +0,0 @@ -import os -import sys - -from temporalio.contrib.openai_agents import ( - OpenAIAgentsPlugin, - SandboxClientProvider, -) -from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient - -# === DEBUG SETUP (AgentEx CLI Debug Support) === -if os.getenv("AGENTEX_DEBUG_ENABLED") == "true": - try: - import debugpy - debug_port = int(os.getenv("AGENTEX_DEBUG_PORT", "5679")) - debug_type = os.getenv("AGENTEX_DEBUG_TYPE", "acp") - wait_for_attach = os.getenv("AGENTEX_DEBUG_WAIT_FOR_ATTACH", "false").lower() == "true" - - # Configure debugpy - debugpy.configure(subProcess=False) - debugpy.listen(debug_port) - - print(f"🐛 [{debug_type.upper()}] Debug server listening on port {debug_port}") - - if wait_for_attach: - print(f"⏳ [{debug_type.upper()}] Waiting for debugger to attach...") - debugpy.wait_for_client() - print(f"✅ [{debug_type.upper()}] Debugger attached!") - else: - print(f"📡 [{debug_type.upper()}] Ready for debugger attachment") - - except ImportError: - print("❌ debugpy not available. 
Install with: pip install debugpy") - sys.exit(1) - except Exception as e: - print(f"❌ Debug setup failed: {e}") - sys.exit(1) -# === END DEBUG SETUP === - -from agentex.lib.types.fastacp import TemporalACPConfig -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.lib.core.temporal.plugins.openai_agents.models.temporal_streaming_model import ( - TemporalStreamingModelProvider, -) -from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import ( - ContextInterceptor, -) - -context_interceptor = ContextInterceptor() -temporal_streaming_model_provider = TemporalStreamingModelProvider() - -# Create the ACP server. We register the STANDARD OpenAIAgentsPlugin with: -# - the streaming model provider (real-time token streaming + persistence) -# - the LOCAL sandbox backend, registered under the name "local" so the -# workflow can resolve it via ``temporal_sandbox_client("local")`` -# plus the ContextInterceptor that threads task_id through activity headers. -acp = FastACP.create( - acp_type="async", - config=TemporalACPConfig( - # When deployed to the cluster, the Temporal address is set automatically. - # For local development, we set the address manually to talk to the local - # Temporal service set up via docker compose. - type="temporal", - temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), - plugins=[ - OpenAIAgentsPlugin( - model_provider=temporal_streaming_model_provider, - sandbox_clients=[ - SandboxClientProvider("local", UnixLocalSandboxClient()), - ], - ) - ], - interceptors=[context_interceptor], - ), -) - - -# Notice that we don't need to register any handlers when we use type="temporal". 
-# These handlers are automatically registered when the ACP is created: -# -# @acp.on_task_create -> the workflow method decorated with @workflow.run -# @acp.on_task_event_send -> the workflow method decorated with -# @workflow.signal(name=SignalName.RECEIVE_EVENT) -# @acp.on_task_cancel -> handled by the temporal client (cancels the workflow) diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/run_worker.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/run_worker.py deleted file mode 100644 index a2b7bdf6b..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/run_worker.py +++ /dev/null @@ -1,80 +0,0 @@ -import asyncio - -from temporalio.contrib.openai_agents import ( - OpenAIAgentsPlugin, - SandboxClientProvider, -) -from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient - -from project.workflow import At120OpenaiAgentsLocalSandboxWorkflow -from agentex.lib.utils.debug import setup_debug_if_enabled -from agentex.lib.utils.logging import make_logger -from agentex.lib.environment_variables import EnvironmentVariables -from agentex.lib.core.temporal.activities import get_all_activities -from agentex.lib.core.temporal.workers.worker import AgentexWorker -from agentex.lib.core.temporal.plugins.openai_agents.hooks.activities import ( - stream_lifecycle_content, -) -from agentex.lib.core.temporal.plugins.openai_agents.models.temporal_streaming_model import ( - TemporalStreamingModelProvider, -) -from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import ( - ContextInterceptor, -) - -environment_variables = EnvironmentVariables.refresh() - -logger = make_logger(__name__) - - -async def main(): - # Setup debug mode if enabled - setup_debug_if_enabled() - - task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE - if task_queue_name is None: - raise ValueError("WORKFLOW_TASK_QUEUE is not set") - - # 
Register activities. ``stream_lifecycle_content`` powers the streaming - # lifecycle hooks; the rest are the standard AgentEx activities. - all_activities = get_all_activities() + [stream_lifecycle_content] - - # ============================================================================ - # STREAMING + SANDBOX SETUP - # ============================================================================ - # 1. ContextInterceptor threads task_id through activity headers so the - # streaming model + hooks know which task to stream/persist to. - # 2. TemporalStreamingModelProvider returns a model that streams tokens to - # Redis in real time while still returning the complete response to - # Temporal for determinism / replay safety. - # 3. SandboxClientProvider registers the LOCAL sandbox backend - # (UnixLocalSandboxClient) under the name "local". The workflow resolves - # it at run time via ``temporal_sandbox_client("local")``, so the sandbox - # tool calls run as durable Temporal activities. - # - # We use the STANDARD temporalio.contrib.openai_agents.OpenAIAgentsPlugin — - # no forked plugin needed. 
- context_interceptor = ContextInterceptor() - temporal_streaming_model_provider = TemporalStreamingModelProvider() - - worker = AgentexWorker( - task_queue=task_queue_name, - plugins=[ - OpenAIAgentsPlugin( - model_provider=temporal_streaming_model_provider, - sandbox_clients=[ - SandboxClientProvider("local", UnixLocalSandboxClient()), - ], - ) - ], - interceptors=[context_interceptor], - ) - - await worker.run( - activities=all_activities, - workflow=At120OpenaiAgentsLocalSandboxWorkflow, - ) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/workflow.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/workflow.py deleted file mode 100644 index 45b61b04e..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/workflow.py +++ /dev/null @@ -1,213 +0,0 @@ -"""OpenAI Agents SDK + Temporal: Local Sandbox Tutorial - -This tutorial demonstrates running an OpenAI Agents SDK ``SandboxAgent`` inside a -Temporal workflow, backed by the **local** (``unix_local``) sandbox. The agent is -a "local sandbox assistant": it answers questions by actually running real shell -commands (e.g. ``python3 --version``, ``ls``, ``python3 -c "..."``) instead of -guessing. - -KEY CONCEPTS DEMONSTRATED: -- A ``SandboxAgent`` granted the ``Shell`` capability inside a durable Temporal - workflow. -- The Temporal sandbox bridge: ``temporal_sandbox_client("local")`` resolves to - the ``UnixLocalSandboxClient`` registered on the worker via - ``SandboxClientProvider`` (see ``run_worker.py`` / ``acp.py``). The sandbox tool - calls run as Temporal activities, so they are durable, retried, and observable. -- Real-time streaming + persistence via ``TemporalStreamingModelProvider`` + - ``ContextInterceptor`` (configured on the worker) and ``TemporalStreamingHooks``. 
- -IMPORTANT LESSONS (applied below): - (a) Do NOT post the assistant message yourself with ``adk.messages.create`` - after ``Runner.run``. The ``TemporalStreamingModelProvider`` already streams - and persists the assistant's response — posting it again would duplicate the - answer in the UI. We only persist conversation state for the next turn via - ``result.to_input_list()``. - (b) Use ``NoopSnapshotSpec()`` so the per-turn workspace snapshot is skipped. - Without it, stopping the sandbox can raise ``WorkspaceArchiveReadError``. -""" - -from __future__ import annotations - -import os -import json - -from agents import Runner -from temporalio import workflow - -from agentex.lib import adk -from agentex.lib.types.acp import SendEventParams, CreateTaskParams -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.types.text_content import TextContent -from agentex.lib.utils.model_utils import BaseModel -from agentex.lib.environment_variables import EnvironmentVariables -from agentex.lib.core.temporal.types.workflow import SignalName -from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow -from agentex.lib.core.tracing.tracing_processor_manager import ( - add_tracing_processor_config, -) -from agentex.lib.core.temporal.plugins.openai_agents.hooks.hooks import ( - TemporalStreamingHooks, -) - -# OpenAI Agents SDK sandbox imports. These are safe to import at workflow module -# load time; the actual sandbox client is resolved at run time via -# ``temporal_sandbox_client`` (which maps to the worker-registered backend). 
-with workflow.unsafe.imports_passed_through(): - from agents.sandbox import SandboxAgent, SandboxRunConfig - from agents.run_config import RunConfig - from agents.sandbox.snapshot import NoopSnapshotSpec - from agents.sandbox.capabilities import Shell - from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClientOptions - from temporalio.contrib.openai_agents.workflow import temporal_sandbox_client - -# Configure tracing processor (optional - only if you have SGP credentials) -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - ) -) - -environment_variables = EnvironmentVariables.refresh() - -if environment_variables.WORKFLOW_NAME is None: - raise ValueError("Environment variable WORKFLOW_NAME is not set") - -if environment_variables.AGENT_NAME is None: - raise ValueError("Environment variable AGENT_NAME is not set") - -logger = make_logger(__name__) - -MODEL_NAME = "gpt-4o-mini" -INSTRUCTIONS = """You are a local sandbox assistant. - -You have access to shell tools that run real commands on the local machine. - -Guidelines: -- ALWAYS use the shell tools to actually run commands — never guess or make up - output. If the user asks for the Python version, run `python3 --version`. If - they ask to list files, run `ls`. If they ask you to compute something, use - `python3 -c "..."`. -- Run the minimal command(s) needed to answer the question. -- Report the real command output back to the user, concisely. 
-""" - - -class StateModel(BaseModel): - """State model for preserving conversation history across turns.""" - - input_list: list = [] - turn_number: int = 0 - - -@workflow.defn(name=environment_variables.WORKFLOW_NAME) -class At120OpenaiAgentsLocalSandboxWorkflow(BaseWorkflow): - """Long-running Temporal workflow that runs a SandboxAgent against the local sandbox.""" - - def __init__(self): - super().__init__(display_name=environment_variables.AGENT_NAME) - self._complete_task = False - self._state: StateModel | None = None - self._task_id = None - self._trace_id = None - self._parent_span_id = None - - @workflow.signal(name=SignalName.RECEIVE_EVENT) - async def on_task_event_send(self, params: SendEventParams) -> None: - logger.info(f"Received task event: {params.task.id}") - - if self._state is None: - raise ValueError("State is not initialized") - - self._state.turn_number += 1 - - # The ContextInterceptor reads ``self._task_id`` off the workflow - # instance and threads it through activity headers so the streaming - # model + hooks know which task to stream/persist to. - self._task_id = params.task.id - self._trace_id = params.task.id - - # Add the user message to conversation history. - self._state.input_list.append({"role": "user", "content": params.event.content.content}) - - # Echo back the client's message so it shows up in the UI. - await adk.messages.create(task_id=params.task.id, content=params.event.content) - - async with adk.tracing.span( - trace_id=params.task.id, - name=f"Turn {self._state.turn_number}", - input=self._state.model_dump(), - ) as span: - self._parent_span_id = span.id if span else None - - # Build the sandbox agent. The Shell capability becomes real shell - # tools backed by the sandbox client resolved at run time. 
- agent = SandboxAgent( - name="Local Sandbox Assistant", - model=MODEL_NAME, - instructions=INSTRUCTIONS, - capabilities=[Shell()], - ) - - # Point the run at the LOCAL sandbox backend registered on the worker - # under the name "local". ``temporal_sandbox_client`` resolves that - # registration so the sandbox tool calls execute as Temporal - # activities (durable + observable). - # - # IMPORTANT: ``NoopSnapshotSpec()`` skips the per-turn workspace - # snapshot — otherwise stopping the sandbox can raise - # ``WorkspaceArchiveReadError``. - run_config = RunConfig( - sandbox=SandboxRunConfig( - client=temporal_sandbox_client("local"), - options=UnixLocalSandboxClientOptions(), - snapshot=NoopSnapshotSpec(), - ) - ) - - # TemporalStreamingHooks creates the lifecycle messages (tool - # request/response, etc.) and works with the streaming model - # provider to stream tokens to the UI in real time. - result = await Runner.run( - agent, - self._state.input_list, - run_config=run_config, - hooks=TemporalStreamingHooks(task_id=params.task.id), - max_turns=10, - ) - - # IMPORTANT: We do NOT post the assistant message ourselves here. - # The TemporalStreamingModelProvider already streamed and persisted - # the assistant's response. We only persist conversation state for - # the next turn. - self._state.input_list = result.to_input_list() - - if span: - span.output = self._state.model_dump() - - @workflow.run - async def on_task_create(self, params: CreateTaskParams) -> str: - logger.info(f"Task created: {params.task.id}") - - self._state = StateModel(input_list=[], turn_number=0) - - await adk.messages.create( - task_id=params.task.id, - content=TextContent( - author="agent", - content=( - f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n" - f"Send me a message and I'll run real shell commands in a local " - f"sandbox (backed by Temporal) to answer." 
- ), - ), - ) - - await workflow.wait_condition(lambda: self._complete_task, timeout=None) - return "Task completed" - - @workflow.signal - async def complete_task_signal(self) -> None: - logger.info("Received complete_task signal") - self._complete_task = True diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml deleted file mode 100644 index 696894e32..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "at120_openai_agents_local_sandbox" -version = "0.1.0" -description = "A Temporal OpenAI Agents SDK agent using a local (unix_local) sandbox" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk>=0.6.0", - "openai-agents>=0.14.3,<0.15", - "temporalio>=1.18.2", - "scale-gp", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "black", - "isort", - "flake8", - "debugpy>=1.8.15", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/tests/test_agent.py deleted file mode 100644 index 5e161c061..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/tests/test_agent.py +++ /dev/null @@ -1,144 +0,0 @@ -"""Tests for the Temporal OpenAI Agents SDK local-sandbox agent. 
- -This test suite validates that the agent actually runs shell commands in the -LOCAL sandbox (unix_local backend) via the Temporal sandbox bridge, by polling -for the agent's response: -- Ask for the Python version -> response contains "Python 3" -- Ask it to compute 21 * 2 with python3 -> response contains "42" - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. Run: pytest test_agent.py -v - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: at120-openai-agents-local-sandbox) -""" - -import os -import uuid - -import pytest -import pytest_asyncio -from test_utils.async_utils import ( - poll_messages, - send_event_and_poll_yielding, -) - -from agentex import AsyncAgentex -from agentex.types.task_message import TaskMessage -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest - -# Configuration from environment variables -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "at120-openai-agents-local-sandbox") - - -@pytest_asyncio.fixture -async def client(): - """Create an AsyncAgentex client instance for testing.""" - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - """Return the agent name for testing.""" - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -async def _create_task_and_await_welcome(client: AsyncAgentex, agent_id: str) -> str: - """Create a task and wait for the workflow's 
welcome message; return the task id.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) - task = task_response.result - assert task is not None - - welcome_found = False - async for message in poll_messages( - client=client, - task_id=task.id, - timeout=30, - sleep_interval=1.0, - ): - assert isinstance(message, TaskMessage) - if message.content and message.content.type == "text" and message.content.author == "agent": - welcome_found = True - break - assert welcome_found, "Task creation (welcome) message not found" - return task.id - - -async def _send_and_collect_agent_text( - client: AsyncAgentex, agent_id: str, task_id: str, user_message: str -) -> str: - """Send a user message and accumulate the streamed agent text into a string.""" - final_message = None - async for message in send_event_and_poll_yielding( - client=client, - agent_id=agent_id, - task_id=task_id, - user_message=user_message, - timeout=60, - sleep_interval=1.0, - yield_updates=True, # Get updates as streaming writes chunks - ): - if message.content and message.content.type == "text" and message.content.author == "agent": - final_message = message - if message.streaming_status == "DONE": - break - - assert final_message is not None, "Should have received an agent text message" - assert final_message.content is not None, "Final message should have content" - return final_message.content.content or "" - - -class TestLocalSandboxEvents: - """Test the Temporal local-sandbox OpenAI Agents SDK agent.""" - - @pytest.mark.asyncio - async def test_shell_python_version(self, client: AsyncAgentex, agent_id: str): - """The agent should run `python3 --version` in the local sandbox. - - The sandbox runs on Python 3.12, so the real output contains "Python 3". 
- """ - task_id = await _create_task_and_await_welcome(client, agent_id) - text = await _send_and_collect_agent_text( - client, - agent_id, - task_id, - "Use your shell to print the Python version on this machine, then " - "tell me what it is.", - ) - assert text, "Expected a non-empty response from the sandbox agent." - assert "Python 3" in text - - @pytest.mark.asyncio - async def test_shell_compute(self, client: AsyncAgentex, agent_id: str): - """The agent should use python3 in the sandbox to compute 21 * 2 == 42.""" - task_id = await _create_task_and_await_welcome(client, agent_id) - text = await _send_and_collect_agent_text( - client, - agent_id, - task_id, - "Use python3 in your shell to compute 21 * 2 and tell me the result.", - ) - assert text, "Expected a non-empty response from the sandbox agent." - assert "42" in text - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore b/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore index c4f7a8b4b..c49489471 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore @@ -40,4 +40,4 @@ venv.bak/ .gitignore # Misc -.DS_Store \ No newline at end of file +.DS_Store diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/README.md b/examples/tutorials/10_async/10_temporal/130_langgraph/README.md index 61ccaf66a..0820f56ab 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/README.md +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/README.md @@ -1,58 +1,49 @@ -# at130-langgraph — AgentEx Temporal + LangGraph +# Tutorial: Temporal LangGraph Agent -A minimal Temporal-backed [LangGraph](https://langchain-ai.github.io/langgraph/) -agent. 
It uses the official [`temporalio.contrib.langgraph`](https://docs.temporal.io/develop/python/integrations/langgraph) -plugin so each LangGraph node runs as a durable **Temporal activity** (the LLM -`agent` node) or inline in the **workflow** (the `tools` node) — set per node -with `execute_in`. *Temporal is the runtime; LangGraph is the agent framework.* +This tutorial demonstrates how to build a **Temporal-backed** LangGraph agent on +AgentEx using the **unified harness surface**. The agent's LLM node runs as a +durable Temporal activity; the tools node runs inline in the workflow. -> The Temporal LangGraph plugin is currently **experimental**. +## Key Concepts -## The graph +### Temporal + LangGraph -``` -START → agent → (tool calls?) → tools → agent - → (no tool calls?) → END -``` - -- `agent` (`execute_in="activity"`): the LLM call — a retried, observable Temporal activity. -- `tools` (`execute_in="workflow"`): runs the tool calls inline in the workflow. +The ``LangGraphPlugin`` from ``temporalio.contrib.langgraph`` turns annotated graph +nodes into Temporal activities or inline workflow callables: -The router and tools are `async` so LangGraph awaits them directly (a sync -callable is offloaded via `run_in_executor`, which Temporal workflows forbid). 
+- `agent` node: `execute_in="activity"` (durable, retryable LLM call) +- `tools` node: `execute_in="workflow"` (inline, fast tool execution) -## Project structure - -``` -130_langgraph/ -├── project/ -│ ├── acp.py # Thin async ACP server; registers the LangGraphPlugin -│ ├── workflow.py # Runs the graph each turn; keeps multi-turn memory -│ ├── graph.py # LangGraph graph; nodes tagged execute_in activity/workflow -│ └── tools.py # Async tool(s) -└── run_worker.py is project/run_worker.py -``` +### Message surfacing -## Running +After each turn, ``emit_langgraph_messages`` converts the new LangGraph messages +(tool requests, tool responses, final text) into AgentEx ``TaskMessage`` objects +and posts them to the task's message stream. -```bash -agentex agents run --manifest manifest.yaml -``` +This is the Temporal-specific path. The non-Temporal async/sync channels use +``UnifiedEmitter.auto_send_turn`` / ``UnifiedEmitter.yield_turn`` with +``LangGraphTurn`` instead. -Open the Temporal UI at http://localhost:8080 to watch the workflow and the -`agent` activity execute. Use `dev.ipynb` to create a task and send messages. +## Files -## Adding tools +| File | Description | +|------|-------------| +| `project/acp.py` | ACP server (Temporal config, LangGraphPlugin) | +| `project/graph.py` | LangGraph graph (agent + tools nodes) | +| `project/workflow.py` | Temporal workflow (signal handlers, emit_langgraph_messages) | +| `project/run_worker.py` | Temporal worker runner | +| `project/tools.py` | Tool definitions (weather example) | +| `tests/test_agent.py` | Integration tests | +| `manifest.yaml` | Agent configuration (name: at130-langgraph) | -Define an **async** `@tool` in `project/tools.py` and add it to `TOOLS`. The -model is bound with `TOOLS` and the tool node runs them by name. +## Running Locally -For a fuller version with human-in-the-loop approval and graph-introspection -queries, scaffold the `temporal-langgraph` template via `agentex init`. 
+```bash +agentex agents run +``` -## Tests +## Running Tests -- `tests/test_graph_temporal.py` — hermetic ReAct-loop test with a stub model, - plus a live end-to-end run through the real Temporal plugin (skipped unless - `LITELLM_API_KEY` is set). -- `tests/test_agent.py` — live integration against a running agent. +```bash +pytest tests/test_agent.py -v +``` diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml b/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml index d1f5960b1..936ebfa68 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml @@ -1,20 +1,5 @@ -# Agent Manifest Configuration -# --------------------------- -# This file defines how your agent should be built and deployed. - -# Build Configuration -# ------------------ -# The build config defines what gets packaged into your agent's Docker image. -# This same configuration is used whether building locally or remotely. -# -# When building: -# 1. All files from include_paths are collected into a build context -# 2. The context is filtered by dockerignore rules -# 3. The Dockerfile uses this context to build your agent's image -# 4. The image is pushed to a registry and used to run your agent build: context: - # Build from the tutorials root so shared test_utils are available. 
root: ../../../ include_paths: - 10_async/10_temporal/130_langgraph @@ -22,107 +7,45 @@ build: dockerfile: 10_async/10_temporal/130_langgraph/Dockerfile dockerignore: 10_async/10_temporal/130_langgraph/.dockerignore - -# Local Development Configuration -# ----------------------------- -# Only used when running the agent locally local_development: agent: - port: 8000 # Port where your local ACP server is running - host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) - - # File paths for local development (relative to this manifest.yaml) + port: 8000 + host_address: host.docker.internal paths: - # Path to ACP server file - # Examples: - # project/acp.py (standard) - # src/server.py (custom structure) - # ../shared/acp.py (shared across projects) - # /absolute/path/acp.py (absolute path) acp: project/acp.py - - # Path to temporal worker file - # Examples: - # project/run_worker.py (standard) - # workers/temporal.py (custom structure) - # ../shared/worker.py (shared across projects) worker: project/run_worker.py - -# Agent Configuration -# ----------------- agent: - # Type of agent - either sync or async acp_type: async - - # Unique name for your agent - # Used for task routing and monitoring name: at130-langgraph + description: "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities" - # Description of what your agent does - # Helps with documentation and discovery - description: "A Temporal-backed LangGraph agent whose nodes run as Temporal activities" - - # Temporal workflow configuration - # This enables your agent to run as a Temporal workflow for long-running tasks temporal: enabled: true workflows: - # Name of the workflow class - # Must match the @workflow.defn name in your workflow.py - name: at130-langgraph - - # Queue name for task distribution - # Used by Temporal to route tasks to your agent - # Convention: _task_queue queue_name: 
at130_langgraph_queue - # Optional: Health check port for temporal worker - # Defaults to 80 if not specified - # health_check_port: 80 - - # Optional: Credentials mapping - # Maps Kubernetes secrets to environment variables - # Common credentials include: credentials: - env_var_name: REDIS_URL secret_name: redis-url-secret secret_key: url - # - env_var_name: LITELLM_API_KEY - # secret_name: litellm-api-key - # secret_key: api-key - - # Optional: Set Environment variables for running your agent locally as well - # as for deployment later on - env: {} - # LITELLM_API_KEY: "" - # OPENAI_BASE_URL: "" - # OPENAI_ORG_ID: "" + env: {} -# Deployment Configuration -# ----------------------- -# Configuration for deploying your agent to Kubernetes clusters deployment: - # Container image configuration image: - repository: "" # Update with your container registry - tag: "latest" # Default tag, should be versioned in production + repository: "" + tag: "latest" - imagePullSecrets: [] # Update with your image pull secret name - # - name: my-registry-secret + imagePullSecrets: [] - # Global deployment settings that apply to all clusters - # These can be overridden in cluster-specific environments (environments.yaml) global: - # Default replica count replicaCount: 1 - - # Default resource requirements resources: requests: cpu: "500m" memory: "1Gi" limits: cpu: "1000m" - memory: "2Gi" \ No newline at end of file + memory: "2Gi" diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py index c01f8831c..7af9c5e68 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py @@ -1,19 +1,13 @@ -"""ACP server for the Temporal LangGraph agent. +"""ACP server for the Temporal harness LangGraph agent. -This file is intentionally thin. 
When ``acp_type="async"`` is combined with -``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires: +Follows the ``130_langgraph`` pattern: the Temporal ``LangGraphPlugin`` runs +graph nodes as Temporal activities. The agent logic lives in ``workflow.py`` +(the runtime) and ``graph.py`` (the LangGraph graph), executed by the Temporal +worker (``run_worker.py``), not by this HTTP process. - HTTP task/create → @workflow.run on the workflow class - HTTP task/event/send → @workflow.signal(SignalName.RECEIVE_EVENT) - HTTP task/cancel → workflow cancellation via the Temporal client - -so we don't define any handlers here. The agent logic lives in -``project/workflow.py`` (the runtime) and ``project/graph.py`` (the LangGraph -graph whose nodes run as Temporal activities), executed by the Temporal worker -(``project/run_worker.py``), not by this HTTP process. - -The ``LangGraphPlugin`` is registered here too so the Temporal client started -by FastACP shares the same graph registry as the worker. +The workflow uses ``emit_langgraph_messages`` to surface turn messages to +AgentEx. That helper is Temporal-specific and is not replaced by the unified +harness here (``UnifiedEmitter`` targets the non-Temporal async/sync channels). """ from __future__ import annotations @@ -33,10 +27,8 @@ acp = FastACP.create( acp_type="async", config=TemporalACPConfig( - # When deployed to the cluster, the Temporal address is set automatically. - # Locally we point at the Temporal service from docker compose. 
type="temporal", temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})], ), -) \ No newline at end of file +) diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py index 0589aa9ba..7adba3ae4 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py @@ -1,24 +1,9 @@ """LangGraph graph for at130-langgraph — nodes run as Temporal activities. -The ``temporalio.contrib.langgraph`` plugin runs each node where its -``execute_in`` metadata says: the LLM ``agent`` node as a durable Temporal -**activity**, the ``tools`` node inline in the **workflow**. - - START → agent → (tool calls?) → tools → agent - → (no tool calls?) → END - -The router and tools are ``async`` so LangGraph awaits them directly — a sync -callable would be offloaded via ``run_in_executor``, which Temporal's workflow -event loop does not support. - -The in-workflow ``tools`` node is a plain ``async`` function rather than -LangGraph's ``ToolNode`` prebuilt on purpose. The plugin wraps an in-workflow -node in ``wrap_workflow``, whose closure captures the wrapped object. When that -object is itself a LangChain ``Runnable`` (as ``ToolNode`` is), LangGraph's -``compile()`` subgraph detection (``find_subgraph_pregel`` → -``get_function_nonlocals``) recurses through that wrapper without cycle -detection and never terminates, tripping Temporal's deadlock detector. A plain -function isn't a ``Runnable``, so compile stays trivial. +Identical in structure to ``130_langgraph/project/graph.py``. The graph +definition is not affected by the harness migration; only the agent naming +changes. The LLM ``agent`` node runs as a durable Temporal activity; +the ``tools`` node runs inline in the workflow. 
""" from __future__ import annotations @@ -40,10 +25,8 @@ from project.tools import TOOLS -# Look up tools by name for the in-workflow tools node. _TOOLS_BY_NAME = {tool.name: tool for tool in TOOLS} -# Name this graph is registered under in the LangGraphPlugin (acp.py / run_worker.py). GRAPH_NAME = "at130-langgraph" MODEL_NAME = "gpt-4o" SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. @@ -62,37 +45,27 @@ async def agent_node(state: AgentState) -> dict[str, Any]: llm = ChatOpenAI(model=MODEL_NAME).bind_tools(TOOLS) messages = state["messages"] if not messages or not isinstance(messages[0], SystemMessage): - system = SystemMessage( - content=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")) - ) + system = SystemMessage(content=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) messages = [system, *messages] return {"messages": [await llm.ainvoke(messages)]} async def tools_node(state: AgentState) -> dict[str, Any]: - """Run the tool calls the model requested. Runs inline in the workflow. - - A plain ``async`` function (not LangGraph's ``ToolNode``) — see the module - docstring for why a ``Runnable`` tools node can't be compiled here. - """ + """Run the tool calls the model requested. Runs inline in the workflow.""" last = state["messages"][-1] results: list[Any] = [] for call in getattr(last, "tool_calls", None) or []: tool = _TOOLS_BY_NAME.get(call["name"]) - # Mirror ToolNode: surface an unknown/hallucinated tool name as an error - # ToolMessage so the graph keeps running instead of crashing the node. if tool is None: output = f"Error: unknown tool {call['name']!r}. 
Available: {list(_TOOLS_BY_NAME)}" else: output = await tool.ainvoke(call["args"]) - results.append( - ToolMessage(content=str(output), tool_call_id=call["id"], name=call["name"]) - ) + results.append(ToolMessage(content=str(output), tool_call_id=call["id"], name=call["name"])) return {"messages": results} async def route_after_agent(state: AgentState) -> str: - """Go to the tools node if the model requested tools, else finish (async router).""" + """Go to the tools node if the model requested tools, else finish.""" last = state["messages"][-1] return "tools" if getattr(last, "tool_calls", None) else END diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py index 7040f560b..4b31bf396 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py @@ -5,8 +5,7 @@ The ``LangGraphPlugin`` is given the graph registry (``{ GRAPH_NAME: graph }``). At runtime it turns the graph's ``execute_in="activity"`` nodes into Temporal -activities and registers them on the worker automatically — so we don't have -to enumerate node activities by hand. +activities and registers them on the worker automatically. 
""" import asyncio @@ -14,7 +13,7 @@ from temporalio.contrib.langgraph import LangGraphPlugin from project.graph import GRAPH_NAME, build_graph -from project.workflow import At130LanggraphWorkflow +from project.workflow import AtHarnessLanggraphWorkflow from agentex.lib.utils.debug import setup_debug_if_enabled from agentex.lib.utils.logging import make_logger from agentex.lib.environment_variables import EnvironmentVariables @@ -32,9 +31,6 @@ async def main(): if task_queue_name is None: raise ValueError("WORKFLOW_TASK_QUEUE is not set") - # AgentexWorker runs workflows with an unsandboxed runner, so importing - # langchain/langgraph inside the workflow + nodes is fine. The LangGraph - # plugin registers the graph's activity-nodes for us. worker = AgentexWorker( task_queue=task_queue_name, plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})], @@ -42,9 +38,9 @@ async def main(): await worker.run( activities=get_all_activities(), - workflow=At130LanggraphWorkflow, + workflow=AtHarnessLanggraphWorkflow, ) if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py index 20b7185ee..e7220016e 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py @@ -1,20 +1,37 @@ -"""Tools for the LangGraph agent. +"""Tool definitions for the 130_langgraph temporal agent.""" -Tools are ``async`` so the in-workflow tool node can await them directly -(a sync tool would be offloaded via ``run_in_executor``, which Temporal's -workflow event loop does not allow). -""" +from langchain_core.tools import Tool -from __future__ import annotations -from langchain_core.tools import tool +def get_weather(city: str) -> str: + """Get the current weather for a city. 
+ Args: + city: The name of the city to get weather for. -@tool -async def get_weather(city: str) -> str: - """Get the current weather for a city.""" - # TODO: replace with a real weather API call. + Returns: + A string describing the weather conditions. + """ return f"The weather in {city} is sunny and 72°F" -TOOLS = [get_weather] +async def aget_weather(city: str) -> str: + """Native async tool entrypoint. + + ``tools_node`` runs inline in the Temporal workflow and invokes tools via + ``tool.ainvoke``. A sync-only tool forces LangChain to bridge through + ``run_in_executor`` (a thread pool), which the deterministic Temporal + workflow event loop forbids (``NotImplementedError``). Providing a real + coroutine keeps tool execution on the workflow loop. + """ + return get_weather(city) + + +weather_tool = Tool( + name="get_weather", + func=get_weather, + coroutine=aget_weather, + description="Get the current weather for a city. Input should be a city name.", +) + +TOOLS = [weather_tool] diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py index a50670251..b9224ca00 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py @@ -1,4 +1,4 @@ -"""Temporal workflow for at130-langgraph — Temporal as the LangGraph runtime. +"""Temporal workflow for at130-langgraph. Each turn the workflow runs the LangGraph graph (``project/graph.py``) via the ``temporalio.contrib.langgraph`` plugin. 
The plugin runs the LLM ``agent`` node @@ -37,7 +37,7 @@ @workflow.defn(name=environment_variables.WORKFLOW_NAME) -class At130LanggraphWorkflow(BaseWorkflow): +class AtHarnessLanggraphWorkflow(BaseWorkflow): """Runs the LangGraph agent each turn; its nodes run as Temporal activities.""" def __init__(self) -> None: @@ -56,10 +56,7 @@ async def on_task_event_send(self, params: SendEventParams) -> None: result = await compiled.ainvoke({"messages": self._messages}) self._messages = result["messages"] - # Surface the messages this turn produced (tool calls, results, final - # text) to the AgentEx UI. The SDK helper does the LangGraph→AgentEx - # message conversion. - await emit_langgraph_messages(self._messages[self._emitted:], params.task.id) + await emit_langgraph_messages(self._messages[self._emitted :], params.task.id) self._emitted = len(self._messages) @workflow.signal diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml b/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml index e22905de4..6d2262761 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml @@ -5,13 +5,11 @@ build-backend = "hatchling.build" [project] name = "at130-langgraph" version = "0.1.0" -description = "A Temporal-backed LangGraph agent whose nodes run as Temporal activities" +description = "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities" requires-python = ">=3.12" dependencies = [ "agentex-sdk", "scale-gp", - # Temporal with the LangGraph plugin (temporalio.contrib.langgraph), - # which runs LangGraph nodes as Temporal activities. Needs >=1.27.0. 
"temporalio[langgraph]>=1.27.0", "langchain-openai", "langchain-core", @@ -39,4 +37,4 @@ target-version = ['py312'] [tool.isort] profile = "black" -line_length = 88 \ No newline at end of file +line_length = 88 diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py index b798f568f..f2292389f 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py @@ -1,4 +1,4 @@ -"""Integration tests for the Temporal + LangGraph agent (live agent required). +"""Integration tests for the Temporal harness LangGraph agent (live agent required). These drive a *running* agent over the AgentEx API and verify that: - the agent sends a welcome message on task creation, @@ -6,9 +6,6 @@ (proving the LLM node ran as a Temporal activity and the tool node ran), - the final answer reflects the tool output. -For fast, network-free coverage of the graph + human-in-the-loop logic, see -``test_graph_temporal.py``. - To run: 1. Start the agent (worker + ACP server): ``agentex agents run --manifest manifest.yaml`` 2. 
Set AGENTEX_API_BASE_URL if not using the default @@ -60,29 +57,18 @@ class TestNonStreamingEvents: @pytest.mark.asyncio async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): """Create a task, ask about weather, verify the tool round-trip.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None - # Wait for the welcome message from on_task_create task_creation_found = False - async for message in poll_messages( - client=client, task_id=task.id, timeout=30, sleep_interval=1.0 - ): + async for message in poll_messages(client=client, task_id=task.id, timeout=30, sleep_interval=1.0): assert isinstance(message, TaskMessage) - if ( - message.content - and message.content.type == "text" - and message.content.author == "agent" - ): + if message.content and message.content.type == "text" and message.content.author == "agent": task_creation_found = True break assert task_creation_found, "Task creation welcome message not found" - # Ask about weather — the agent (LangGraph node, as a Temporal activity) - # should call get_weather. 
seen_tool_request = False seen_tool_response = False final_message = None @@ -101,11 +87,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): if message.content and message.content.type == "tool_response": seen_tool_response = True - if ( - message.content - and message.content.type == "text" - and message.content.author == "agent" - ): + if message.content and message.content.type == "text" and message.content.author == "agent": final_message = message content_length = len(getattr(message.content, "content", "") or "") if getattr(message, "streaming_status", None) in (None, "DONE") and content_length > 0: @@ -115,11 +97,8 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): assert seen_tool_request, "Expected a tool_request (agent calling get_weather)" assert seen_tool_response, "Expected a tool_response (get_weather result)" assert final_message is not None, "Expected a final agent text message" - final_text = ( - getattr(final_message.content, "content", None) if final_message.content else None - ) + final_text = getattr(final_message.content, "content", None) if final_message.content else None assert isinstance(final_text, str) and len(final_text) > 0 - # get_weather always returns "72°F" — the response should mention it. assert "72" in final_text, "Expected weather response to mention 72°F" diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_graph_temporal.py b/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_graph_temporal.py deleted file mode 100644 index 485b896f6..000000000 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_graph_temporal.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Tests for the Temporal + LangGraph agent's graph. - -Two layers: - -1. ``TestGraphLogic`` — hermetic, no network. 
Compiles the actual shipped - graph (``project/graph.py``) with a deterministic stub model and runs the - ReAct loop (agent → tools → agent) to completion. - -2. ``TestTemporalPlugin`` — end-to-end through the real Temporal LangGraph - plugin on a local Temporal server, proving the LLM node runs as an activity - and the tool node in the workflow. Needs a real model, so it is skipped - unless ``LITELLM_API_KEY`` (or ``OPENAI_API_KEY``) is set. - -Run from the agent's own (uv) environment: pytest tests/test_graph_temporal.py -v -""" - -from __future__ import annotations - -import os -import uuid - -import pytest - -pytest.importorskip("langgraph") -pytest.importorskip("temporalio.contrib.langgraph") - -import project.graph as graph_module -from temporalio import workflow -from project.graph import GRAPH_NAME, build_graph -from langchain_core.messages import AIMessage, ToolMessage -from temporalio.contrib.langgraph import graph as lg_graph - - -@workflow.defn -class _DriverWorkflow: - """Module-level driver workflow (Temporal forbids local workflow classes).""" - - @workflow.run - async def run(self, message: str) -> str: - compiled = lg_graph(GRAPH_NAME).compile() - result = await compiled.ainvoke({"messages": [{"role": "user", "content": message}]}) - return result["messages"][-1].content - - -class _StubModel: - """Deterministic stand-in for ``ChatOpenAI(...).bind_tools(...)``. - - First call → emit a tool call for ``get_weather``; once a ToolMessage is in - the history → emit a plain text answer. Drives the full ReAct loop offline. 
- """ - - def bind_tools(self, _tools): - return self - - async def ainvoke(self, messages): - if any(isinstance(m, ToolMessage) for m in messages): - return AIMessage(content="All done — the tool has run.") - return AIMessage( - content="", - tool_calls=[{"id": "call_1", "name": "get_weather", "args": {"city": "Denver"}}], - ) - - -class TestGraphLogic: - """Hermetic test of the ReAct loop, no network.""" - - @pytest.mark.asyncio - async def test_react_loop_runs_tool(self, monkeypatch): - monkeypatch.setattr(graph_module, "ChatOpenAI", lambda *_a, **_k: _StubModel()) - compiled = build_graph().compile() - result = await compiled.ainvoke({"messages": [{"role": "user", "content": "go"}]}) - - tool_outputs = [m.content for m in result["messages"] if isinstance(m, ToolMessage)] - assert any("sunny" in o for o in tool_outputs) - assert "done" in result["messages"][-1].content.lower() - - -@pytest.mark.skipif( - not (os.environ.get("LITELLM_API_KEY") or os.environ.get("OPENAI_API_KEY")), - reason="needs a real model (set LITELLM_API_KEY) for the live Temporal run", -) -class TestTemporalPlugin: - """End-to-end through the real Temporal LangGraph plugin on a local server.""" - - @pytest.mark.asyncio - async def test_nodes_run_as_activities_via_plugin(self): - from temporalio.worker import Worker, UnsandboxedWorkflowRunner - from temporalio.testing import WorkflowEnvironment - from temporalio.contrib.langgraph import LangGraphPlugin - - plugin = LangGraphPlugin(graphs={GRAPH_NAME: build_graph()}) - async with await WorkflowEnvironment.start_local(plugins=[plugin]) as env: - async with Worker( - env.client, - task_queue="tq", - workflows=[_DriverWorkflow], - workflow_runner=UnsandboxedWorkflowRunner(), - ): - out = await env.client.execute_workflow( - _DriverWorkflow.run, - "What's the weather in Denver? 
Use the get_weather tool.", - id=f"wf-{uuid.uuid4()}", - task_queue="tq", - ) - assert "denver" in out.lower() diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/Dockerfile b/examples/tutorials/10_async/10_temporal/140_harness_openai/Dockerfile deleted file mode 100644 index c107e3269..000000000 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -COPY 10_async/10_temporal/140_harness_openai/pyproject.toml /app/140_harness_openai/pyproject.toml -COPY 10_async/10_temporal/140_harness_openai/README.md /app/140_harness_openai/README.md - -WORKDIR /app/140_harness_openai - -COPY 10_async/10_temporal/140_harness_openai/project /app/140_harness_openai/project -COPY 10_async/10_temporal/140_harness_openai/tests /app/140_harness_openai/tests -COPY test_utils /app/test_utils - -RUN uv pip install --system .[dev] - -ENV PYTHONPATH=/app - -ENV AGENT_NAME=at140-harness-openai - -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] - -# When we deploy the worker, we will replace the CMD with the following -# CMD ["python", "-m", "run_worker"] diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/__init__.py b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/.dockerignore b/examples/tutorials/10_async/10_temporal/150_codex/.dockerignore 
similarity index 100% rename from examples/tutorials/10_async/00_base/harness_pydantic_ai/.dockerignore rename to examples/tutorials/10_async/10_temporal/150_codex/.dockerignore diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/Dockerfile b/examples/tutorials/10_async/10_temporal/150_codex/Dockerfile similarity index 66% rename from examples/tutorials/10_async/10_temporal/harness_codex/Dockerfile rename to examples/tutorials/10_async/10_temporal/150_codex/Dockerfile index e2f8807fd..9561548c4 100644 --- a/examples/tutorials/10_async/10_temporal/harness_codex/Dockerfile +++ b/examples/tutorials/10_async/10_temporal/150_codex/Dockerfile @@ -22,19 +22,19 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 -COPY 10_async/10_temporal/harness_codex/pyproject.toml /app/harness_codex/pyproject.toml -COPY 10_async/10_temporal/harness_codex/README.md /app/harness_codex/README.md +COPY 10_async/10_temporal/150_codex/pyproject.toml /app/150_codex/pyproject.toml +COPY 10_async/10_temporal/150_codex/README.md /app/150_codex/README.md -WORKDIR /app/harness_codex +WORKDIR /app/150_codex -COPY 10_async/10_temporal/harness_codex/project /app/harness_codex/project -COPY 10_async/10_temporal/harness_codex/tests /app/harness_codex/tests +COPY 10_async/10_temporal/150_codex/project /app/150_codex/project +COPY 10_async/10_temporal/150_codex/tests /app/150_codex/tests COPY test_utils /app/test_utils RUN uv pip install --system .[dev] ENV PYTHONPATH=/app -ENV AGENT_NAME=at-harness-codex +ENV AGENT_NAME=at150-codex CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/README.md b/examples/tutorials/10_async/10_temporal/150_codex/README.md similarity index 95% rename from examples/tutorials/10_async/10_temporal/harness_codex/README.md rename to examples/tutorials/10_async/10_temporal/150_codex/README.md index 4f9b76955..498b81374 100644 --- 
a/examples/tutorials/10_async/10_temporal/harness_codex/README.md +++ b/examples/tutorials/10_async/10_temporal/150_codex/README.md @@ -1,4 +1,4 @@ -# harness_codex (Temporal) +# 150_codex (Temporal) Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap, `CodexTurn`, and `UnifiedEmitter` for a **Temporal-durable** async ACP agent. @@ -36,7 +36,7 @@ Live runs require: ```bash cd /path/to/scale-agentex-python -uv run --all-packages --all-extras pytest examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py -q +uv run --all-packages --all-extras pytest examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py -q ``` ## Running live integration tests diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/conftest.py b/examples/tutorials/10_async/10_temporal/150_codex/conftest.py similarity index 72% rename from examples/tutorials/10_async/10_temporal/harness_codex/conftest.py rename to examples/tutorials/10_async/10_temporal/150_codex/conftest.py index 4ae6ce61a..6370f278d 100644 --- a/examples/tutorials/10_async/10_temporal/harness_codex/conftest.py +++ b/examples/tutorials/10_async/10_temporal/150_codex/conftest.py @@ -11,7 +11,7 @@ # AGENT_NAME must match the manifest's agent name: the live test queries the # server by this name, and project.workflow reads it at import time. 
-os.environ.setdefault("AGENT_NAME", "at-harness-codex") +os.environ.setdefault("AGENT_NAME", "at150-codex") os.environ.setdefault("ACP_URL", "http://localhost:8000") -os.environ.setdefault("WORKFLOW_NAME", "at-harness-codex") -os.environ.setdefault("WORKFLOW_TASK_QUEUE", "at_harness_codex_queue") +os.environ.setdefault("WORKFLOW_NAME", "at150-codex") +os.environ.setdefault("WORKFLOW_TASK_QUEUE", "at150_codex_queue") diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/manifest.yaml b/examples/tutorials/10_async/10_temporal/150_codex/manifest.yaml similarity index 80% rename from examples/tutorials/10_async/10_temporal/harness_codex/manifest.yaml rename to examples/tutorials/10_async/10_temporal/150_codex/manifest.yaml index 3bc21dccc..d64bdfad0 100644 --- a/examples/tutorials/10_async/10_temporal/harness_codex/manifest.yaml +++ b/examples/tutorials/10_async/10_temporal/150_codex/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../../ include_paths: - - 10_async/10_temporal/harness_codex + - 10_async/10_temporal/150_codex - test_utils - dockerfile: 10_async/10_temporal/harness_codex/Dockerfile - dockerignore: 10_async/10_temporal/harness_codex/.dockerignore + dockerfile: 10_async/10_temporal/150_codex/Dockerfile + dockerignore: 10_async/10_temporal/150_codex/.dockerignore local_development: agent: @@ -17,14 +17,14 @@ local_development: agent: acp_type: async - name: at-harness-codex + name: at150-codex description: Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess temporal: enabled: true workflows: - - name: at-harness-codex - queue_name: at_harness_codex_queue + - name: at150-codex + queue_name: at150_codex_queue credentials: - env_var_name: OPENAI_API_KEY @@ -50,7 +50,7 @@ deployment: global: agent: - name: "at-harness-codex" + name: "at150-codex" description: "Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess" replicaCount: 1 resources: diff --git 
a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/__init__.py b/examples/tutorials/10_async/10_temporal/150_codex/project/__init__.py similarity index 100% rename from examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/__init__.py rename to examples/tutorials/10_async/10_temporal/150_codex/project/__init__.py diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/acp.py b/examples/tutorials/10_async/10_temporal/150_codex/project/acp.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/harness_codex/project/acp.py rename to examples/tutorials/10_async/10_temporal/150_codex/project/acp.py diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/activities.py b/examples/tutorials/10_async/10_temporal/150_codex/project/activities.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/harness_codex/project/activities.py rename to examples/tutorials/10_async/10_temporal/150_codex/project/activities.py diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/run_worker.py b/examples/tutorials/10_async/10_temporal/150_codex/project/run_worker.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/harness_codex/project/run_worker.py rename to examples/tutorials/10_async/10_temporal/150_codex/project/run_worker.py diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/workflow.py b/examples/tutorials/10_async/10_temporal/150_codex/project/workflow.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/harness_codex/project/workflow.py rename to examples/tutorials/10_async/10_temporal/150_codex/project/workflow.py diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/pyproject.toml b/examples/tutorials/10_async/10_temporal/150_codex/pyproject.toml similarity index 96% rename from examples/tutorials/10_async/10_temporal/harness_codex/pyproject.toml 
rename to examples/tutorials/10_async/10_temporal/150_codex/pyproject.toml index c4d67d285..7e1d6250f 100644 --- a/examples/tutorials/10_async/10_temporal/harness_codex/pyproject.toml +++ b/examples/tutorials/10_async/10_temporal/150_codex/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "at-harness-codex" +name = "at150-codex" version = "0.1.0" description = "Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess" readme = "README.md" diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py similarity index 99% rename from examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py rename to examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py index 2066b35b1..fa6c66083 100644 --- a/examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py +++ b/examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py @@ -213,7 +213,7 @@ async def _auto_send(_self, turn, *_a, **_kw): LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1" AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "at-harness-codex") +AGENT_NAME = os.environ.get("AGENT_NAME", "at150-codex") @pytest.mark.skipif( diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/__init__.py b/examples/tutorials/10_async/10_temporal/harness_codex/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/README.md b/examples/tutorials/10_async/10_temporal/harness_langgraph/README.md deleted file mode 100644 index 4df6969f1..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# Tutorial: Temporal Harness LangGraph Agent - -This 
tutorial demonstrates how to build a **Temporal-backed** LangGraph agent on -AgentEx, following the ``130_langgraph`` pattern. The agent's LLM node runs as a -durable Temporal activity; the tools node runs inline in the workflow. - -This agent is named ``at-harness-langgraph`` to distinguish it from -``at130-langgraph`` (the bespoke reference). The graph and workflow structure are -identical; only the agent name changes. - -## Key Concepts - -### Temporal + LangGraph - -The ``LangGraphPlugin`` from ``temporalio.contrib.langgraph`` turns annotated graph -nodes into Temporal activities or inline workflow callables: - -- `agent` node: `execute_in="activity"` (durable, retryable LLM call) -- `tools` node: `execute_in="workflow"` (inline, fast tool execution) - -### Message surfacing - -After each turn, ``emit_langgraph_messages`` converts the new LangGraph messages -(tool requests, tool responses, final text) into AgentEx ``TaskMessage`` objects -and posts them to the task's message stream. - -This is the Temporal-specific path. The non-Temporal async/sync channels use -``UnifiedEmitter.auto_send_turn`` / ``UnifiedEmitter.yield_turn`` with -``LangGraphTurn`` instead. 
- -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | ACP server (Temporal config, LangGraphPlugin) | -| `project/graph.py` | LangGraph graph (agent + tools nodes) | -| `project/workflow.py` | Temporal workflow (signal handlers, emit_langgraph_messages) | -| `project/run_worker.py` | Temporal worker runner | -| `project/tools.py` | Tool definitions (weather example) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration (name: at-harness-langgraph) | - -## Running Locally - -```bash -agentex agents run -``` - -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/manifest.yaml b/examples/tutorials/10_async/10_temporal/harness_langgraph/manifest.yaml deleted file mode 100644 index 596d38eb4..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/manifest.yaml +++ /dev/null @@ -1,51 +0,0 @@ -build: - context: - root: ../../../ - include_paths: - - 10_async/10_temporal/harness_langgraph - - test_utils - dockerfile: 10_async/10_temporal/harness_langgraph/Dockerfile - dockerignore: 10_async/10_temporal/harness_langgraph/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - worker: project/run_worker.py - -agent: - acp_type: async - name: at-harness-langgraph - description: "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities" - - temporal: - enabled: true - workflows: - - name: at-harness-langgraph - queue_name: at_harness_langgraph_queue - - credentials: - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env: {} - -deployment: - image: - repository: "" - tag: "latest" - - imagePullSecrets: [] - - global: - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git 
a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/__init__.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/acp.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/acp.py deleted file mode 100644 index 7af9c5e68..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/acp.py +++ /dev/null @@ -1,34 +0,0 @@ -"""ACP server for the Temporal harness LangGraph agent. - -Follows the ``130_langgraph`` pattern: the Temporal ``LangGraphPlugin`` runs -graph nodes as Temporal activities. The agent logic lives in ``workflow.py`` -(the runtime) and ``graph.py`` (the LangGraph graph), executed by the Temporal -worker (``run_worker.py``), not by this HTTP process. - -The workflow uses ``emit_langgraph_messages`` to surface turn messages to -AgentEx. That helper is Temporal-specific and is not replaced by the unified -harness here (``UnifiedEmitter`` targets the non-Temporal async/sync channels). 
-""" - -from __future__ import annotations - -import os - -from dotenv import load_dotenv - -load_dotenv() - -from temporalio.contrib.langgraph import LangGraphPlugin - -from project.graph import GRAPH_NAME, build_graph -from agentex.lib.types.fastacp import TemporalACPConfig -from agentex.lib.sdk.fastacp.fastacp import FastACP - -acp = FastACP.create( - acp_type="async", - config=TemporalACPConfig( - type="temporal", - temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), - plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})], - ), -) diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/graph.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/graph.py deleted file mode 100644 index ce9c2b520..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/graph.py +++ /dev/null @@ -1,85 +0,0 @@ -"""LangGraph graph for at-harness-langgraph — nodes run as Temporal activities. - -Identical in structure to ``130_langgraph/project/graph.py``. The graph -definition is not affected by the harness migration; only the agent naming -changes. The LLM ``agent`` node runs as a durable Temporal activity; -the ``tools`` node runs inline in the workflow. -""" - -from __future__ import annotations - -import os -from typing import Any, Annotated -from datetime import datetime, timedelta - -_litellm_key = os.environ.get("LITELLM_API_KEY") -if _litellm_key: - os.environ.setdefault("OPENAI_API_KEY", _litellm_key) - -from typing_extensions import TypedDict - -from langgraph.graph import END, START, StateGraph -from langchain_openai import ChatOpenAI -from langchain_core.messages import ToolMessage, SystemMessage -from langgraph.graph.message import add_messages - -from project.tools import TOOLS - -_TOOLS_BY_NAME = {tool.name: tool for tool in TOOLS} - -GRAPH_NAME = "at-harness-langgraph" -MODEL_NAME = "gpt-4o" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. 
- -Current date and time: {timestamp} - -Be concise and use tools when they help answer the question.""" - - -class AgentState(TypedDict): - messages: Annotated[list[Any], add_messages] - - -async def agent_node(state: AgentState) -> dict[str, Any]: - """The 'agent' node — one LLM call. Runs as a durable Temporal activity.""" - llm = ChatOpenAI(model=MODEL_NAME).bind_tools(TOOLS) - messages = state["messages"] - if not messages or not isinstance(messages[0], SystemMessage): - system = SystemMessage(content=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) - messages = [system, *messages] - return {"messages": [await llm.ainvoke(messages)]} - - -async def tools_node(state: AgentState) -> dict[str, Any]: - """Run the tool calls the model requested. Runs inline in the workflow.""" - last = state["messages"][-1] - results: list[Any] = [] - for call in getattr(last, "tool_calls", None) or []: - tool = _TOOLS_BY_NAME.get(call["name"]) - if tool is None: - output = f"Error: unknown tool {call['name']!r}. 
Available: {list(_TOOLS_BY_NAME)}" - else: - output = await tool.ainvoke(call["args"]) - results.append(ToolMessage(content=str(output), tool_call_id=call["id"], name=call["name"])) - return {"messages": results} - - -async def route_after_agent(state: AgentState) -> str: - """Go to the tools node if the model requested tools, else finish.""" - last = state["messages"][-1] - return "tools" if getattr(last, "tool_calls", None) else END - - -def build_graph() -> StateGraph: - """Build the agent graph; the LLM node runs as an activity, tools in the workflow.""" - builder = StateGraph(AgentState) - builder.add_node( - "agent", - agent_node, - metadata={"execute_in": "activity", "start_to_close_timeout": timedelta(minutes=5)}, - ) - builder.add_node("tools", tools_node, metadata={"execute_in": "workflow"}) - builder.add_edge(START, "agent") - builder.add_conditional_edges("agent", route_after_agent, {"tools": "tools", END: END}) - builder.add_edge("tools", "agent") - return builder diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/tools.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/tools.py deleted file mode 100644 index 10943c9d2..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/tools.py +++ /dev/null @@ -1,37 +0,0 @@ -"""Tool definitions for the harness_langgraph temporal agent.""" - -from langchain_core.tools import Tool - - -def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. - - Returns: - A string describing the weather conditions. - """ - return f"The weather in {city} is sunny and 72°F" - - -async def aget_weather(city: str) -> str: - """Native async tool entrypoint. - - ``tools_node`` runs inline in the Temporal workflow and invokes tools via - ``tool.ainvoke``. 
A sync-only tool forces LangChain to bridge through - ``run_in_executor`` (a thread pool), which the deterministic Temporal - workflow event loop forbids (``NotImplementedError``). Providing a real - coroutine keeps tool execution on the workflow loop. - """ - return get_weather(city) - - -weather_tool = Tool( - name="get_weather", - func=get_weather, - coroutine=aget_weather, - description="Get the current weather for a city. Input should be a city name.", -) - -TOOLS = [weather_tool] diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/workflow.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/workflow.py deleted file mode 100644 index 4125dca39..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/workflow.py +++ /dev/null @@ -1,80 +0,0 @@ -"""Temporal workflow for at-harness-langgraph. - -Each turn the workflow runs the LangGraph graph (``project/graph.py``) via the -``temporalio.contrib.langgraph`` plugin. The plugin runs the LLM ``agent`` node -as a durable Temporal activity and the ``tools`` node inline in the workflow. - -Multi-turn memory is kept on the workflow instance (``self._messages``) — it's -durable and replay-safe for free, so no checkpoint database is needed. 
-""" - -from __future__ import annotations - -import json -from typing import Any - -from temporalio import workflow -from temporalio.contrib.langgraph import graph as lg_graph - -from agentex.lib import adk -from project.graph import GRAPH_NAME -from agentex.lib.adk import emit_langgraph_messages -from agentex.protocol.acp import SendEventParams, CreateTaskParams -from agentex.lib.utils.logging import make_logger -from agentex.types.text_content import TextContent -from agentex.lib.environment_variables import EnvironmentVariables -from agentex.lib.core.temporal.types.workflow import SignalName -from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow - -environment_variables = EnvironmentVariables.refresh() - -if environment_variables.WORKFLOW_NAME is None: - raise ValueError("Environment variable WORKFLOW_NAME is not set") -if environment_variables.AGENT_NAME is None: - raise ValueError("Environment variable AGENT_NAME is not set") - -logger = make_logger(__name__) - - -@workflow.defn(name=environment_variables.WORKFLOW_NAME) -class AtHarnessLanggraphWorkflow(BaseWorkflow): - """Runs the LangGraph agent each turn; its nodes run as Temporal activities.""" - - def __init__(self) -> None: - super().__init__(display_name=environment_variables.AGENT_NAME) - self._complete_task = False - self._messages: list[Any] = [] - self._emitted = 0 - - @workflow.signal(name=SignalName.RECEIVE_EVENT) - async def on_task_event_send(self, params: SendEventParams) -> None: - """Echo the user's message, run the graph, surface the new messages.""" - await adk.messages.create(task_id=params.task.id, content=params.event.content) - self._messages.append({"role": "user", "content": params.event.content.content}) - - compiled = lg_graph(GRAPH_NAME).compile() - result = await compiled.ainvoke({"messages": self._messages}) - self._messages = result["messages"] - - await emit_langgraph_messages(self._messages[self._emitted :], params.task.id) - self._emitted = 
len(self._messages) - - @workflow.signal - async def complete_task_signal(self) -> None: - self._complete_task = True - - @workflow.run - async def on_task_create(self, params: CreateTaskParams) -> str: - await adk.messages.create( - task_id=params.task.id, - content=TextContent( - author="agent", - content=( - f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n\n" - "Send me a message and I'll respond using a LangGraph agent whose nodes " - "run as durable Temporal activities." - ), - ), - ) - await workflow.wait_condition(lambda: self._complete_task, timeout=None) - return "Task completed" diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/pyproject.toml b/examples/tutorials/10_async/10_temporal/harness_langgraph/pyproject.toml deleted file mode 100644 index 897f54dd6..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/pyproject.toml +++ /dev/null @@ -1,40 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "at-harness-langgraph" -version = "0.1.0" -description = "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "temporalio[langgraph]>=1.27.0", - "langchain-openai", - "langchain-core", - "grandalf", - "python-dotenv", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", - "debugpy>=1.8.15", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/tests/test_agent.py deleted file mode 100644 index 05d9ffa01..000000000 --- 
a/examples/tutorials/10_async/10_temporal/harness_langgraph/tests/test_agent.py +++ /dev/null @@ -1,106 +0,0 @@ -"""Integration tests for the Temporal harness LangGraph agent (live agent required). - -These drive a *running* agent over the AgentEx API and verify that: -- the agent sends a welcome message on task creation, -- a weather question triggers a tool_request / tool_response round-trip - (proving the LLM node ran as a Temporal activity and the tool node ran), -- the final answer reflects the tool output. - -To run: -1. Start the agent (worker + ACP server): ``agentex agents run --manifest manifest.yaml`` -2. Set AGENTEX_API_BASE_URL if not using the default -3. ``pytest tests/test_agent.py -v`` -""" - -import os -import uuid - -import pytest -import pytest_asyncio -from test_utils.async_utils import ( - poll_messages, - send_event_and_poll_yielding, -) - -from agentex import AsyncAgentex -from agentex.types.task_message import TaskMessage -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "at-harness-langgraph") - - -@pytest_asyncio.fixture -async def client(): - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingEvents: - """The Temporal-backed LangGraph agent responds and uses tools.""" - - @pytest.mark.asyncio - async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): - """Create a task, ask about weather, verify the tool round-trip.""" - task_response = await client.agents.create_task(agent_id, 
params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - task_creation_found = False - async for message in poll_messages(client=client, task_id=task.id, timeout=30, sleep_interval=1.0): - assert isinstance(message, TaskMessage) - if message.content and message.content.type == "text" and message.content.author == "agent": - task_creation_found = True - break - assert task_creation_found, "Task creation welcome message not found" - - seen_tool_request = False - seen_tool_response = False - final_message = None - async for message in send_event_and_poll_yielding( - client=client, - agent_id=agent_id, - task_id=task.id, - user_message="What is the weather in San Francisco? Use your tool.", - timeout=60, - sleep_interval=1.0, - ): - assert isinstance(message, TaskMessage) - - if message.content and message.content.type == "tool_request": - seen_tool_request = True - if message.content and message.content.type == "tool_response": - seen_tool_response = True - - if message.content and message.content.type == "text" and message.content.author == "agent": - final_message = message - content_length = len(getattr(message.content, "content", "") or "") - if getattr(message, "streaming_status", None) in (None, "DONE") and content_length > 0: - if seen_tool_response: - break - - assert seen_tool_request, "Expected a tool_request (agent calling get_weather)" - assert seen_tool_response, "Expected a tool_response (get_weather result)" - assert final_message is not None, "Expected a final agent text message" - final_text = getattr(final_message.content, "content", None) if final_message.content else None - assert isinstance(final_text, str) and len(final_text) > 0 - assert "72" in final_text, "Expected weather response to mention 72°F" - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/Dockerfile 
b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/Dockerfile deleted file mode 100644 index 98c74c6e8..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -COPY 10_async/10_temporal/harness_pydantic_ai/pyproject.toml /app/harness_pydantic_ai/pyproject.toml -COPY 10_async/10_temporal/harness_pydantic_ai/README.md /app/harness_pydantic_ai/README.md - -WORKDIR /app/harness_pydantic_ai - -COPY 10_async/10_temporal/harness_pydantic_ai/project /app/harness_pydantic_ai/project -COPY 10_async/10_temporal/harness_pydantic_ai/tests /app/harness_pydantic_ai/tests -COPY test_utils /app/test_utils - -RUN uv pip install --system .[dev] - -ENV PYTHONPATH=/app - -ENV AGENT_NAME=at-harness-pydantic-ai - -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] - -# When we deploy the worker, we will replace the CMD with the following -# CMD ["python", "-m", "run_worker"] diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/README.md b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/README.md deleted file mode 100644 index 3e5fef4c6..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/README.md +++ /dev/null @@ -1,61 +0,0 @@ -# Temporal Pydantic AI Harness Test Agent - -A minimal **Temporal-backed** Pydantic AI agent that drives the **unified -harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`) from -inside the model activity's 
`event_stream_handler`. - -## Why this agent exists - -The `10_async/10_temporal/110_pydantic_ai` tutorial streams via the -`stream_pydantic_ai_events` helper (which uses the unified surface internally). -This harness test agent calls `emitter.auto_send_turn(...)` **explicitly** inside -the `event_stream_handler`, making the unified-surface wiring visible and giving -the temporal channel direct coverage. - -## How it wires the unified surface - -In `project/agent.py`, the `event_stream_handler` runs inside the model activity -and constructs a `UnifiedEmitter` from `RunContext.deps`: - -```python -async def event_handler(run_context, events): - emitter = UnifiedEmitter( - task_id=run_context.deps.task_id, - trace_id=run_context.deps.task_id, - parent_span_id=run_context.deps.parent_span_id, - ) - turn = PydanticAITurn(events, model=MODEL_NAME, coalesce_tool_requests=True) - await emitter.auto_send_turn(turn) -``` - -- The handler runs inside a Temporal activity, so it can freely make - non-deterministic Redis + tracing writes. -- `coalesce_tool_requests=True` is required on the auto_send path until - AGX1-377 lands. -- `deps` (set by `project/workflow.py`) threads the `task_id` and the per-turn - `parent_span_id` into the handler so tool spans nest under the workflow's turn - span. - -## Structure - -- `project/acp.py` — thin ACP server; FastACP auto-wires HTTP routes to the - workflow when `TemporalACPConfig` is used. -- `project/agent.py` — base `Agent` + `TemporalAgent` + the unified-surface - `event_stream_handler`. -- `project/workflow.py` — durable workflow; each turn delegates to - `temporal_agent.run(...)`. -- `project/run_worker.py` — Temporal worker entry point. -- `project/tools.py` — async `get_weather(city)` returning a constant. -- `tests/test_agent.py` — live integration test (requires Temporal + Redis + - ACP server + worker). - -## Tools - -- `get_weather(city: str) -> str` (async): returns a fixed "sunny and 72°F" - string. 
Each tool call becomes its own Temporal activity. - -## Offline coverage - -Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake -streaming/tracing, no Temporal server) live in the SDK repo at -`tests/lib/core/harness/test_harness_pydantic_ai_temporal.py`. diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/manifest.yaml deleted file mode 100644 index 9efbff918..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/manifest.yaml +++ /dev/null @@ -1,62 +0,0 @@ -build: - context: - root: ../../../ - include_paths: - - 10_async/10_temporal/harness_pydantic_ai - - test_utils - dockerfile: 10_async/10_temporal/harness_pydantic_ai/Dockerfile - dockerignore: 10_async/10_temporal/harness_pydantic_ai/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - worker: project/run_worker.py - -agent: - acp_type: async - name: at-harness-pydantic-ai - description: A Temporal-backed Pydantic AI harness test agent using the unified emitter surface - - temporal: - enabled: true - workflows: - - name: at-harness-pydantic-ai - queue_name: at_harness_pydantic_ai_queue - - credentials: - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "at-harness-pydantic-ai" - description: "A Temporal-backed Pydantic AI harness test agent using the unified emitter surface" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: 
"1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/__init__.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/agent.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/agent.py deleted file mode 100644 index 5e8697264..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/agent.py +++ /dev/null @@ -1,111 +0,0 @@ -"""Pydantic AI agent definition for the Temporal harness test agent. - -This module constructs the base ``pydantic_ai.Agent`` once at import time, -registers tools on it, and wraps it in ``TemporalAgent`` from -``pydantic_ai.durable_exec.temporal``. - -The ``TemporalAgent`` wrapper makes every model call and every tool call run as -a Temporal activity automatically. The workflow stays deterministic; the -non-deterministic work (LLM HTTP calls, tool execution) moves into recorded -activities. - -Streaming back to Agentex happens via ``event_stream_handler``, which receives -Pydantic AI ``AgentStreamEvent``s from inside the model activity and forwards -them through the UNIFIED HARNESS SURFACE (``UnifiedEmitter.auto_send_turn`` + -``PydanticAITurn``) — called directly rather than via ``stream_pydantic_ai_events``. -The ``task_id`` and per-turn ``parent_span_id`` are threaded into the handler -via ``deps``. 
-""" - -from __future__ import annotations - -from datetime import datetime -from collections.abc import AsyncIterable - -from pydantic import BaseModel -from pydantic_ai import Agent, RunContext -from pydantic_ai.messages import AgentStreamEvent -from pydantic_ai.durable_exec.temporal import TemporalAgent - -from project.tools import get_weather -from agentex.lib.core.harness import UnifiedEmitter -from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn - -__all__ = ["TaskDeps", "temporal_agent", "base_agent", "MODEL_NAME"] - -MODEL_NAME = "openai:gpt-4o-mini" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. - -Current date and time: {timestamp} - -Guidelines: -- Be concise and helpful -- Use tools when they would help answer the user's question -- If you're unsure, ask clarifying questions -- Always provide accurate information -""" - - -class TaskDeps(BaseModel): - """Per-run dependencies passed into the agent via ``deps=``. - - Pydantic AI's ``RunContext.deps`` is the canonical place to thread - request-scoped data (like the Agentex task_id) into tools and event - handlers — including code that runs inside Temporal activities. - """ - - task_id: str - # When set, the event handler nests per-tool-call spans under this span. - # Typically the ID of the per-turn span opened by the workflow. - parent_span_id: str | None = None - - -def _build_base_agent() -> Agent[TaskDeps, str]: - """Build the underlying Pydantic AI agent with tools registered. - - Tools must be registered BEFORE the agent is wrapped in TemporalAgent; - changes to tool registration after wrapping are not reflected. 
- """ - agent: Agent[TaskDeps, str] = Agent( - MODEL_NAME, - deps_type=TaskDeps, - system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), - ) - agent.tool_plain(get_weather) - return agent - - -async def event_handler( - run_context: RunContext[TaskDeps], - events: AsyncIterable[AgentStreamEvent], -) -> None: - """Stream Pydantic AI events to Agentex via the unified surface. - - Pydantic AI calls this with the live event stream as soon as the model - activity begins emitting parts. Because the handler runs inside the activity - (not the workflow), it can freely make non-deterministic Redis + tracing - writes. - - The UnifiedEmitter is constructed from ``deps`` (task_id + parent_span_id), - so tool spans nest under the workflow's per-turn span and messages auto-send - to the task stream. The auto_send path delivers streamed tool requests - natively, so no coalescing workaround is needed. - """ - emitter = UnifiedEmitter( - task_id=run_context.deps.task_id, - trace_id=run_context.deps.task_id, - parent_span_id=run_context.deps.parent_span_id, - ) - turn = PydanticAITurn(events, model=MODEL_NAME) - await emitter.auto_send_turn(turn) - - -# Construct the durable agent at module load time so that the PydanticAIPlugin -# can auto-discover its activities via the workflow's ``__pydantic_ai_agents__`` -# attribute. -base_agent = _build_base_agent() -temporal_agent: TemporalAgent[TaskDeps, str] = TemporalAgent( - base_agent, - name="harness_pydantic_ai_agent", - event_stream_handler=event_handler, -) diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/run_worker.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/run_worker.py deleted file mode 100644 index 4b4d43d19..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/run_worker.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Temporal worker for the harness Pydantic AI test agent. 
- -Run as a separate long-lived process alongside the ACP HTTP server. The worker -polls Temporal for workflow + activity tasks and executes them. - -The ``PydanticAIPlugin`` reads ``__pydantic_ai_agents__`` off the workflow class -and registers every model/tool activity the TemporalAgent needs — so we don't -have to enumerate activities by hand here. -""" - -import asyncio - -from pydantic_ai.durable_exec.temporal import PydanticAIPlugin - -from project.workflow import HarnessPydanticAiWorkflow -from agentex.lib.utils.debug import setup_debug_if_enabled -from agentex.lib.utils.logging import make_logger -from agentex.lib.environment_variables import EnvironmentVariables -from agentex.lib.core.temporal.activities import get_all_activities -from agentex.lib.core.temporal.workers.worker import AgentexWorker - -environment_variables = EnvironmentVariables.refresh() -logger = make_logger(__name__) - - -async def main(): - setup_debug_if_enabled() - - task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE - if task_queue_name is None: - raise ValueError("WORKFLOW_TASK_QUEUE is not set") - - # get_all_activities() returns the built-in Agentex activities (state, - # messages, streaming, tracing). Pydantic AI's TemporalAgent activities are - # auto-registered by PydanticAIPlugin via __pydantic_ai_agents__. - worker = AgentexWorker( - task_queue=task_queue_name, - plugins=[PydanticAIPlugin()], - ) - - await worker.run( - activities=get_all_activities(), - workflow=HarnessPydanticAiWorkflow, - ) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/tools.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/tools.py deleted file mode 100644 index bbd6c5200..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/tools.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Tool definitions for the Temporal harness Pydantic AI agent. 
- -These functions are registered on the base Pydantic AI agent. When the agent -is wrapped in ``TemporalAgent``, each tool call becomes its own Temporal -activity automatically — independently retryable and observable. - -Tools must be ``async`` because Pydantic AI's Temporal integration requires -it: non-async tools would run in threads, which is non-deterministic and -unsafe for Temporal replay. -""" - -from __future__ import annotations - - -async def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. - - Returns: - A string describing the weather conditions. - """ - return f"The weather in {city} is sunny and 72°F" diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/workflow.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/workflow.py deleted file mode 100644 index 9a01be7de..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/workflow.py +++ /dev/null @@ -1,137 +0,0 @@ -"""Temporal workflow for the harness Pydantic AI test agent. - -The workflow holds task state durably across crashes. Its signal handler -delegates the actual agent run to ``temporal_agent.run(...)`` — which internally -schedules model and tool activities, each independently durable. The -``event_stream_handler`` registered on ``temporal_agent`` (see project.agent) -pushes streaming deltas through the unified harness surface while the model -activity runs. - -Multi-turn memory is kept on the workflow instance itself -(``self._message_history``). Temporal's workflow state is already durable and -replay-safe, so unlike the async-base agent we don't need an external -``adk.state`` round-trip. 
-""" - -from __future__ import annotations - -import os -import json -from typing import TYPE_CHECKING - -from temporalio import workflow - -from agentex.lib import adk -from project.agent import TaskDeps, temporal_agent -from agentex.lib.types.acp import SendEventParams, CreateTaskParams -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.types.text_content import TextContent -from agentex.lib.environment_variables import EnvironmentVariables -from agentex.lib.core.temporal.types.workflow import SignalName -from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow -from agentex.lib.core.tracing.tracing_processor_manager import ( - add_tracing_processor_config, -) - -if TYPE_CHECKING: - from pydantic_ai.messages import ModelMessage - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -environment_variables = EnvironmentVariables.refresh() - -if environment_variables.WORKFLOW_NAME is None: - raise ValueError("Environment variable WORKFLOW_NAME is not set") -if environment_variables.AGENT_NAME is None: - raise ValueError("Environment variable AGENT_NAME is not set") - -logger = make_logger(__name__) - - -@workflow.defn(name=environment_variables.WORKFLOW_NAME) -class HarnessPydanticAiWorkflow(BaseWorkflow): - """Long-running Temporal workflow that delegates each turn to a Pydantic AI TemporalAgent. - - The ``__pydantic_ai_agents__`` attribute is the marker the - ``PydanticAIPlugin`` looks for at worker startup: it pulls - ``temporal_agent.temporal_activities`` off this list and registers them on - the worker automatically — so we don't have to list activities by hand in - ``run_worker.py``. 
- """ - - __pydantic_ai_agents__ = [temporal_agent] - - def __init__(self): - super().__init__(display_name=environment_variables.AGENT_NAME) - self._complete_task = False - self._turn_number = 0 - # Conversation history accumulated across turns. Each entry is a - # pydantic-ai ``ModelMessage``. Temporal replays the activity that - # produced these messages, so the list is rebuilt deterministically if - # the workflow ever recovers from a crash. - self._message_history: list["ModelMessage"] = [] - - @workflow.signal(name=SignalName.RECEIVE_EVENT) - async def on_task_event_send(self, params: SendEventParams) -> None: - """Handle a new user message: echo it, then run the agent durably.""" - logger.info(f"Received task event: {params.task.id}") - self._turn_number += 1 - - # Echo the user's message so it shows up in the UI as a chat bubble. - await adk.messages.create(task_id=params.task.id, content=params.event.content) - - async with adk.tracing.span( - trace_id=params.task.id, - task_id=params.task.id, - name=f"Turn {self._turn_number}", - input={"message": params.event.content.content}, - ) as span: - # temporal_agent.run() schedules a model activity, per-tool - # activities, and the event_stream_handler activity (which pushes - # deltas through the unified surface). Passing ``message_history`` - # makes the run remember prior turns. - result = await temporal_agent.run( - params.event.content.content, - message_history=self._message_history, - deps=TaskDeps( - task_id=params.task.id, - parent_span_id=span.id if span else None, - ), - ) - # Persist the new full history (user + assistant + any tool rounds) - # so the next turn picks up from here. 
- self._message_history = list(result.all_messages()) - if span: - span.output = {"final_output": result.output} - - @workflow.run - async def on_task_create(self, params: CreateTaskParams) -> str: - """Workflow entry point — keep the conversation alive for incoming signals.""" - logger.info(f"Task created: {params.task.id}") - - await adk.messages.create( - task_id=params.task.id, - content=TextContent( - author="agent", - content=( - f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n" - f"Send me a message and I'll respond using a Pydantic AI agent backed by Temporal." - ), - ), - ) - - await workflow.wait_condition(lambda: self._complete_task, timeout=None) - return "Task completed" - - @workflow.signal - async def complete_task_signal(self) -> None: - """Graceful workflow shutdown signal.""" - logger.info("Received complete_task signal") - self._complete_task = True diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/pyproject.toml deleted file mode 100644 index 4d9039640..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/pyproject.toml +++ /dev/null @@ -1,38 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "at-harness-pydantic-ai" -version = "0.1.0" -description = "A Temporal-backed Pydantic AI harness test agent using the unified emitter surface" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "temporalio>=1.18.2", - "pydantic-ai-slim[openai]>=1.0,<2", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", - "debugpy>=1.8.15", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git 
a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/tests/test_agent.py deleted file mode 100644 index a5b90ca34..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/tests/test_agent.py +++ /dev/null @@ -1,114 +0,0 @@ -"""Live tests for the Temporal harness Pydantic AI agent. - -These tests require a running agent (Temporal + Redis + ACP server + worker) and -exercise the unified-surface event_stream_handler end-to-end over the wire. They -mirror the ``at110`` temporal tutorial tests but target this harness agent. - -Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives -in ``tests/lib/core/harness/test_harness_pydantic_ai_temporal.py`` in the SDK repo. - -To run these tests: -1. Make sure the agent is running (worker + ACP server) -2. Set AGENTEX_API_BASE_URL if not using the default -3. Run: pytest tests/test_agent.py -v -""" - -import os -import uuid - -import pytest -import pytest_asyncio -from test_utils.async_utils import poll_messages, send_event_and_poll_yielding - -from agentex import AsyncAgentex -from agentex.types.task_message import TaskMessage -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "at-harness-pydantic-ai") - - -@pytest_asyncio.fixture -async def client(): - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingEvents: - """Test that the Temporal-backed harness agent responds and uses tools.""" 
- - @pytest.mark.asyncio - async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): - """Drive a full turn: create task, send a weather question, verify tool round-trip.""" - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - # Wait for the welcome message from on_task_create - task_creation_found = False - async for message in poll_messages( - client=client, - task_id=task.id, - timeout=30, - sleep_interval=1.0, - ): - assert isinstance(message, TaskMessage) - if message.content and message.content.type == "text" and message.content.author == "agent": - task_creation_found = True - break - assert task_creation_found, "Task creation welcome message not found" - - # Ask about weather — the agent should call get_weather - seen_tool_request = False - seen_tool_response = False - final_message = None - async for message in send_event_and_poll_yielding( - client=client, - agent_id=agent_id, - task_id=task.id, - user_message="What is the weather in San Francisco?", - timeout=60, - sleep_interval=1.0, - ): - assert isinstance(message, TaskMessage) - - if message.content and message.content.type == "tool_request": - seen_tool_request = True - if message.content and message.content.type == "tool_response": - seen_tool_response = True - if final_message and getattr(final_message, "streaming_status", None) == "DONE": - break - - if message.content and message.content.type == "text" and message.content.author == "agent": - final_message = message - content_length = len(getattr(message.content, "content", "") or "") - if message.streaming_status == "DONE" and content_length > 0: - if not seen_tool_request or seen_tool_response: - break - - assert seen_tool_request, "Expected a tool_request (agent calling get_weather)" - assert seen_tool_response, "Expected a tool_response (get_weather result)" - assert final_message is not None, 
"Expected a final agent text message" - final_text = getattr(final_message.content, "content", None) if final_message.content else None - assert isinstance(final_text, str) and len(final_text) > 0 - # The get_weather tool always returns "72°F" — the response should mention it. - assert "72" in final_text, "Expected weather response to mention 72°F" - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/src/agentex/lib/adk/__init__.py b/src/agentex/lib/adk/__init__.py index fedd52f7a..3287dc07d 100644 --- a/src/agentex/lib/adk/__init__.py +++ b/src/agentex/lib/adk/__init__.py @@ -6,13 +6,13 @@ from agentex.lib.adk._modules.agents import AgentsModule from agentex.lib.adk._modules.agent_task_tracker import AgentTaskTrackerModule from agentex.lib.adk._modules.checkpointer import create_checkpointer -from agentex.lib.adk._modules._langgraph_tracing import create_langgraph_tracing_handler -from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events -from agentex.lib.adk._modules._langgraph_messages import emit_langgraph_messages -from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events -from agentex.lib.adk._modules._pydantic_ai_async import stream_pydantic_ai_events +from agentex.lib.adk._modules._langgraph_turn import stream_langgraph_events +from agentex.lib.adk._modules._langgraph_sync import ( + emit_langgraph_messages, + convert_langgraph_to_agentex_events, +) +from agentex.lib.adk._modules._pydantic_ai_turn import stream_pydantic_ai_events from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events -from agentex.lib.adk._modules._pydantic_ai_tracing import create_pydantic_ai_tracing_handler from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events from agentex.lib.adk._modules._claude_code_turn import ( ClaudeCodeTurn, @@ -66,14 +66,12 @@ "agent_task_tracker", # Checkpointing / LangGraph "create_checkpointer", - 
"create_langgraph_tracing_handler", "stream_langgraph_events", "emit_langgraph_messages", "convert_langgraph_to_agentex_events", # Pydantic AI "stream_pydantic_ai_events", "convert_pydantic_ai_to_agentex_events", - "create_pydantic_ai_tracing_handler", # Claude Code "convert_claude_code_to_agentex_events", "ClaudeCodeTurn", diff --git a/src/agentex/lib/adk/_modules/_langgraph_async.py b/src/agentex/lib/adk/_modules/_langgraph_async.py deleted file mode 100644 index 02ef059eb..000000000 --- a/src/agentex/lib/adk/_modules/_langgraph_async.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Async LangGraph streaming helper for Agentex. - -Converts LangGraph graph.astream() events into Agentex streaming updates -and pushes them to Redis via adk.streaming contexts. For use with async -ACP agents that stream via Redis rather than HTTP yields. - -Unified surface ---------------- -This module is now implemented on top of ``LangGraphTurn`` and -``UnifiedEmitter.auto_send_turn``, the same surface used by every other -harness adapter (pydantic-ai, openai-agents, etc.). The public signature -and return type are preserved identically. - -AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` events -(from "updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send`` -handles Full events correctly; no coalescing wrapper is needed. -""" - -from agentex.lib.utils.temporal import workflow_now_if_in_workflow - - -async def stream_langgraph_events(stream, task_id: str) -> str: - """Stream LangGraph events to Agentex via Redis. - - Processes the stream from graph.astream() called with - stream_mode=["messages", "updates"] and pushes text, reasoning, - tool request, and tool response messages through Redis streaming - contexts. - - Supports both regular models (chunk.content is a str) and reasoning - models like gpt-5/o1/o3 (chunk.content is a list of typed content blocks - in the Responses API responses/v1 format). 
- - Reimplemented on ``UnifiedEmitter.auto_send_turn(LangGraphTurn(...))`` for - cross-harness consistency. Behavior is identical to the previous bespoke - implementation (verified by characterization tests in test_langgraph_async.py). - - AGX1-377 note: LangGraph emits tool requests as ``Full`` events (from "updates"), - NOT Start+Delta+Done like pydantic-ai. ``auto_send`` handles Full events - correctly; no coalescing wrapper is needed. - - AGX1-378 note: ``created_at`` is set from ``workflow.now()`` when called inside a - Temporal workflow, matching the pattern used by the openai/litellm providers. - Outside a workflow (plain async activities, sync agents) it is ``None`` and the - server's wall clock is used. - - Args: - stream: Async iterator from graph.astream(..., stream_mode=["messages", "updates"]) - task_id: The Agentex task ID to stream messages to. - - Returns: - The accumulated final text output from the agent. - """ - from agentex.lib.core.harness.emitter import UnifiedEmitter - from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn - - # AGX1-377 note: LangGraph emits tool requests as Full events (from "updates"), - # NOT Start+Delta+Done like pydantic-ai. auto_send handles Full events correctly; - # no coalescing wrapper is needed. - # AGX1-378: stamp messages with workflow.now() inside Temporal for deterministic - # created_at ordering; falls back to None (server wall clock) outside a workflow. 
- turn = LangGraphTurn(stream, model=None) - emitter = UnifiedEmitter(task_id=task_id, trace_id=None, parent_span_id=None) - result = await emitter.auto_send_turn(turn, created_at=workflow_now_if_in_workflow()) - return result.final_text diff --git a/src/agentex/lib/adk/_modules/_langgraph_messages.py b/src/agentex/lib/adk/_modules/_langgraph_messages.py deleted file mode 100644 index c8856755b..000000000 --- a/src/agentex/lib/adk/_modules/_langgraph_messages.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Emit finished LangGraph messages as Agentex task messages. - -This is the non-streaming counterpart to ``stream_langgraph_events``. Use it -when you run a LangGraph graph with ``ainvoke`` (for example a Temporal-backed -agent using the LangGraph plugin, where streaming deltas aren't available) and -want to surface the resulting messages to the Agentex UI after the fact. - -It maps LangGraph/LangChain message objects to Agentex content types: - -- ``AIMessage`` tool calls → ``ToolRequestContent`` (one per call) -- ``AIMessage`` text content → ``TextContent`` -- ``ToolMessage`` → ``ToolResponseContent`` - -Pass only the messages produced this turn (e.g. ``messages[already_emitted:]``) -so each message is surfaced exactly once across a multi-turn conversation. -""" - -from __future__ import annotations - -from typing import Any - - -async def emit_langgraph_messages(messages: list[Any], task_id: str) -> str: - """Create Agentex messages for a list of LangGraph messages. - - Args: - messages: LangGraph/LangChain message objects to surface — typically - the new messages a turn produced. - task_id: The Agentex task to create messages on. - - Returns: - The last assistant text emitted (useful as a span/turn output), or "". - """ - # Lazy imports so langchain isn't required at module load time. 
- from langchain_core.messages import AIMessage, ToolMessage - - from agentex.lib import adk - from agentex.types.text_content import TextContent - from agentex.types.tool_request_content import ToolRequestContent - from agentex.types.tool_response_content import ToolResponseContent - - final_text = "" - for message in messages: - if isinstance(message, AIMessage): - for tool_call in message.tool_calls or []: - await adk.messages.create( - task_id=task_id, - content=ToolRequestContent( - author="agent", - tool_call_id=tool_call["id"], - name=tool_call["name"], - arguments=tool_call["args"], - ), - ) - # ``content`` may be a plain string (OpenAI) or a list of content - # blocks (Anthropic/Claude via LangChain, e.g. - # ``[{"type": "text", "text": "..."}]``). Extract and join the text - # so the response is visible regardless of the underlying model. - if isinstance(message.content, str): - text = message.content - else: - text = "".join( - block.get("text", "") if isinstance(block, dict) else str(block) - for block in message.content - if not isinstance(block, dict) or block.get("type") == "text" - ) - if text: - final_text = text - await adk.messages.create( - task_id=task_id, - content=TextContent(author="agent", content=text, format="markdown"), - ) - elif isinstance(message, ToolMessage): - await adk.messages.create( - task_id=task_id, - content=ToolResponseContent( - author="agent", - tool_call_id=message.tool_call_id, - name=message.name or "unknown", - content=message.content - if isinstance(message.content, str) - else str(message.content), - ), - ) - return final_text diff --git a/src/agentex/lib/adk/_modules/_langgraph_sync.py b/src/agentex/lib/adk/_modules/_langgraph_sync.py index 48231a87d..9d7b73847 100644 --- a/src/agentex/lib/adk/_modules/_langgraph_sync.py +++ b/src/agentex/lib/adk/_modules/_langgraph_sync.py @@ -48,8 +48,8 @@ async def convert_langgraph_to_agentex_events( Supports both regular models (chunk.content is a str) and reasoning models 
like gpt-5/o1/o3 (chunk.content is a list of typed content blocks). - AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` (from - "updates" events), NOT Start+Delta+Done like pydantic-ai. No coalesce_tool_requests + LangGraph emits tool requests as ``StreamTaskMessageFull`` (from "updates" + events), NOT Start+Delta+Done like pydantic-ai. No coalesce_tool_requests option is needed for LangGraph. Args: @@ -271,3 +271,82 @@ async def convert_langgraph_to_agentex_events( yield StreamTaskMessageDone(type="done", index=message_index) if reasoning_streaming: yield StreamTaskMessageDone(type="done", index=message_index) + + +async def emit_langgraph_messages(messages: list[Any], task_id: str) -> str: + """Create Agentex messages for a list of LangGraph messages. + + This is the non-streaming counterpart to ``stream_langgraph_events``. Use it + when you run a LangGraph graph with ``ainvoke`` (for example a Temporal-backed + agent using the LangGraph plugin, where streaming deltas aren't available) and + want to surface the resulting messages to the Agentex UI after the fact. + + It maps LangGraph/LangChain message objects to Agentex content types: + + - ``AIMessage`` tool calls -> ``ToolRequestContent`` (one per call) + - ``AIMessage`` text content -> ``TextContent`` + - ``ToolMessage`` -> ``ToolResponseContent`` + + Pass only the messages produced this turn (e.g. ``messages[already_emitted:]``) + so each message is surfaced exactly once across a multi-turn conversation. + + Args: + messages: LangGraph/LangChain message objects to surface — typically + the new messages a turn produced. + task_id: The Agentex task to create messages on. + + Returns: + The last assistant text emitted (useful as a span/turn output), or "". + """ + # Lazy imports so langchain isn't required at module load time. 
+ from langchain_core.messages import AIMessage, ToolMessage + + from agentex.lib import adk + from agentex.types.text_content import TextContent + from agentex.types.tool_request_content import ToolRequestContent + from agentex.types.tool_response_content import ToolResponseContent + + final_text = "" + for message in messages: + if isinstance(message, AIMessage): + for tool_call in message.tool_calls or []: + await adk.messages.create( + task_id=task_id, + content=ToolRequestContent( + author="agent", + tool_call_id=tool_call["id"], + name=tool_call["name"], + arguments=tool_call["args"], + ), + ) + # ``content`` may be a plain string (OpenAI) or a list of content + # blocks (Anthropic/Claude via LangChain, e.g. + # ``[{"type": "text", "text": "..."}]``). Extract and join the text + # so the response is visible regardless of the underlying model. + if isinstance(message.content, str): + text = message.content + else: + text = "".join( + block.get("text", "") if isinstance(block, dict) else str(block) + for block in message.content + if not isinstance(block, dict) or block.get("type") == "text" + ) + if text: + final_text = text + await adk.messages.create( + task_id=task_id, + content=TextContent(author="agent", content=text, format="markdown"), + ) + elif isinstance(message, ToolMessage): + await adk.messages.create( + task_id=task_id, + content=ToolResponseContent( + author="agent", + tool_call_id=message.tool_call_id, + name=message.name or "unknown", + content=message.content + if isinstance(message.content, str) + else str(message.content), + ), + ) + return final_text diff --git a/src/agentex/lib/adk/_modules/_langgraph_tracing.py b/src/agentex/lib/adk/_modules/_langgraph_tracing.py deleted file mode 100644 index 2162201e1..000000000 --- a/src/agentex/lib/adk/_modules/_langgraph_tracing.py +++ /dev/null @@ -1,273 +0,0 @@ -"""LangChain callback handler that creates Agentex spans for LLM calls and tool executions. - -.. 
deprecated:: - ``AgentexLangGraphTracingHandler`` and ``create_langgraph_tracing_handler`` are - superseded by the unified harness surface (``LangGraphTurn`` + - ``UnifiedEmitter``), which derives spans automatically from the canonical - event stream without requiring a LangChain callback handler. - - They remain importable and functional for backward compatibility, but new - agents should use the unified path instead. -""" -# ruff: noqa: ARG002 -# Callback methods must accept all arguments defined by LangChain's AsyncCallbackHandler interface. - -from __future__ import annotations - -from uuid import UUID -from typing import Any, override - -from langchain_core.outputs import LLMResult -from langchain_core.messages import BaseMessage -from langchain_core.callbacks import AsyncCallbackHandler - -from agentex.types.span import Span -from agentex.lib.utils.logging import make_logger -from agentex.lib.adk._modules.tracing import TracingModule - -logger = make_logger(__name__) - - -class AgentexLangGraphTracingHandler(AsyncCallbackHandler): - """Async LangChain callback handler that records Agentex tracing spans. - - Creates child spans under a parent span for each LLM call and tool execution. - Designed to be passed via ``config={"callbacks": [handler]}`` to LangGraph's - ``graph.astream()`` or ``graph.ainvoke()``. - - Span hierarchy produced:: - - (e.g. "message" turn-level span) - ├── llm: (LLM call) - ├── tool: (tool execution) - └── llm: (LLM call) - - .. deprecated:: - Use ``LangGraphTurn`` with ``UnifiedEmitter`` instead. The unified - harness derives equivalent spans from the canonical event stream, - removing the need for a LangChain callback handler entirely. 
- """ - - def __init__( - self, - trace_id: str, - parent_span_id: str | None = None, - tracing: TracingModule | None = None, - ) -> None: - super().__init__() - self._trace_id = trace_id - self._parent_span_id = parent_span_id - # Lazily initialise TracingModule so the httpx client is created - # inside the *running* event-loop (not at import/construction time). - self._tracing_eager = tracing - self._tracing_lazy: TracingModule | None = None - # Map run_id → Span for in-flight spans - self._spans: dict[UUID, Span] = {} - - @property - def _tracing(self) -> TracingModule: - if self._tracing_eager is not None: - return self._tracing_eager - if self._tracing_lazy is None: - self._tracing_lazy = TracingModule() - return self._tracing_lazy - - # ------------------------------------------------------------------ - # LLM lifecycle - # ------------------------------------------------------------------ - - @override - async def on_chat_model_start( - self, - serialized: dict[str, Any], - messages: list[list[BaseMessage]], - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - metadata: dict[str, Any] | None = None, - **kwargs: Any, - ) -> None: - model_name = (metadata or {}).get("ls_model_name", "") or _extract_model_name(serialized) - span = await self._tracing.start_span( - trace_id=self._trace_id, - name=f"llm:{model_name}" if model_name else "llm", - input=_serialize_messages(messages), - parent_id=self._parent_span_id, - data={"__span_type__": "COMPLETION"}, - ) - if span: - self._spans[run_id] = span - - @override - async def on_llm_end( - self, - response: LLMResult, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = _serialize_llm_result(response) - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - @override - async def on_llm_error( - self, - error: BaseException, - *, - run_id: UUID, - 
parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = {"error": str(error)} - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - # ------------------------------------------------------------------ - # Tool lifecycle - # ------------------------------------------------------------------ - - @override - async def on_tool_start( - self, - serialized: dict[str, Any], - input_str: str, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - metadata: dict[str, Any] | None = None, - inputs: dict[str, Any] | None = None, - **kwargs: Any, - ) -> None: - tool_name = serialized.get("name", "") or serialized.get("id", [""])[-1] - span = await self._tracing.start_span( - trace_id=self._trace_id, - name=f"tool:{tool_name}" if tool_name else "tool", - input={"input": input_str}, - parent_id=self._parent_span_id, - data={"__span_type__": "CUSTOM"}, - ) - if span: - self._spans[run_id] = span - - @override - async def on_tool_end( - self, - output: str, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = {"output": output} - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - @override - async def on_tool_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = {"error": str(error)} - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - -# ------------------------------------------------------------------ -# Helpers -# ------------------------------------------------------------------ - - -def _extract_model_name(serialized: dict[str, Any]) -> str: - """Best-effort model name extraction from the serialized callback dict.""" - kwargs = 
serialized.get("kwargs", {}) - return kwargs.get("model_name", "") or kwargs.get("model", "") - - -def _serialize_messages(messages: list[list[BaseMessage]]) -> dict[str, Any]: - """Serialize LangChain messages into a JSON-safe dict for the span input.""" - result: list[dict[str, Any]] = [] - for batch in messages: - for msg in batch: - entry: dict[str, Any] = {"type": msg.type, "content": msg.content} - tool_calls = getattr(msg, "tool_calls", None) - if tool_calls: - entry["tool_calls"] = tool_calls - result.append(entry) - return {"messages": result} - - -def _serialize_llm_result(response: LLMResult) -> dict[str, Any]: - """Serialize an LLMResult into a JSON-safe dict for the span output.""" - output: dict[str, Any] = {} - if response.generations: - last_gen = response.generations[-1] - if last_gen: - gen = last_gen[-1] - msg = getattr(gen, "message", None) - - # For reasoning models, content is a list of typed blocks. - # Extract text from the blocks instead of relying on gen.text. - if msg and isinstance(msg.content, list): - text_parts: list[str] = [] - for block in msg.content: - if isinstance(block, dict): - if block.get("type") == "text": - text_parts.append(block.get("text", "")) - output["content"] = "".join(text_parts) if text_parts else gen.text - else: - output["content"] = gen.text - - if msg and hasattr(msg, "tool_calls") and msg.tool_calls: - output["tool_calls"] = [{"name": tc["name"], "args": tc["args"]} for tc in msg.tool_calls] - return output - - -def create_langgraph_tracing_handler( - trace_id: str, - parent_span_id: str | None = None, -) -> AgentexLangGraphTracingHandler: - """Create a LangChain callback handler that records Agentex tracing spans. - - Pass the returned handler to LangGraph via ``config={"callbacks": [handler]}``. - - Args: - trace_id: The trace ID (typically the task/thread ID). - parent_span_id: Optional parent span ID to nest LLM/tool spans under. 
- - Returns: - An ``AgentexLangGraphTracingHandler`` instance ready to use as a LangChain callback. - - .. deprecated:: - Use ``LangGraphTurn`` with ``UnifiedEmitter`` instead. The unified harness - derives equivalent spans from the canonical event stream automatically, with - no LangChain callback required:: - - from agentex.lib.core.harness.emitter import UnifiedEmitter - from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn - - turn = LangGraphTurn(stream) - emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=span_id) - result = await emitter.auto_send_turn(turn) - - This function remains available for backward compatibility. - """ - return AgentexLangGraphTracingHandler( - trace_id=trace_id, - parent_span_id=parent_span_id, - ) diff --git a/src/agentex/lib/adk/_modules/_langgraph_turn.py b/src/agentex/lib/adk/_modules/_langgraph_turn.py index da8ff0e7c..a6e290e1b 100644 --- a/src/agentex/lib/adk/_modules/_langgraph_turn.py +++ b/src/agentex/lib/adk/_modules/_langgraph_turn.py @@ -4,9 +4,9 @@ ``langgraph_usage_to_turn_usage`` helper that maps LangGraph's ``AIMessage.usage_metadata`` onto the framework-agnostic ``TurnUsage`` model. -AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` events -(from "updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send`` -handles Full events correctly; no coalescing wrapper is needed. +LangGraph emits tool requests as ``StreamTaskMessageFull`` events (from +"updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send`` handles +Full events correctly; no coalescing wrapper is needed. 
""" from __future__ import annotations @@ -14,6 +14,7 @@ from typing import Any, AsyncIterator from collections.abc import AsyncGenerator +from agentex.lib.utils.temporal import workflow_now_if_in_workflow from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events @@ -111,9 +112,9 @@ class LangGraphTurn: # Async / temporal result = await emitter.auto_send_turn(turn) - AGX1-377 note: LangGraph tool requests are ``StreamTaskMessageFull`` (from - "updates"), NOT Start+Delta+Done like pydantic-ai. No ``coalesce_tool_requests`` - option is needed. + LangGraph tool requests are ``StreamTaskMessageFull`` (from "updates"), NOT + Start+Delta+Done like pydantic-ai. No ``coalesce_tool_requests`` option is + needed. Usage data is captured lazily via the ``on_final_ai_message`` callback and is only valid after ``events`` has been fully consumed. Multi-step turns @@ -150,3 +151,50 @@ def usage(self) -> TurnUsage: did not report usage. """ return self._usage + + +async def stream_langgraph_events(stream, task_id: str) -> str: + """Stream LangGraph events to Agentex via Redis. + + Converts LangGraph ``graph.astream()`` events into Agentex streaming + updates and pushes them to Redis via ``adk.streaming`` contexts. For use + with async ACP agents that stream via Redis rather than HTTP yields. + + Processes the stream from graph.astream() called with + stream_mode=["messages", "updates"] and pushes text, reasoning, + tool request, and tool response messages through Redis streaming + contexts. + + Supports both regular models (chunk.content is a str) and reasoning + models like gpt-5/o1/o3 (chunk.content is a list of typed content blocks + in the Responses API responses/v1 format). + + Implemented on ``UnifiedEmitter.auto_send_turn(LangGraphTurn(...))`` for + cross-harness consistency, the same surface used by every other harness + adapter (pydantic-ai, openai-agents, etc.). 
The public signature and + return type are preserved identically. + + LangGraph emits tool requests as ``Full`` events (from "updates"), NOT + Start+Delta+Done like pydantic-ai. ``auto_send`` handles Full events + correctly; no coalescing wrapper is needed. + + ``created_at`` is set from ``workflow.now()`` when called inside a + Temporal workflow, matching the pattern used by the openai/litellm providers. + Outside a workflow (plain async activities, sync agents) it is ``None`` and the + server's wall clock is used. + + Args: + stream: Async iterator from graph.astream(..., stream_mode=["messages", "updates"]) + task_id: The Agentex task ID to stream messages to. + + Returns: + The accumulated final text output from the agent. + """ + from agentex.lib.core.harness.emitter import UnifiedEmitter + + # Stamp messages with workflow.now() inside Temporal for deterministic + # created_at ordering; falls back to None (server wall clock) outside a workflow. + turn = LangGraphTurn(stream, model=None) + emitter = UnifiedEmitter(task_id=task_id, trace_id=None, parent_span_id=None) + result = await emitter.auto_send_turn(turn, created_at=workflow_now_if_in_workflow()) + return result.final_text diff --git a/src/agentex/lib/adk/_modules/_openai_sync.py b/src/agentex/lib/adk/_modules/_openai_sync.py new file mode 100644 index 000000000..f16022200 --- /dev/null +++ b/src/agentex/lib/adk/_modules/_openai_sync.py @@ -0,0 +1,358 @@ +"""Sync OpenAI Agents SDK streaming tap for Agentex. + +Converts an OpenAI Agents SDK streamed run (``Runner.run_streamed(...)`` +``stream_events()``) into Agentex ``StreamTaskMessage*`` events, including +reasoning content and reasoning summary deltas for reasoning models (o1/o3/gpt-5). + +This is the lower-level primitive used by ``OpenAITurn`` (in +``_openai_turn.py``). New OpenAI Agents integrations should prefer wrapping a +``Runner.run_streamed`` result in ``OpenAITurn`` and driving delivery + tracing +through ``UnifiedEmitter``. 
+""" + +from __future__ import annotations + +from typing import Any + +from openai.types.responses import ( + ResponseTextDeltaEvent, + ResponseFunctionToolCall, + ResponseFunctionWebSearch, + ResponseOutputItemDoneEvent, + ResponseOutputItemAddedEvent, + ResponseCodeInterpreterToolCall, + ResponseReasoningSummaryPartAddedEvent, + ResponseReasoningSummaryTextDeltaEvent, +) +from openai.types.responses.response_reasoning_text_done_event import ResponseReasoningTextDoneEvent +from openai.types.responses.response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent +from openai.types.responses.response_reasoning_summary_text_done_event import ResponseReasoningSummaryTextDoneEvent + +from agentex.types.task_message_delta import TextDelta +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.task_message_content import TextContent +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.types.reasoning_content_delta import ReasoningContentDelta +from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta + + +def _extract_tool_call_info(tool_call_item: Any) -> tuple[str, str, dict[str, Any]]: + """ + Extract call_id, tool_name, and tool_arguments from a tool call item. 
+ Args: + tool_call_item: The tool call item to process + Returns: + A tuple of (call_id, tool_name, tool_arguments) + """ + # Generic handling for different tool call types + # Try 'call_id' first, then 'id', then generate placeholder + if hasattr(tool_call_item, "call_id"): + call_id = tool_call_item.call_id + elif hasattr(tool_call_item, "id"): + call_id = tool_call_item.id + else: + call_id = f"unknown_call_{id(tool_call_item)}" + + if isinstance(tool_call_item, ResponseFunctionWebSearch): + tool_name = "web_search" + tool_arguments = {"action": tool_call_item.action.model_dump(), "status": tool_call_item.status} + elif isinstance(tool_call_item, ResponseCodeInterpreterToolCall): + tool_name = "code_interpreter" + tool_arguments = {"code": tool_call_item.code, "status": tool_call_item.status} + elif isinstance(tool_call_item, ResponseFunctionToolCall): + # Handle standard function tool calls + tool_name = tool_call_item.name + # Handle the arguments field which might be a string or None + if tool_call_item.arguments: + if isinstance(tool_call_item.arguments, str): + import json + + tool_arguments = json.loads(tool_call_item.arguments) if tool_call_item.arguments else {} + else: + tool_arguments = tool_call_item.arguments + else: + tool_arguments = {} + else: + # Generic handling for any tool call type + tool_name = getattr(tool_call_item, "name", type(tool_call_item).__name__) + # Handle the arguments field which might be a string or None + if hasattr(tool_call_item, "arguments"): + arguments = tool_call_item.arguments + if isinstance(arguments, str): + import json + + tool_arguments = json.loads(arguments) if arguments else {} + elif arguments is None: + tool_arguments = {} + else: + tool_arguments = arguments + else: + tool_arguments = tool_call_item.model_dump() + + return call_id, tool_name, tool_arguments + + +def _extract_tool_response_info(tool_map: dict[str, Any], tool_output_item: Any) -> tuple[str, str, str]: + """ + Extract call_id, tool_name, and 
content from a tool output item. + Args: + tool_map: Dictionary mapping call_ids to tool names + tool_output_item: The tool output item to process + Returns: + A tuple of (call_id, tool_name, content) + """ + + # Handle different formats of tool_output_item + if isinstance(tool_output_item, dict): + call_id = tool_output_item.get("call_id", tool_output_item.get("id", f"unknown_call_{id(tool_output_item)}")) + content = tool_output_item.get("output", str(tool_output_item)) + else: + # Try to get call_id from attributes + if hasattr(tool_output_item, "call_id"): + call_id = tool_output_item.call_id + elif hasattr(tool_output_item, "id"): + call_id = tool_output_item.id + else: + call_id = f"unknown_call_{id(tool_output_item)}" + + # Get content + if hasattr(tool_output_item, "output"): + content = tool_output_item.output + else: + content = str(tool_output_item) + + # Get tool name from map + tool_name = tool_map.get(call_id, "unknown_tool") + + return call_id, tool_name, content + + +async def convert_openai_to_agentex_events(stream_response): + """Convert OpenAI streaming events to AgentEx TaskMessageUpdate events with reasoning support. 
+ + This is an enhanced version of the base converter that includes support for: + - Reasoning content deltas (for o1 models) + - Reasoning summary deltas (for o1 models) + + Args: + stream_response: An async iterator of OpenAI streaming events + Yields: + TaskMessageUpdate: AgentEx streaming events (StreamTaskMessageDelta, StreamTaskMessageFull, or StreamTaskMessageDone) + """ + + tool_map = {} + event_count = 0 + message_index = 0 # Track message index for proper sequencing + seen_tool_output = False # Track if we've seen tool output to know when final text starts + item_id_to_index = {} # Map item_id to message index + item_id_to_type = {} # Map item_id to content type (text, reasoning_content, reasoning_summary) + + async for event in stream_response: + event_count += 1 + + # Check for raw response events which contain the actual OpenAI streaming events + if hasattr(event, "type") and event.type == "raw_response_event": + if hasattr(event, "data"): + raw_event = event.data + + # Check for ResponseOutputItemAddedEvent which signals a new message starting + if isinstance(raw_event, ResponseOutputItemAddedEvent): + # Don't increment here - we'll increment when we see the actual text delta + # This is just a signal that a new message is starting + pass + + # Handle item completion - send done event to close the message + elif isinstance(raw_event, ResponseOutputItemDoneEvent): + item_id = raw_event.item.id + if item_id in item_id_to_index: + # Get the message type to decide whether to send done event + message_type = item_id_to_type.get(item_id, "text") + + # Don't send done events for reasoning content/summary + # They just end with their last delta + if message_type not in ("reasoning_content", "reasoning_summary"): + yield StreamTaskMessageDone( + type="done", + index=item_id_to_index[item_id], + ) + + # Skip reasoning summary part added events - we handle them on delta + elif isinstance(raw_event, ResponseReasoningSummaryPartAddedEvent): + pass + + # Handle 
reasoning summary text delta events + elif isinstance(raw_event, ResponseReasoningSummaryTextDeltaEvent): + item_id = raw_event.item_id + summary_index = raw_event.summary_index + + # If this is a new item_id we haven't seen, create a new message + if item_id and item_id not in item_id_to_index: + message_index += 1 + item_id_to_index[item_id] = message_index + item_id_to_type[item_id] = "reasoning_summary" + + # Send a start event for this new reasoning summary message + yield StreamTaskMessageStart( + type="start", + index=item_id_to_index[item_id], + content=TextContent( + type="text", + author="agent", + content="", # Start with empty content + ), + ) + + # Use the index for this item_id + current_index = item_id_to_index.get(item_id, message_index) + + # Yield reasoning summary delta + yield StreamTaskMessageDelta( + type="delta", + index=current_index, + delta=ReasoningSummaryDelta( + type="reasoning_summary", + summary_index=summary_index, + summary_delta=raw_event.delta, + ), + ) + + # Handle reasoning summary text done events + elif isinstance(raw_event, ResponseReasoningSummaryTextDoneEvent): + # We do NOT close the streaming context here + # as there can be multiple reasoning summaries. 
+ # The context will be closed when the entire + # output item is done (ResponseOutputItemDoneEvent) + pass + + # Handle reasoning content text delta events + elif isinstance(raw_event, ResponseReasoningTextDeltaEvent): + item_id = raw_event.item_id + content_index = raw_event.content_index + + # If this is a new item_id we haven't seen, create a new message + if item_id and item_id not in item_id_to_index: + message_index += 1 + item_id_to_index[item_id] = message_index + item_id_to_type[item_id] = "reasoning_content" + + # Send a start event for this new reasoning content message + yield StreamTaskMessageStart( + type="start", + index=item_id_to_index[item_id], + content=TextContent( + type="text", + author="agent", + content="", # Start with empty content + ), + ) + + # Use the index for this item_id + current_index = item_id_to_index.get(item_id, message_index) + + # Yield reasoning content delta + yield StreamTaskMessageDelta( + type="delta", + index=current_index, + delta=ReasoningContentDelta( + type="reasoning_content", + content_index=content_index, + content_delta=raw_event.delta, + ), + ) + + # Handle reasoning content text done events + elif isinstance(raw_event, ResponseReasoningTextDoneEvent): + # We do NOT close the streaming context here + # as there can be multiple reasoning content texts. 
+ # The context will be closed when the entire + # output item is done (ResponseOutputItemDoneEvent) + pass + + # Check if this is a text delta event from OpenAI + elif isinstance(raw_event, ResponseTextDeltaEvent): + # Check if this event has an item_id + item_id = getattr(raw_event, "item_id", None) + + # If this is a new item_id we haven't seen, it's a new message + if item_id and item_id not in item_id_to_index: + # Check if this is truly a NEW text message after tools + # We need to differentiate between the first text and the final text after tools + if seen_tool_output: + # This is the final text message after tool execution + message_index += 1 + item_id_to_index[item_id] = message_index + else: + item_id_to_index[item_id] = message_index + + item_id_to_type[item_id] = "text" + + # Send a start event with empty content for this new text message + yield StreamTaskMessageStart( + type="start", + index=item_id_to_index[item_id], + content=TextContent( + type="text", + author="agent", + content="", # Start with empty content, deltas will fill it + ), + ) + + # Use the index for this item_id + current_index = item_id_to_index.get(item_id, message_index) + + delta_message = StreamTaskMessageDelta( + type="delta", + index=current_index, + delta=TextDelta( + type="text", + text_delta=raw_event.delta, + ), + ) + yield delta_message + + elif hasattr(event, "type") and event.type == "run_item_stream_event": + # Skip reasoning_item events - they're handled via raw_response_event above + if hasattr(event, "item") and event.item.type == "reasoning_item": + continue + + # Check for tool_call_item type (this is when a tool is being called) + elif hasattr(event, "item") and event.item.type == "tool_call_item": + # Extract tool call information using the helper method + call_id, tool_name, tool_arguments = _extract_tool_call_info(event.item.raw_item) + tool_map[call_id] = tool_name + tool_request_content = ToolRequestContent( + tool_call_id=call_id, + name=tool_name, + 
arguments=tool_arguments, + author="agent", + ) + message_index += 1 # Increment for new message + yield StreamTaskMessageFull( + index=message_index, + type="full", + content=tool_request_content, + ) + + # Check for tool_call_output_item type (this is when a tool returns output) + elif hasattr(event, "item") and event.item.type == "tool_call_output_item": + # Extract tool response information using the helper method + call_id, tool_name, content = _extract_tool_response_info(tool_map, event.item.raw_item) + tool_response_content = ToolResponseContent( + tool_call_id=call_id, + name=tool_name, + content=content, + author="agent", + ) + message_index += 1 # Increment for new message + seen_tool_output = True # Mark that we've seen tool output so next text gets new index + yield StreamTaskMessageFull( + type="full", + index=message_index, + content=tool_response_content, + ) diff --git a/src/agentex/lib/adk/_modules/_openai_turn.py b/src/agentex/lib/adk/_modules/_openai_turn.py new file mode 100644 index 000000000..cfb1ce22d --- /dev/null +++ b/src/agentex/lib/adk/_modules/_openai_turn.py @@ -0,0 +1,134 @@ +"""OpenAITurn: adapt an OpenAI Agents SDK streamed run onto the harness surface. + +A ``HarnessTurn`` exposes a single canonical ``StreamTaskMessage*`` stream plus +normalized usage. ``OpenAITurn`` wraps a ``RunResultStreaming`` (from +``Runner.run_streamed``), converts its native OpenAI events into the canonical +stream via ``convert_openai_to_agentex_events``, and after exhaustion reads the +run's ``raw_responses`` to aggregate usage into a provider-independent +``TurnUsage``. + +Delivery (yield vs auto-send) and tracing are owned by ``UnifiedEmitter``; this +module is purely the provider->canonical adapter. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, AsyncIterator + +from agents.usage import Usage + +from agentex.lib.utils.logging import make_logger +from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage +from agentex.lib.adk._modules._openai_sync import ( + convert_openai_to_agentex_events, +) + +if TYPE_CHECKING: + from agents import ModelResponse, RunResultStreaming + +logger = make_logger(__name__) + + +def openai_usage_to_turn_usage(usage: Usage | None, model: str | None) -> TurnUsage: + """Map an ``agents.Usage`` to a harness-independent ``TurnUsage``. + + All field access is defensive (``getattr(..., None)``): different model + backends populate different subsets of the usage object, and real zeros are + valid values (e.g. 0 output tokens on a pure cache hit), so we never coerce + a present-but-zero value into ``None``. + """ + if usage is None: + return TurnUsage(model=model) + + input_details = getattr(usage, "input_tokens_details", None) + output_details = getattr(usage, "output_tokens_details", None) + + return TurnUsage( + model=model, + num_llm_calls=getattr(usage, "requests", None) or 0, + input_tokens=getattr(usage, "input_tokens", None), + cached_input_tokens=getattr(input_details, "cached_tokens", None), + output_tokens=getattr(usage, "output_tokens", None), + reasoning_tokens=getattr(output_details, "reasoning_tokens", None), + total_tokens=getattr(usage, "total_tokens", None), + ) + + +def _aggregate_usage(raw_responses: list[ModelResponse]) -> Usage | None: + """Sum the per-response ``Usage`` across a run's ``ModelResponse`` list. + + Returns ``None`` when no response carries usage so the caller can emit a + usage object with only the model name set. ``Usage.add`` accumulates + requests/tokens (including cached/reasoning detail fields). 
+ """ + total: Usage | None = None + for response in raw_responses: + resp_usage = getattr(response, "usage", None) + if resp_usage is None: + continue + if total is None: + total = Usage() + total.add(resp_usage) + return total + + +class OpenAITurn: + """A single OpenAI Agents SDK turn adapted to the ``HarnessTurn`` protocol. + + Construct with exactly one of: + - ``result``: a ``RunResultStreaming`` from ``Runner.run_streamed``. Its + ``stream_events()`` is converted to the canonical stream, and after the + stream is exhausted ``raw_responses`` is read to compute usage. + - ``stream``: a pre-built async iterator of canonical ``StreamTaskMessage`` + events (bypasses ``convert_openai_to_agentex_events``). Useful for tests + and for callers that have already produced canonical events. Usage stays + at ``TurnUsage(model=...)`` because there is no run to read usage from. + + ``coalesce_tool_requests`` is accepted for API parity with other provider + turns but is a no-op for OpenAI: the OpenAI converter already emits a single + ``Full(ToolRequestContent)`` per tool call rather than streamed argument + deltas, so there is nothing to coalesce. 
+ """ + + def __init__( + self, + result: RunResultStreaming | None = None, + model: str | None = None, + stream: AsyncIterator[StreamTaskMessage] | None = None, + coalesce_tool_requests: bool = False, # noqa: ARG002 - API parity, no-op for OpenAI + ) -> None: + if result is None and stream is None: + raise ValueError("OpenAITurn requires either `result` or `stream`") + self._result = result + self._model = model + self._stream = stream + self._usage: TurnUsage = TurnUsage(model=model) + + @property + def events(self) -> AsyncIterator[StreamTaskMessage]: + return self._iter_events() + + async def _iter_events(self) -> AsyncIterator[StreamTaskMessage]: + if self._stream is not None: + async for event in self._stream: + yield event + return + + result = self._result + assert result is not None # guaranteed by __init__ + async for event in convert_openai_to_agentex_events(result.stream_events()): + yield event + + # Stream is exhausted: the run has finished and raw_responses is now + # populated, so usage can be aggregated and normalized. + try: + raw_responses: list[Any] = list(getattr(result, "raw_responses", None) or []) + aggregated = _aggregate_usage(raw_responses) + self._usage = openai_usage_to_turn_usage(aggregated, self._model) + except Exception as exc: # pragma: no cover - defensive: never break delivery on usage + logger.warning(f"Failed to aggregate OpenAI usage: {exc}") + self._usage = TurnUsage(model=self._model) + + def usage(self) -> TurnUsage: + """Normalized turn usage. Valid only after ``events`` is exhausted.""" + return self._usage diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_async.py b/src/agentex/lib/adk/_modules/_pydantic_ai_async.py deleted file mode 100644 index 85abfb845..000000000 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_async.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Async Pydantic AI streaming helper for Agentex. 
- -Consumes a Pydantic AI ``agent.run_stream_events(...)`` async iterator and -pushes Agentex streaming updates to Redis via the ``adk.streaming`` -contexts. For use with async ACP agents that stream via Redis rather than -HTTP yields. - -Text and thinking tokens stream as deltas inside coalesced streaming -contexts. Tool requests and tool results are posted as open+close pairs -on a streaming context (the unified surface persists ``initial_content`` -when a context is closed without deltas). This matches the ``auto_send`` -convention used by all other async/Temporal harnesses. - -Tracing is opt-in via a ``tracing_handler`` parameter — see -``create_pydantic_ai_tracing_handler`` in -``agentex.lib.adk._modules._pydantic_ai_tracing``. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - -async def stream_pydantic_ai_events( - stream, - task_id: str, - tracing_handler: "AgentexPydanticAITracingHandler | None" = None, -) -> str: - """Stream Pydantic AI events to Agentex via Redis. - - Args: - stream: Async iterator yielded by ``agent.run_stream_events(...)``. - task_id: The Agentex task ID to stream messages to. - tracing_handler: Optional handler from - ``create_pydantic_ai_tracing_handler(...)``. When provided, each - tool call in the run is also recorded as an Agentex child span - beneath the handler's configured ``parent_span_id``. Streaming - behavior is unchanged when omitted. - - Returns: - The accumulated text content of the **last** text part in the run. - Multi-step runs (where the model emits text, then a tool call, then - more text) return only the final text segment, matching the - ``stream_langgraph_events`` convention. 
- """ - from agentex.lib.core.harness.emitter import UnifiedEmitter - from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn - - turn = PydanticAITurn( - stream, - model=None, - tracing_handler=tracing_handler, - ) - emitter = UnifiedEmitter( - task_id=task_id, - trace_id=None, - parent_span_id=None, - ) - result = await emitter.auto_send_turn(turn) - return result.final_text diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py b/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py index e4ac31e7e..0f9aaeb55 100644 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py +++ b/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py @@ -41,14 +41,9 @@ async def handle_message_send(params): import json import inspect -from typing import TYPE_CHECKING, Any, Callable, AsyncIterator +from typing import Any, Callable, AsyncIterator from pydantic_ai.run import AgentRunResultEvent - -if TYPE_CHECKING: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) from pydantic_ai.messages import ( TextPart, PartEndEvent, @@ -124,7 +119,6 @@ def _tool_return_content(result: ToolReturnPart | Any) -> Any: async def convert_pydantic_ai_to_agentex_events( stream_response: AsyncIterator[Any], - tracing_handler: "AgentexPydanticAITracingHandler | None" = None, on_result: Callable[[AgentRunResultEvent], Any] | None = None, ) -> AsyncIterator[StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone]: """Convert a Pydantic AI agent event stream into Agentex stream events. @@ -148,11 +142,6 @@ async def convert_pydantic_ai_to_agentex_events( stream_response: The async iterator yielded by Pydantic AI's ``agent.run_stream_events(...)`` context manager (or a stream of ``AgentStreamEvent`` items received in an ``event_stream_handler``). - tracing_handler: Optional handler from - ``create_pydantic_ai_tracing_handler(...)``. 
When provided, each - tool call in the run is also recorded as an Agentex child span - beneath the handler's configured ``parent_span_id``. Streaming - behavior is unchanged when omitted. on_result: Optional callback invoked with the terminal ``AgentRunResultEvent`` when the run completes. Both sync and async callables are accepted. No ``StreamTaskMessage*`` events are @@ -306,26 +295,6 @@ async def convert_pydantic_ai_to_agentex_events( if message_index is None: continue yield StreamTaskMessageDone(type="done", index=message_index) - # Tool-call parts end with the model's full args known. Open a - # tracing child span for the tool execution now; close it when - # FunctionToolResultEvent arrives below. - if tracing_handler is not None and isinstance(event.part, ToolCallPart) and event.part.tool_call_id: - args: dict[str, Any] | str | None - raw_args = event.part.args - if isinstance(raw_args, dict): - args = dict(raw_args) - elif isinstance(raw_args, str): - try: - args = json.loads(raw_args) if raw_args else {} - except json.JSONDecodeError: - args = {"_raw": raw_args} - else: - args = {} - await tracing_handler.on_tool_start( - tool_call_id=event.part.tool_call_id, - tool_name=event.part.tool_name, - arguments=args, - ) elif isinstance(event, FunctionToolResultEvent): result = event.part @@ -345,11 +314,6 @@ async def convert_pydantic_ai_to_agentex_events( content=content_payload, ), ) - if tracing_handler is not None and tool_call_id: - await tracing_handler.on_tool_end( - tool_call_id=tool_call_id, - result=content_payload, - ) elif isinstance(event, (FunctionToolCallEvent, FinalResultEvent, AgentRunResultEvent)): # Already covered by PartStart/PartDelta/PartEnd events above, or diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py b/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py deleted file mode 100644 index e199d0a8c..000000000 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py +++ /dev/null @@ -1,221 +0,0 @@ -"""Tracing handler 
that records Agentex spans for tool calls in a pydantic-ai agent run. - -.. deprecated:: - ``AgentexPydanticAITracingHandler`` and ``create_pydantic_ai_tracing_handler`` - are superseded by the unified harness surface (``UnifiedEmitter`` in - ``agentex.lib.core.harness``). The unified surface derives tool and - reasoning spans directly from the canonical ``StreamTaskMessage*`` stream, - so no separate handler is required. Both symbols remain fully importable - and functional; they will be removed in a future release. New code should - construct a ``UnifiedEmitter`` with a ``trace_id`` instead: - - from agentex.lib.core.harness import UnifiedEmitter - from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn - - emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=parent_span_id) - turn = PydanticAITurn(agent.run_stream_events(prompt), model="openai:gpt-4o") - async for event in emitter.yield_turn(turn): - yield event - -# NOTE: A runtime ``warnings.warn(..., DeprecationWarning)`` is intentionally -# omitted here. The repo's pyproject ``filterwarnings = ["error"]`` would turn -# it into a test/caller failure, and the async helper (``stream_pydantic_ai_events``) -# still threads this handler through for existing callers that lack a ``trace_id`` -# on the async path. The runtime warning and caller migration are deferred until -# ``trace_id`` threading lands on the async helper in a future API-versioning change. - -Mirrors the LangGraph tracing handler pattern: the caller creates a handler -bound to a ``trace_id`` and a ``parent_span_id``, then hands it to -``stream_pydantic_ai_events(..., tracing_handler=handler)``. The streamer -calls ``on_tool_start`` / ``on_tool_end`` as it observes the corresponding -events in the agent stream, and the handler records one Agentex child span -per tool call. 
- -Why a handler-on-the-streamer rather than an OpenTelemetry bridge: -pydantic-ai exposes its stream of ``AgentStreamEvent`` directly, and that -stream already contains every signal we need to record tool spans. Going -through an OTel processor would require setting up an OTel ``TracerProvider`` -plus a bridge processor — that's a much larger investment, and orthogonal -to the streaming path we already own. This handler hooks into the same -event stream the UI-streaming helper consumes, so a single pass over the -events produces both: live deltas on Redis and child spans on the AgentEx -tracing pipeline. - -Why span IDs are derived from ``tool_call_id`` instead of held in a dict: -pydantic-ai's ``TemporalAgent`` splits the agent run across one or more -Temporal activities. The ``event_stream_handler`` is invoked once per -activity, with a fresh handler instance each time. So ``on_tool_start`` -(emitted inside the model activity that issued the tool call) and -``on_tool_end`` (emitted inside the next model activity, after the tool -runs) land in different handler instances — an in-memory dict can't pair -them. Deriving the span ID deterministically from ``(trace_id, -tool_call_id)`` makes the open/close pairing stateless: ``on_tool_end`` -re-derives the same ID and PATCHes the existing span directly. - -Span hierarchy produced:: - - (e.g. "Turn N", created by the caller) - ├── tool: (one child span per tool call) - └── tool: -""" - -from __future__ import annotations - -import uuid -from typing import Any -from datetime import UTC, datetime - -from agentex import AsyncAgentex -from agentex.lib.utils.logging import make_logger -from agentex.lib.adk._modules.tracing import TracingModule -from agentex.lib.adk.utils._modules.client import create_async_agentex_client - -logger = make_logger(__name__) - - -# Stable namespace for deriving tool-call span IDs. 
The exact UUID value is -# arbitrary; it just needs to be a constant so the same (trace_id, tool_call_id) -# always maps to the same span ID across handler invocations. -_TOOL_SPAN_NAMESPACE = uuid.UUID("8c2f9a2b-3e4d-4b5a-9c1f-0a1b2c3d4e5f") - - -def _tool_span_id(trace_id: str, tool_call_id: str) -> str: - """Deterministic span ID for a given tool call within a trace.""" - return str(uuid.uuid5(_TOOL_SPAN_NAMESPACE, f"{trace_id}:{tool_call_id}")) - - -class AgentexPydanticAITracingHandler: - """Records Agentex tracing spans for tool calls observed in a pydantic-ai event stream. - - .. deprecated:: - Superseded by ``UnifiedEmitter`` (``agentex.lib.core.harness``), which - derives tool and reasoning spans from the canonical ``StreamTaskMessage*`` - stream automatically when ``trace_id`` is provided. This class remains - fully functional but will be removed in a future release. New code should - use ``UnifiedEmitter`` with a trace context instead of constructing this - handler directly. - - Pass an instance to ``stream_pydantic_ai_events(..., tracing_handler=...)`` - or call ``on_tool_start`` / ``on_tool_end`` yourself if you're consuming - the event stream by hand. - """ - - def __init__( - self, - trace_id: str, - parent_span_id: str | None = None, - task_id: str | None = None, - tracing: TracingModule | None = None, - client: AsyncAgentex | None = None, - ) -> None: - self._trace_id = trace_id - self._parent_span_id = parent_span_id - # task_id on the span record (separate from trace_id) is what the - # AgentEx UI's per-task spans dropdown filters by. If you want your - # tool spans visible in that dropdown, set this to the task ID. - self._task_id = task_id - # ``_tracing`` is retained for callers / tests that want to inject a - # mocked TracingModule, even though the on_tool_* methods now go - # direct to the AgentEx client (see module docstring for why). 
- self._tracing_eager = tracing - self._tracing_lazy: TracingModule | None = None - # Defer client construction until first use so httpx binds to the - # running event loop (matches the TracingModule pattern). - self._client_eager = client - self._client_lazy: AsyncAgentex | None = None - - @property - def _tracing(self) -> TracingModule: - if self._tracing_eager is not None: - return self._tracing_eager - if self._tracing_lazy is None: - self._tracing_lazy = TracingModule() - return self._tracing_lazy - - @property - def _client(self) -> AsyncAgentex: - if self._client_eager is not None: - return self._client_eager - if self._client_lazy is None: - self._client_lazy = create_async_agentex_client() - return self._client_lazy - - async def on_tool_start( - self, - tool_call_id: str, - tool_name: str, - arguments: dict[str, Any] | str | None, - ) -> None: - """Open a child span for a tool call. - - Uses a deterministic span ID derived from ``tool_call_id`` so that - ``on_tool_end`` — which may run inside a different handler instance - when pydantic-ai splits the run across Temporal activities — can - close the same span without needing in-memory state. - """ - span_id = _tool_span_id(self._trace_id, tool_call_id) - await self._client.spans.create( - id=span_id, - trace_id=self._trace_id, - task_id=self._task_id, - parent_id=self._parent_span_id, - name=f"tool:{tool_name}" if tool_name else "tool", - start_time=datetime.now(UTC), - input={"arguments": arguments}, - data={"__span_type__": "CUSTOM"}, - ) - - async def on_tool_end(self, tool_call_id: str, result: Any) -> None: - """Close a child span by PATCHing its end_time and output. - - Re-derives the deterministic span ID from ``tool_call_id`` and updates - the existing span record directly. No in-memory span lookup, so this - works even when ``on_tool_start`` ran inside a different handler - instance (e.g. across pydantic-ai TemporalAgent activity boundaries). 
- """ - span_id = _tool_span_id(self._trace_id, tool_call_id) - await self._client.spans.update( - span_id, - end_time=datetime.now(UTC), - output={"result": result}, - ) - - async def on_tool_error(self, tool_call_id: str, error: BaseException | str) -> None: - """Close a child span with an error payload as output.""" - span_id = _tool_span_id(self._trace_id, tool_call_id) - await self._client.spans.update( - span_id, - end_time=datetime.now(UTC), - output={"error": str(error)}, - ) - - -def create_pydantic_ai_tracing_handler( - trace_id: str, - parent_span_id: str | None = None, - task_id: str | None = None, -) -> AgentexPydanticAITracingHandler: - """Create a tracing handler that records Agentex spans for pydantic-ai tool calls. - - .. deprecated:: - Superseded by ``UnifiedEmitter`` (``agentex.lib.core.harness``), which - derives tool and reasoning spans from the canonical ``StreamTaskMessage*`` - stream automatically when ``trace_id`` is provided. This function remains - fully functional but will be removed in a future release. New code should - construct a ``UnifiedEmitter`` with a trace context instead. - - Args: - trace_id: The trace ID. Typically the Agentex task ID. - parent_span_id: Optional parent span ID to nest tool spans under. If - omitted, the tool spans become trace-root spans. - task_id: Optional task ID stamped onto each span. Required for the - AgentEx UI's per-task spans dropdown to display the spans. - - Returns: - A handler suitable for passing to ``stream_pydantic_ai_events(..., tracing_handler=...)``. 
- """ - return AgentexPydanticAITracingHandler( - trace_id=trace_id, - parent_span_id=parent_span_id, - task_id=task_id, - ) diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py b/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py index b06172e7f..4e9340d7a 100644 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py +++ b/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py @@ -15,7 +15,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, AsyncIterator +from typing import Any, AsyncIterator from pydantic_ai.run import AgentRunResultEvent @@ -28,9 +28,6 @@ ) from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events -if TYPE_CHECKING: - from agentex.lib.adk._modules._pydantic_ai_tracing import AgentexPydanticAITracingHandler - StreamTaskMessage = StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone @@ -83,19 +80,17 @@ class PydanticAITurn: ``events`` is identical to the bare ``convert_pydantic_ai_to_agentex_events`` output (tool calls stream as ``Start + ToolRequestDelta + Done``, preserving argument-token streaming on the sync/yield channel). The foundation - ``auto_send`` delivers the streamed tool-request shape natively (AGX1-377), - so no coalescing is needed on either channel. + ``auto_send`` delivers the streamed tool-request shape natively, so no + coalescing is needed on either channel. 
""" def __init__( self, stream: AsyncIterator[Any], model: str | None = None, - tracing_handler: "AgentexPydanticAITracingHandler | None" = None, ) -> None: self._stream = stream self._model = model - self._tracing_handler = tracing_handler self._usage = TurnUsage(model=model) @property @@ -119,7 +114,6 @@ def _capture(result_event: AgentRunResultEvent) -> None: raw_stream = convert_pydantic_ai_to_agentex_events( self._stream, - tracing_handler=self._tracing_handler, on_result=_capture, ) async for ev in raw_stream: @@ -132,3 +126,48 @@ def usage(self) -> TurnUsage: Before exhaustion the model field is set but token fields are None. """ return self._usage + + +async def stream_pydantic_ai_events( + stream, + task_id: str, +) -> str: + """Stream Pydantic AI events to Agentex via Redis. + + Consumes a Pydantic AI ``agent.run_stream_events(...)`` async iterator and + pushes Agentex streaming updates to Redis via the ``adk.streaming`` + contexts. For use with async ACP agents that stream via Redis rather than + HTTP yields. + + Text and thinking tokens stream as deltas inside coalesced streaming + contexts. Tool requests and tool results are posted as open+close pairs + on a streaming context (the unified surface persists ``initial_content`` + when a context is closed without deltas). This matches the ``auto_send`` + convention used by all other async/Temporal harnesses. + + Tracing is derived automatically from the event stream by the emitter when + a ``trace_id`` is provided to the ``UnifiedEmitter``. + + Args: + stream: Async iterator yielded by ``agent.run_stream_events(...)``. + task_id: The Agentex task ID to stream messages to. + + Returns: + The accumulated text content of the **last** text part in the run. + Multi-step runs (where the model emits text, then a tool call, then + more text) return only the final text segment, matching the + ``stream_langgraph_events`` convention. 
+ """ + from agentex.lib.core.harness.emitter import UnifiedEmitter + + turn = PydanticAITurn( + stream, + model=None, + ) + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=None, + parent_span_id=None, + ) + result = await emitter.auto_send_turn(turn) + return result.final_text diff --git a/src/agentex/lib/adk/providers/_modules/openai_turn.py b/src/agentex/lib/adk/providers/_modules/openai_turn.py index 17a6518ee..20ac73da5 100644 --- a/src/agentex/lib/adk/providers/_modules/openai_turn.py +++ b/src/agentex/lib/adk/providers/_modules/openai_turn.py @@ -1,134 +1,8 @@ -"""OpenAITurn: adapt an OpenAI Agents SDK streamed run onto the harness surface. +"""Back-compat shim: ``OpenAITurn`` now lives in +``agentex.lib.adk._modules._openai_turn``. -A ``HarnessTurn`` exposes a single canonical ``StreamTaskMessage*`` stream plus -normalized usage. ``OpenAITurn`` wraps a ``RunResultStreaming`` (from -``Runner.run_streamed``), converts its native OpenAI events into the canonical -stream via ``convert_openai_to_agentex_events``, and after exhaustion reads the -run's ``raw_responses`` to aggregate usage into a provider-independent -``TurnUsage``. - -Delivery (yield vs auto-send) and tracing are owned by ``UnifiedEmitter``; this -module is purely the provider->canonical adapter. +Existing importers of +``agentex.lib.adk.providers._modules.openai_turn.OpenAITurn`` keep working. 
""" -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, AsyncIterator - -from agents.usage import Usage - -from agentex.lib.utils.logging import make_logger -from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage -from agentex.lib.adk.providers._modules.sync_provider import ( - convert_openai_to_agentex_events, -) - -if TYPE_CHECKING: - from agents import ModelResponse, RunResultStreaming - -logger = make_logger(__name__) - - -def openai_usage_to_turn_usage(usage: Usage | None, model: str | None) -> TurnUsage: - """Map an ``agents.Usage`` to a harness-independent ``TurnUsage``. - - All field access is defensive (``getattr(..., None)``): different model - backends populate different subsets of the usage object, and real zeros are - valid values (e.g. 0 output tokens on a pure cache hit), so we never coerce - a present-but-zero value into ``None``. - """ - if usage is None: - return TurnUsage(model=model) - - input_details = getattr(usage, "input_tokens_details", None) - output_details = getattr(usage, "output_tokens_details", None) - - return TurnUsage( - model=model, - num_llm_calls=getattr(usage, "requests", None) or 0, - input_tokens=getattr(usage, "input_tokens", None), - cached_input_tokens=getattr(input_details, "cached_tokens", None), - output_tokens=getattr(usage, "output_tokens", None), - reasoning_tokens=getattr(output_details, "reasoning_tokens", None), - total_tokens=getattr(usage, "total_tokens", None), - ) - - -def _aggregate_usage(raw_responses: list[ModelResponse]) -> Usage | None: - """Sum the per-response ``Usage`` across a run's ``ModelResponse`` list. - - Returns ``None`` when no response carries usage so the caller can emit a - usage object with only the model name set. ``Usage.add`` accumulates - requests/tokens (including cached/reasoning detail fields). 
- """ - total: Usage | None = None - for response in raw_responses: - resp_usage = getattr(response, "usage", None) - if resp_usage is None: - continue - if total is None: - total = Usage() - total.add(resp_usage) - return total - - -class OpenAITurn: - """A single OpenAI Agents SDK turn adapted to the ``HarnessTurn`` protocol. - - Construct with exactly one of: - - ``result``: a ``RunResultStreaming`` from ``Runner.run_streamed``. Its - ``stream_events()`` is converted to the canonical stream, and after the - stream is exhausted ``raw_responses`` is read to compute usage. - - ``stream``: a pre-built async iterator of canonical ``StreamTaskMessage`` - events (bypasses ``convert_openai_to_agentex_events``). Useful for tests - and for callers that have already produced canonical events. Usage stays - at ``TurnUsage(model=...)`` because there is no run to read usage from. - - ``coalesce_tool_requests`` is accepted for API parity with other provider - turns but is a no-op for OpenAI: the OpenAI converter already emits a single - ``Full(ToolRequestContent)`` per tool call rather than streamed argument - deltas, so there is nothing to coalesce. 
- """ - - def __init__( - self, - result: RunResultStreaming | None = None, - model: str | None = None, - stream: AsyncIterator[StreamTaskMessage] | None = None, - coalesce_tool_requests: bool = False, # noqa: ARG002 - API parity, no-op for OpenAI - ) -> None: - if result is None and stream is None: - raise ValueError("OpenAITurn requires either `result` or `stream`") - self._result = result - self._model = model - self._stream = stream - self._usage: TurnUsage = TurnUsage(model=model) - - @property - def events(self) -> AsyncIterator[StreamTaskMessage]: - return self._iter_events() - - async def _iter_events(self) -> AsyncIterator[StreamTaskMessage]: - if self._stream is not None: - async for event in self._stream: - yield event - return - - result = self._result - assert result is not None # guaranteed by __init__ - async for event in convert_openai_to_agentex_events(result.stream_events()): - yield event - - # Stream is exhausted: the run has finished and raw_responses is now - # populated, so usage can be aggregated and normalized. - try: - raw_responses: list[Any] = list(getattr(result, "raw_responses", None) or []) - aggregated = _aggregate_usage(raw_responses) - self._usage = openai_usage_to_turn_usage(aggregated, self._model) - except Exception as exc: # pragma: no cover - defensive: never break delivery on usage - logger.warning(f"Failed to aggregate OpenAI usage: {exc}") - self._usage = TurnUsage(model=self._model) - - def usage(self) -> TurnUsage: - """Normalized turn usage. 
Valid only after ``events`` is exhausted.""" - return self._usage +from agentex.lib.adk._modules._openai_turn import OpenAITurn # noqa: F401 diff --git a/src/agentex/lib/adk/providers/_modules/sync_provider.py b/src/agentex/lib/adk/providers/_modules/sync_provider.py index 9996bf30d..86696a2b5 100644 --- a/src/agentex/lib/adk/providers/_modules/sync_provider.py +++ b/src/agentex/lib/adk/providers/_modules/sync_provider.py @@ -14,36 +14,11 @@ TResponseInputItem, AgentOutputSchemaBase, ) -from openai.types.responses import ( - ResponseTextDeltaEvent, - ResponseFunctionToolCall, - ResponseFunctionWebSearch, - ResponseOutputItemDoneEvent, - ResponseOutputItemAddedEvent, - ResponseCodeInterpreterToolCall, - ResponseReasoningSummaryPartAddedEvent, - ResponseReasoningSummaryTextDeltaEvent, -) from agents.models.openai_provider import OpenAIProvider -from openai.types.responses.response_reasoning_text_done_event import ResponseReasoningTextDoneEvent -from openai.types.responses.response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent -from openai.types.responses.response_reasoning_summary_text_done_event import ResponseReasoningSummaryTextDoneEvent from agentex import AsyncAgentex from agentex.lib.utils.logging import make_logger from agentex.lib.core.tracing.tracer import AsyncTracer -from agentex.types.task_message_delta import TextDelta -from agentex.types.task_message_update import ( - StreamTaskMessageDone, - StreamTaskMessageFull, - StreamTaskMessageDelta, - StreamTaskMessageStart, -) -from agentex.types.task_message_content import TextContent -from agentex.types.tool_request_content import ToolRequestContent -from agentex.types.tool_response_content import ToolResponseContent -from agentex.types.reasoning_content_delta import ReasoningContentDelta -from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta logger = make_logger(__name__) @@ -94,10 +69,10 @@ class SyncStreamingModel(Model): .. 
deprecated:: Prefer the unified harness surface for new OpenAI Agents integrations: wrap a ``Runner.run_streamed`` result in - ``agentex.lib.adk.providers._modules.openai_turn.OpenAITurn`` and drive + ``agentex.lib.adk._modules._openai_turn.OpenAITurn`` and drive delivery + tracing through ``UnifiedEmitter`` (see the - ``060_harness_openai`` / ``130_harness_openai`` / ``140_harness_openai`` - tutorials). This per-model tracing wrapper predates the harness and is + ``050_openai_agents`` / ``120_openai_agents`` tutorials). This + per-model tracing wrapper predates the harness and is retained only for backwards compatibility; it will be removed in a future release. No runtime warning is emitted. """ @@ -405,317 +380,8 @@ def get_model(self, model_name: Optional[str] = None) -> Model: return wrapped_model -def _extract_tool_call_info(tool_call_item: Any) -> tuple[str, str, dict[str, Any]]: - """ - Extract call_id, tool_name, and tool_arguments from a tool call item. - Args: - tool_call_item: The tool call item to process - Returns: - A tuple of (call_id, tool_name, tool_arguments) - """ - # Generic handling for different tool call types - # Try 'call_id' first, then 'id', then generate placeholder - if hasattr(tool_call_item, "call_id"): - call_id = tool_call_item.call_id - elif hasattr(tool_call_item, "id"): - call_id = tool_call_item.id - else: - call_id = f"unknown_call_{id(tool_call_item)}" - - if isinstance(tool_call_item, ResponseFunctionWebSearch): - tool_name = "web_search" - tool_arguments = {"action": tool_call_item.action.model_dump(), "status": tool_call_item.status} - elif isinstance(tool_call_item, ResponseCodeInterpreterToolCall): - tool_name = "code_interpreter" - tool_arguments = {"code": tool_call_item.code, "status": tool_call_item.status} - elif isinstance(tool_call_item, ResponseFunctionToolCall): - # Handle standard function tool calls - tool_name = tool_call_item.name - # Handle the arguments field which might be a string or None - if 
tool_call_item.arguments: - if isinstance(tool_call_item.arguments, str): - import json - - tool_arguments = json.loads(tool_call_item.arguments) if tool_call_item.arguments else {} - else: - tool_arguments = tool_call_item.arguments - else: - tool_arguments = {} - else: - # Generic handling for any tool call type - tool_name = getattr(tool_call_item, "name", type(tool_call_item).__name__) - # Handle the arguments field which might be a string or None - if hasattr(tool_call_item, "arguments"): - arguments = tool_call_item.arguments - if isinstance(arguments, str): - import json - - tool_arguments = json.loads(arguments) if arguments else {} - elif arguments is None: - tool_arguments = {} - else: - tool_arguments = arguments - else: - tool_arguments = tool_call_item.model_dump() - - return call_id, tool_name, tool_arguments - - -def _extract_tool_response_info(tool_map: dict[str, Any], tool_output_item: Any) -> tuple[str, str, str]: - """ - Extract call_id, tool_name, and content from a tool output item. 
- Args: - tool_map: Dictionary mapping call_ids to tool names - tool_output_item: The tool output item to process - Returns: - A tuple of (call_id, tool_name, content) - """ - - # Handle different formats of tool_output_item - if isinstance(tool_output_item, dict): - call_id = tool_output_item.get("call_id", tool_output_item.get("id", f"unknown_call_{id(tool_output_item)}")) - content = tool_output_item.get("output", str(tool_output_item)) - else: - # Try to get call_id from attributes - if hasattr(tool_output_item, "call_id"): - call_id = tool_output_item.call_id - elif hasattr(tool_output_item, "id"): - call_id = tool_output_item.id - else: - call_id = f"unknown_call_{id(tool_output_item)}" - - # Get content - if hasattr(tool_output_item, "output"): - content = tool_output_item.output - else: - content = str(tool_output_item) - - # Get tool name from map - tool_name = tool_map.get(call_id, "unknown_tool") - - return call_id, tool_name, content - - -async def convert_openai_to_agentex_events(stream_response): - """Convert OpenAI streaming events to AgentEx TaskMessageUpdate events with reasoning support. 
- - This is an enhanced version of the base converter that includes support for: - - Reasoning content deltas (for o1 models) - - Reasoning summary deltas (for o1 models) - - Args: - stream_response: An async iterator of OpenAI streaming events - Yields: - TaskMessageUpdate: AgentEx streaming events (StreamTaskMessageDelta, StreamTaskMessageFull, or StreamTaskMessageDone) - """ - - tool_map = {} - event_count = 0 - message_index = 0 # Track message index for proper sequencing - seen_tool_output = False # Track if we've seen tool output to know when final text starts - item_id_to_index = {} # Map item_id to message index - item_id_to_type = {} # Map item_id to content type (text, reasoning_content, reasoning_summary) - - async for event in stream_response: - event_count += 1 - - # Check for raw response events which contain the actual OpenAI streaming events - if hasattr(event, "type") and event.type == "raw_response_event": - if hasattr(event, "data"): - raw_event = event.data - - # Check for ResponseOutputItemAddedEvent which signals a new message starting - if isinstance(raw_event, ResponseOutputItemAddedEvent): - # Don't increment here - we'll increment when we see the actual text delta - # This is just a signal that a new message is starting - pass - - # Handle item completion - send done event to close the message - elif isinstance(raw_event, ResponseOutputItemDoneEvent): - item_id = raw_event.item.id - if item_id in item_id_to_index: - # Get the message type to decide whether to send done event - message_type = item_id_to_type.get(item_id, "text") - - # Don't send done events for reasoning content/summary - # They just end with their last delta - if message_type not in ("reasoning_content", "reasoning_summary"): - yield StreamTaskMessageDone( - type="done", - index=item_id_to_index[item_id], - ) - - # Skip reasoning summary part added events - we handle them on delta - elif isinstance(raw_event, ResponseReasoningSummaryPartAddedEvent): - pass - - # Handle 
reasoning summary text delta events - elif isinstance(raw_event, ResponseReasoningSummaryTextDeltaEvent): - item_id = raw_event.item_id - summary_index = raw_event.summary_index - - # If this is a new item_id we haven't seen, create a new message - if item_id and item_id not in item_id_to_index: - message_index += 1 - item_id_to_index[item_id] = message_index - item_id_to_type[item_id] = "reasoning_summary" - - # Send a start event for this new reasoning summary message - yield StreamTaskMessageStart( - type="start", - index=item_id_to_index[item_id], - content=TextContent( - type="text", - author="agent", - content="", # Start with empty content - ), - ) - - # Use the index for this item_id - current_index = item_id_to_index.get(item_id, message_index) - - # Yield reasoning summary delta - yield StreamTaskMessageDelta( - type="delta", - index=current_index, - delta=ReasoningSummaryDelta( - type="reasoning_summary", - summary_index=summary_index, - summary_delta=raw_event.delta, - ), - ) - - # Handle reasoning summary text done events - elif isinstance(raw_event, ResponseReasoningSummaryTextDoneEvent): - # We do NOT close the streaming context here - # as there can be multiple reasoning summaries. 
- # The context will be closed when the entire - # output item is done (ResponseOutputItemDoneEvent) - pass - - # Handle reasoning content text delta events - elif isinstance(raw_event, ResponseReasoningTextDeltaEvent): - item_id = raw_event.item_id - content_index = raw_event.content_index - - # If this is a new item_id we haven't seen, create a new message - if item_id and item_id not in item_id_to_index: - message_index += 1 - item_id_to_index[item_id] = message_index - item_id_to_type[item_id] = "reasoning_content" - - # Send a start event for this new reasoning content message - yield StreamTaskMessageStart( - type="start", - index=item_id_to_index[item_id], - content=TextContent( - type="text", - author="agent", - content="", # Start with empty content - ), - ) - - # Use the index for this item_id - current_index = item_id_to_index.get(item_id, message_index) - - # Yield reasoning content delta - yield StreamTaskMessageDelta( - type="delta", - index=current_index, - delta=ReasoningContentDelta( - type="reasoning_content", - content_index=content_index, - content_delta=raw_event.delta, - ), - ) - - # Handle reasoning content text done events - elif isinstance(raw_event, ResponseReasoningTextDoneEvent): - # We do NOT close the streaming context here - # as there can be multiple reasoning content texts. 
- # The context will be closed when the entire - # output item is done (ResponseOutputItemDoneEvent) - pass - - # Check if this is a text delta event from OpenAI - elif isinstance(raw_event, ResponseTextDeltaEvent): - # Check if this event has an item_id - item_id = getattr(raw_event, "item_id", None) - - # If this is a new item_id we haven't seen, it's a new message - if item_id and item_id not in item_id_to_index: - # Check if this is truly a NEW text message after tools - # We need to differentiate between the first text and the final text after tools - if seen_tool_output: - # This is the final text message after tool execution - message_index += 1 - item_id_to_index[item_id] = message_index - else: - item_id_to_index[item_id] = message_index - - item_id_to_type[item_id] = "text" - - # Send a start event with empty content for this new text message - yield StreamTaskMessageStart( - type="start", - index=item_id_to_index[item_id], - content=TextContent( - type="text", - author="agent", - content="", # Start with empty content, deltas will fill it - ), - ) - - # Use the index for this item_id - current_index = item_id_to_index.get(item_id, message_index) - - delta_message = StreamTaskMessageDelta( - type="delta", - index=current_index, - delta=TextDelta( - type="text", - text_delta=raw_event.delta, - ), - ) - yield delta_message - - elif hasattr(event, "type") and event.type == "run_item_stream_event": - # Skip reasoning_item events - they're handled via raw_response_event above - if hasattr(event, "item") and event.item.type == "reasoning_item": - continue - - # Check for tool_call_item type (this is when a tool is being called) - elif hasattr(event, "item") and event.item.type == "tool_call_item": - # Extract tool call information using the helper method - call_id, tool_name, tool_arguments = _extract_tool_call_info(event.item.raw_item) - tool_map[call_id] = tool_name - tool_request_content = ToolRequestContent( - tool_call_id=call_id, - name=tool_name, - 
arguments=tool_arguments, - author="agent", - ) - message_index += 1 # Increment for new message - yield StreamTaskMessageFull( - index=message_index, - type="full", - content=tool_request_content, - ) - - # Check for tool_call_output_item type (this is when a tool returns output) - elif hasattr(event, "item") and event.item.type == "tool_call_output_item": - # Extract tool response information using the helper method - call_id, tool_name, content = _extract_tool_response_info(tool_map, event.item.raw_item) - tool_response_content = ToolResponseContent( - tool_call_id=call_id, - name=tool_name, - content=content, - author="agent", - ) - message_index += 1 # Increment for new message - seen_tool_output = True # Mark that we've seen tool output so next text gets new index - yield StreamTaskMessageFull( - type="full", - index=message_index, - content=tool_response_content, - ) +# The OpenAI streaming tap ``convert_openai_to_agentex_events`` now lives in +# ``agentex.lib.adk._modules._openai_sync``; re-exported here for back-compat. 
+from agentex.lib.adk._modules._openai_sync import ( # noqa: E402 + convert_openai_to_agentex_events as convert_openai_to_agentex_events, +) diff --git a/src/agentex/lib/cli/commands/init.py b/src/agentex/lib/cli/commands/init.py index 307a5d0e8..9849e9bbc 100644 --- a/src/agentex/lib/cli/commands/init.py +++ b/src/agentex/lib/cli/commands/init.py @@ -26,14 +26,21 @@ class TemplateType(str, Enum): TEMPORAL_OPENAI_AGENTS = "temporal-openai-agents" TEMPORAL_PYDANTIC_AI = "temporal-pydantic-ai" TEMPORAL_LANGGRAPH = "temporal-langgraph" + TEMPORAL_CLAUDE_CODE = "temporal-claude-code" + TEMPORAL_CODEX = "temporal-codex" DEFAULT = "default" DEFAULT_LANGGRAPH = "default-langgraph" DEFAULT_PYDANTIC_AI = "default-pydantic-ai" + DEFAULT_OPENAI_AGENTS = "default-openai-agents" + DEFAULT_CLAUDE_CODE = "default-claude-code" + DEFAULT_CODEX = "default-codex" SYNC = "sync" SYNC_OPENAI_AGENTS = "sync-openai-agents" SYNC_OPENAI_AGENTS_LOCAL_SANDBOX = "sync-openai-agents-local-sandbox" SYNC_LANGGRAPH = "sync-langgraph" SYNC_PYDANTIC_AI = "sync-pydantic-ai" + SYNC_CLAUDE_CODE = "sync-claude-code" + SYNC_CODEX = "sync-codex" def render_template( @@ -66,14 +73,21 @@ def create_project_structure( TemplateType.TEMPORAL_OPENAI_AGENTS: ["acp.py", "workflow.py", "run_worker.py", "activities.py"], TemplateType.TEMPORAL_PYDANTIC_AI: ["acp.py", "workflow.py", "run_worker.py", "agent.py", "tools.py"], TemplateType.TEMPORAL_LANGGRAPH: ["acp.py", "workflow.py", "run_worker.py", "graph.py", "tools.py"], + TemplateType.TEMPORAL_CLAUDE_CODE: ["acp.py", "workflow.py", "run_worker.py", "activities.py"], + TemplateType.TEMPORAL_CODEX: ["acp.py", "workflow.py", "run_worker.py", "activities.py"], TemplateType.DEFAULT: ["acp.py"], TemplateType.DEFAULT_LANGGRAPH: ["acp.py", "graph.py", "tools.py"], TemplateType.DEFAULT_PYDANTIC_AI: ["acp.py", "agent.py", "tools.py"], + TemplateType.DEFAULT_OPENAI_AGENTS: ["acp.py"], + TemplateType.DEFAULT_CLAUDE_CODE: ["acp.py"], + TemplateType.DEFAULT_CODEX: ["acp.py"], 
TemplateType.SYNC: ["acp.py"], TemplateType.SYNC_OPENAI_AGENTS: ["acp.py"], TemplateType.SYNC_OPENAI_AGENTS_LOCAL_SANDBOX: ["acp.py", "agent.py", "tools.py"], TemplateType.SYNC_LANGGRAPH: ["acp.py", "graph.py", "tools.py"], TemplateType.SYNC_PYDANTIC_AI: ["acp.py", "agent.py", "tools.py"], + TemplateType.SYNC_CLAUDE_CODE: ["acp.py"], + TemplateType.SYNC_CODEX: ["acp.py"], }[template_type] # Create project/code files @@ -184,8 +198,11 @@ def validate_agent_name(text: str) -> bool | str: "Which Async template would you like to use?", choices=[ {"name": "Basic Async ACP", "value": TemplateType.DEFAULT}, + {"name": "Async ACP + OpenAI Agents SDK", "value": TemplateType.DEFAULT_OPENAI_AGENTS}, {"name": "Async ACP + LangGraph", "value": TemplateType.DEFAULT_LANGGRAPH}, {"name": "Async ACP + Pydantic AI", "value": TemplateType.DEFAULT_PYDANTIC_AI}, + {"name": "Async ACP + Claude Code", "value": TemplateType.DEFAULT_CLAUDE_CODE}, + {"name": "Async ACP + Codex", "value": TemplateType.DEFAULT_CODEX}, ], ).ask() if not template_type: @@ -198,6 +215,8 @@ def validate_agent_name(text: str) -> bool | str: {"name": "Temporal + OpenAI Agents SDK (Recommended)", "value": TemplateType.TEMPORAL_OPENAI_AGENTS}, {"name": "Temporal + Pydantic AI", "value": TemplateType.TEMPORAL_PYDANTIC_AI}, {"name": "Temporal + LangGraph", "value": TemplateType.TEMPORAL_LANGGRAPH}, + {"name": "Temporal + Claude Code", "value": TemplateType.TEMPORAL_CLAUDE_CODE}, + {"name": "Temporal + Codex", "value": TemplateType.TEMPORAL_CODEX}, ], ).ask() if not template_type: @@ -211,6 +230,8 @@ def validate_agent_name(text: str) -> bool | str: {"name": "Sync ACP + OpenAI Agents SDK + Local Sandbox", "value": TemplateType.SYNC_OPENAI_AGENTS_LOCAL_SANDBOX}, {"name": "Sync ACP + LangGraph", "value": TemplateType.SYNC_LANGGRAPH}, {"name": "Sync ACP + Pydantic AI", "value": TemplateType.SYNC_PYDANTIC_AI}, + {"name": "Sync ACP + Claude Code", "value": TemplateType.SYNC_CLAUDE_CODE}, + {"name": "Sync ACP + Codex", 
"value": TemplateType.SYNC_CODEX}, ], ).ask() if not template_type: diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/.dockerignore b/src/agentex/lib/cli/templates/default-claude-code/.dockerignore.j2 similarity index 96% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/.dockerignore rename to src/agentex/lib/cli/templates/default-claude-code/.dockerignore.j2 index c49489471..c2d7fca4d 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/.dockerignore +++ b/src/agentex/lib/cli/templates/default-claude-code/.dockerignore.j2 @@ -40,4 +40,4 @@ venv.bak/ .gitignore # Misc -.DS_Store +.DS_Store diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/.env.example b/src/agentex/lib/cli/templates/default-claude-code/.env.example.j2 similarity index 79% rename from examples/tutorials/10_async/10_temporal/130_langgraph/.env.example rename to src/agentex/lib/cli/templates/default-claude-code/.env.example.j2 index ab1a5790f..015f49ef7 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/.env.example +++ b/src/agentex/lib/cli/templates/default-claude-code/.env.example.j2 @@ -1,4 +1,4 @@ -# at130-langgraph - Environment Variables +# {{ agent_name }} - Environment Variables # Copy this file to .env and fill in the values # API key for your LLM provider @@ -10,4 +10,4 @@ LITELLM_API_KEY= # SGP Configuration (optional - for tracing) # SGP_API_KEY= # SGP_ACCOUNT_ID= -# SGP_CLIENT_BASE_URL= \ No newline at end of file +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/default-claude-code/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile-uv.j2 new file mode 100644 index 000000000..582434ac9 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile-uv.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update 
&& apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-claude-code/Dockerfile.j2 b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile.j2 new file mode 100644 index 000000000..0395caf74 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile.j2 @@ -0,0 +1,42 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt 
/app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-claude-code/README.md.j2 b/src/agentex/lib/cli/templates/default-claude-code/README.md.j2 new file mode 100644 index 000000000..ab05398e3 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/README.md.j2 @@ -0,0 +1,64 @@ +# {{ agent_name }} - AgentEx Async Claude Code Agent + +This template builds an **asynchronous** (non-Temporal) agent that drives the +**Claude Code CLI** through the unified harness surface on AgentEx: +- Spawns `claude -p --output-format stream-json --verbose` as a local subprocess +- Wraps the CLI's stdout stream in a `ClaudeCodeTurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` + (the async Redis push path), so the UI receives output in real time +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `claude` CLI installed and on your `PATH` +- An `ANTHROPIC_API_KEY` (or equivalent credential) in your environment + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, subprocess spawn, and event handlers +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Async ACP with the harness +The async ACP model streams events over Redis instead of an HTTP response. 
The +`@acp.on_task_event_send` handler spawns the Claude Code CLI and pushes the +harness events to the task stream. + +### The unified harness surface +`ClaudeCodeTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Customize the subprocess +Edit `_spawn_claude` in `project/acp.py` to change the CLI flags, working +directory, or how the prompt is delivered. + +### 2. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 3. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/dev.ipynb b/src/agentex/lib/cli/templates/default-claude-code/dev.ipynb.j2 similarity index 98% rename from examples/tutorials/10_async/10_temporal/130_langgraph/dev.ipynb rename to src/agentex/lib/cli/templates/default-claude-code/dev.ipynb.j2 index 5320daac7..d3a68303f 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/dev.ipynb +++ b/src/agentex/lib/cli/templates/default-claude-code/dev.ipynb.j2 @@ -19,7 +19,7 @@ "metadata": {}, "outputs": [], "source": [ - "AGENT_NAME = \"at130-langgraph\"" + "AGENT_NAME = \"{{ agent_name }}\"" ] }, { @@ -123,4 +123,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/src/agentex/lib/cli/templates/default-claude-code/environments.yaml.j2 b/src/agentex/lib/cli/templates/default-claude-code/environments.yaml.j2 new file mode 100644 index 000000000..f802776f0 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/environments.yaml.j2 @@ -0,0 +1,57 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. 
+# This DIFFERS from the manifest.yaml file in that it is used to configure settings that apply ONLY to a specific environment. + +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal: + enabled: false + + diff --git a/src/agentex/lib/cli/templates/default-claude-code/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default-claude-code/manifest.yaml.j2 new file mode 100644 index 000000000..2d94ba41c --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/manifest.yaml.j2 @@ -0,0 +1,120 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. 
+ +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: async + + # Unique name for your agent + # Used for task 
routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description }} + + # Temporal workflow configuration + # Set enabled: true to use Temporal workflows for long-running tasks + temporal: + enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: LITELLM_API_KEY + secret_name: litellm-api-key + secret_key: api-key + - env_var_name: SGP_API_KEY + secret_name: sgp-api-key + secret_key: api-key + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: + LITELLM_API_KEY: "" # Set your LLM API key + # OPENAI_BASE_URL: "" + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-claude-code/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-claude-code/project/acp.py.j2 new file mode 100644 index 000000000..cd7e771db --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/project/acp.py.j2 @@ -0,0 +1,147 @@ +"""ACP handler for {{ agent_name }} — an async Claude Code agent. 
+ +Spawns ``claude -p --output-format stream-json --verbose`` as a LOCAL +asyncio subprocess (no Scale sandbox — that is a production concern). Stdout +lines are fed into ``ClaudeCodeTurn``. Events are delivered via +``UnifiedEmitter.auto_send_turn``, the async Redis push path. + +Live runs require the ``claude`` CLI to be installed and an +ANTHROPIC_API_KEY (or equivalent credential) in the environment. +""" + +from __future__ import annotations + +import os +import asyncio +from typing import AsyncIterator + +from dotenv import load_dotenv + +load_dotenv() + +import agentex.lib.adk as adk +from agentex.lib.adk import ClaudeCodeTurn +from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.types.fastacp import AsyncACPConfig +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +logger = make_logger(__name__) + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +acp = FastACP.create( + acp_type="async", + config=AsyncACPConfig(type="base"), +) + + +async def _spawn_claude(prompt: str) -> AsyncIterator[str]: + """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines. + + Injectable seam: tests can monkeypatch this with a fake async iterator of + pre-recorded lines so no real CLI invocation is needed offline. 
+ """ + proc = await asyncio.create_subprocess_exec( + "claude", + "-p", + "--output-format", + "stream-json", + "--verbose", + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + assert proc.stdout is not None + assert proc.stdin is not None + + proc.stdin.write(prompt.encode()) + proc.stdin.close() + + # Drain stderr concurrently. With --verbose, Claude Code can write enough to + # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks + # on its stderr write while we block reading stdout — a deadlock. A + # background task keeps stderr flowing so stdout never stalls. + async def _drain_stderr() -> None: + assert proc.stderr is not None + async for _ in proc.stderr: + pass + + stderr_task = asyncio.create_task(_drain_stderr()) + + try: + buffer = "" + async for chunk in proc.stdout: + buffer += chunk.decode("utf-8", errors="replace") + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if line: + yield line + + if buffer.strip(): + yield buffer.strip() + + await proc.wait() + finally: + # Release the subprocess and stderr drain task even if the consumer + # abandons the generator early (task cancellation / client disconnect): + # cancel the drain task and terminate+reap the process if it is still + # running, so neither is leaked. 
+ stderr_task.cancel() + try: + await stderr_task + except asyncio.CancelledError: + pass + if proc.returncode is None: + try: + proc.terminate() + except ProcessLookupError: + pass + await proc.wait() + + +@acp.on_task_create +async def handle_task_create(params: CreateTaskParams): + logger.info("Task created: %s", params.task.id) + + +@acp.on_task_event_send +async def handle_task_event_send(params: SendEventParams): + """Handle a user message: spawn Claude Code locally and push events to the task stream.""" + task_id = params.task.id + prompt = params.event.content.content + logger.info("Processing message for task %s", task_id) + + await adk.messages.create(task_id=task_id, content=params.event.content) + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name="message", + input={"message": prompt}, + data={"__span_type__": "AGENT_WORKFLOW"}, + ) as turn_span: + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + turn = ClaudeCodeTurn(_spawn_claude(prompt)) + result = await emitter.auto_send_turn(turn) + if turn_span: + turn_span.output = {"final_text": result.final_text} + + +@acp.on_task_cancel +async def handle_task_canceled(params: CancelTaskParams): + logger.info("Task canceled: %s", params.task.id) diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/pyproject.toml b/src/agentex/lib/cli/templates/default-claude-code/pyproject.toml.j2 similarity index 68% rename from examples/tutorials/10_async/00_base/harness_langgraph/pyproject.toml rename to src/agentex/lib/cli/templates/default-claude-code/pyproject.toml.j2 index 69856e6db..e499b1dc1 100644 --- a/examples/tutorials/10_async/00_base/harness_langgraph/pyproject.toml +++ b/src/agentex/lib/cli/templates/default-claude-code/pyproject.toml.j2 @@ -3,23 +3,19 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "a-harness-langgraph" +name = "{{ project_name }}" version = 
"0.1.0" -description = "An async LangGraph agent using the unified harness surface" -readme = "README.md" +description = "{{ description }}" requires-python = ">=3.12" dependencies = [ "agentex-sdk", "scale-gp", - "langgraph", - "langchain-openai", + "python-dotenv>=1.0,<2", ] [project.optional-dependencies] dev = [ "pytest", - "pytest-asyncio", - "httpx", "black", "isort", "flake8", diff --git a/src/agentex/lib/cli/templates/default-claude-code/requirements.txt.j2 b/src/agentex/lib/cli/templates/default-claude-code/requirements.txt.j2 new file mode 100644 index 000000000..8c0630384 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/requirements.txt.j2 @@ -0,0 +1,8 @@ +# Install agentex-sdk from local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/.dockerignore b/src/agentex/lib/cli/templates/default-codex/.dockerignore.j2 similarity index 96% rename from examples/tutorials/10_async/10_temporal/harness_pydantic_ai/.dockerignore rename to src/agentex/lib/cli/templates/default-codex/.dockerignore.j2 index c49489471..c2d7fca4d 100644 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/.dockerignore +++ b/src/agentex/lib/cli/templates/default-codex/.dockerignore.j2 @@ -40,4 +40,4 @@ venv.bak/ .gitignore # Misc -.DS_Store +.DS_Store diff --git a/src/agentex/lib/cli/templates/default-codex/.env.example.j2 b/src/agentex/lib/cli/templates/default-codex/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for 
tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/default-codex/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default-codex/Dockerfile-uv.j2 new file mode 100644 index 000000000..582434ac9 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/Dockerfile-uv.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-codex/Dockerfile.j2 b/src/agentex/lib/cli/templates/default-codex/Dockerfile.j2 new file mode 100644 index 000000000..0395caf74 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/Dockerfile.j2 @@ -0,0 +1,42 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + 
+# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + node \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-codex/README.md.j2 b/src/agentex/lib/cli/templates/default-codex/README.md.j2 new file mode 100644 index 000000000..b82f1c5f2 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/README.md.j2 @@ -0,0 +1,72 @@ +# {{ agent_name }} - AgentEx Async Codex Agent + +This template builds an **asynchronous** (non-Temporal) agent that drives the +**Codex CLI** through the unified harness surface on AgentEx: +- Spawns `codex exec --json` as a local subprocess +- Wraps the CLI's stdout stream in a `CodexTurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` + (the async Redis push path), so the UI receives output in real time +- Persists the codex session/thread ID via `adk.state` for multi-turn memory +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `codex` CLI installed and on your `PATH` (`npm install -g @openai/codex`) +- An `OPENAI_API_KEY` in your environment + +## 
Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, subprocess spawn, state, and event handlers +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Async ACP with the harness +The async ACP model streams events over Redis instead of an HTTP response. The +`@acp.on_task_event_send` handler spawns the Codex CLI and pushes the harness +events to the task stream. + +### Multi-turn memory +The codex session/thread ID is persisted via `adk.state`, so each new turn +resumes the same codex session with `codex exec resume <thread_id>`. + +### The unified harness surface +`CodexTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Choose a model +Set `CODEX_MODEL` (defaults to `o4-mini`) to control which model codex uses. + +### 2. Customize the subprocess +Edit `_spawn_codex` in `project/acp.py` to change the CLI flags or how the +prompt is delivered. + +### 3. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4. 
Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/default-codex/dev.ipynb.j2 b/src/agentex/lib/cli/templates/default-codex/dev.ipynb.j2 new file mode 100644 index 000000000..d3a68303f --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/dev.ipynb.j2 @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# (REQUIRED) Create a new task. For Async agents, you must create a task for messages to be associated with.\n", + "import uuid\n", + "\n", + "rpc_response = client.agents.create_task(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", + " \"params\": {}\n", + " }\n", + ")\n", + "\n", + "task = rpc_response.result\n", + "print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Send an event to the agent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When 
processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_event(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"task_id\": task.id,\n", + " }\n", + ")\n", + "\n", + "event = rpc_response.result\n", + "print(event)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6927cc0", + "metadata": {}, + "outputs": [], + "source": [ + "# Subscribe to the async task messages produced by the agent\n", + "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", + "\n", + "task_messages = subscribe_to_async_task_messages(\n", + " client=client,\n", + " task=task, \n", + " only_after_timestamp=event.created_at, \n", + " print_messages=True,\n", + " rich_print=True,\n", + " timeout=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4864e354", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/default-codex/environments.yaml.j2 b/src/agentex/lib/cli/templates/default-codex/environments.yaml.j2 new file mode 100644 index 000000000..f802776f0 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/environments.yaml.j2 @@ -0,0 +1,57 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. 
+# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. + +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal: + enabled: false + + diff --git a/src/agentex/lib/cli/templates/default-codex/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default-codex/manifest.yaml.j2 new file mode 100644 index 000000000..2d94ba41c --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/manifest.yaml.j2 @@ -0,0 +1,120 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. 
+# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: async + + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and 
discovery + description: {{ description }} + + # Temporal workflow configuration + # Set enabled: true to use Temporal workflows for long-running tasks + temporal: + enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: LITELLM_API_KEY + secret_name: litellm-api-key + secret_key: api-key + - env_var_name: SGP_API_KEY + secret_name: sgp-api-key + secret_key: api-key + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: + LITELLM_API_KEY: "" # Set your LLM API key + # OPENAI_BASE_URL: "" + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-codex/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-codex/project/acp.py.j2 new file mode 100644 index 000000000..80f09b64b --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/project/acp.py.j2 @@ -0,0 +1,226 @@ +"""Async (base) ACP handler for {{ agent_name }} — a Codex CLI harness agent. 
+ +Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` + +``UnifiedEmitter`` for an async (Redis-streaming) ACP agent without Temporal. + +The handler: +1. Spawns ``codex exec --json`` as a LOCAL asyncio subprocess (no sandbox). + This is correct for local development; production isolation is a separate + concern. +2. Wraps the stdout line stream in a ``CodexTurn``. +3. Delivers every canonical ``StreamTaskMessage*`` event to Redis via + ``UnifiedEmitter.auto_send_turn``, so the UI receives tokens in real time. +4. Multi-turn memory is persisted via ``adk.state``. + +Live runs require: +- ``codex`` CLI on PATH (``npm install -g @openai/codex``) +- ``OPENAI_API_KEY`` set in the environment +""" + +from __future__ import annotations + +import os +import time +import codecs +import asyncio +from collections.abc import AsyncIterator + +from dotenv import load_dotenv + +load_dotenv() + +import agentex.lib.adk as adk +from agentex.lib.adk import CodexTurn +from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.types.fastacp import AsyncACPConfig +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.lib.utils.model_utils import BaseModel +from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +logger = make_logger(__name__) + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +acp = FastACP.create( + acp_type="async", + config=AsyncACPConfig(type="base"), +) + +MODEL = os.environ.get("CODEX_MODEL", "o4-mini") + + +class ConversationState(BaseModel): + """Per-task conversation state persisted via ``adk.state``. 
+ + We store the codex session/thread ID so subsequent turns can resume the + same codex session via ``codex exec resume <thread_id>``. + """ + + codex_thread_id: str | None = None + turn_number: int = 0 + + +async def _spawn_codex( + model: str, + thread_id: str | None = None, +) -> asyncio.subprocess.Process: + """Spawn ``codex exec --json`` locally and return the live process. + + Injection seam: tests replace this function with a fake that returns a + mock process whose stdout yields pre-recorded event lines. + + When ``thread_id`` is provided the subcommand becomes + ``codex exec ... resume <thread_id> -`` so codex continues the prior + conversation thread. + + The caller writes the prompt to stdin after the process starts, then + closes stdin so codex knows input is complete. + """ + base_flags = [ + "--json", + "--skip-git-repo-check", + "--dangerously-bypass-approvals-and-sandbox", + "--model", + model, + ] + + if thread_id: + cmd = ["codex", "exec", *base_flags, "resume", thread_id, "-"] + else: + cmd = ["codex", "exec", *base_flags, "-"] + + return await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + # Discard stderr: codex --json writes events to stdout; its stderr is + # progress/debug noise. Capturing it with PIPE but never reading it + # would deadlock once codex fills the OS pipe buffer (~64 KB). + stderr=asyncio.subprocess.DEVNULL, + env={**os.environ}, + ) + + +async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]: + """Yield newline-delimited JSON lines from the process stdout. + + Uses an incremental UTF-8 decoder so a multibyte character split across two + 4 KB reads is decoded correctly instead of being corrupted at the boundary. 
+ """ + assert process.stdout is not None + decoder = codecs.getincrementaldecoder("utf-8")(errors="replace") + buffer = "" + while True: + chunk = await process.stdout.read(4096) + if not chunk: + break + buffer += decoder.decode(chunk) + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if line: + yield line + buffer += decoder.decode(b"", final=True) + if buffer.strip(): + yield buffer.strip() + + +@acp.on_task_create +async def handle_task_create(params: CreateTaskParams): + """Initialize per-task state on task creation.""" + logger.info("Task created: %s", params.task.id) + await adk.state.create( + task_id=params.task.id, + agent_id=params.agent.id, + state=ConversationState(), + ) + + +@acp.on_task_event_send +async def handle_task_event_send(params: SendEventParams): + """Handle each user message: spawn codex, stream events, save thread ID.""" + task_id = params.task.id + agent_id = params.agent.id + user_message = params.event.content.content + + logger.info("Processing message for task %s", task_id) + + await adk.messages.create(task_id=task_id, content=params.event.content) + + task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id) + if task_state is None: + state = ConversationState() + task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state) + else: + state = ConversationState.model_validate(task_state.state) + + state.turn_number += 1 + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name=f"Turn {state.turn_number}", + input={"message": user_message}, + data={"__span_type__": "AGENT_WORKFLOW"}, + ) as turn_span: + start_ms = int(time.monotonic() * 1000) + + process = await _spawn_codex(MODEL, thread_id=state.codex_thread_id) + + assert process.stdin is not None + process.stdin.write(user_message.encode("utf-8")) + await process.stdin.drain() + process.stdin.close() + + turn = CodexTurn( + events=_process_stdout(process), + 
model=MODEL, + ) + + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + + result = await emitter.auto_send_turn(turn) + + await process.wait() + + # Record the real wall-clock duration AFTER streaming completes; setting + # it before the stream ran would capture only subprocess spawn overhead. + turn.duration_ms = int(time.monotonic() * 1000) - start_ms + + usage = turn.usage() + + # Persist the codex session id (public accessor; valid post-stream) so the + # next turn resumes the same session. + if turn.session_id: + state.codex_thread_id = turn.session_id + + await adk.state.update( + state_id=task_state.id, + task_id=task_id, + agent_id=agent_id, + state=state, + ) + + if turn_span: + turn_span.output = { + "final_text": result.final_text, + "model": usage.model, + } + + +@acp.on_task_cancel +async def handle_task_canceled(params: CancelTaskParams): + logger.info("Task canceled: %s", params.task.id) diff --git a/src/agentex/lib/cli/templates/default-codex/pyproject.toml.j2 b/src/agentex/lib/cli/templates/default-codex/pyproject.toml.j2 new file mode 100644 index 000000000..e499b1dc1 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/pyproject.toml.j2 @@ -0,0 +1,33 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "black", + "isort", + "flake8", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/default-codex/requirements.txt.j2 b/src/agentex/lib/cli/templates/default-codex/requirements.txt.j2 new file mode 100644 index 
000000000..8c0630384 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/requirements.txt.j2 @@ -0,0 +1,8 @@ +# Install agentex-sdk from local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 index 3309dc07e..38d393b09 100644 --- a/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 @@ -15,13 +15,14 @@ if _litellm_key: os.environ["OPENAI_API_KEY"] = _litellm_key import agentex.lib.adk as adk -from agentex.lib.adk import create_langgraph_tracing_handler, stream_langgraph_events +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config from agentex.lib.sdk.fastacp.fastacp import FastACP from agentex.protocol.acp import SendEventParams, CancelTaskParams, CreateTaskParams from agentex.lib.types.fastacp import AsyncACPConfig from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn from project.graph import create_graph @@ -67,24 +68,23 @@ async def handle_task_event_send(params: SendEventParams): input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - callback = create_langgraph_tracing_handler( - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - stream = graph.astream( {"messages": [{"role": "user", "content": user_message}]}, - config={ - "configurable": {"thread_id": task_id}, - "callbacks": [callback], - }, + config={"configurable": {"thread_id": task_id}}, stream_mode=["messages", "updates"], ) - final_output = await stream_langgraph_events(stream, task_id) + turn = 
LangGraphTurn(stream, model=None) + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + + result = await emitter.auto_send_turn(turn) if turn_span: - turn_span.output = {"final_output": final_output} + turn_span.output = {"final_output": result.final_text} @acp.on_task_create diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore b/src/agentex/lib/cli/templates/default-openai-agents/.dockerignore.j2 similarity index 96% rename from examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore rename to src/agentex/lib/cli/templates/default-openai-agents/.dockerignore.j2 index c49489471..c2d7fca4d 100644 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore +++ b/src/agentex/lib/cli/templates/default-openai-agents/.dockerignore.j2 @@ -40,4 +40,4 @@ venv.bak/ .gitignore # Misc -.DS_Store +.DS_Store diff --git a/src/agentex/lib/cli/templates/default-openai-agents/.env.example.j2 b/src/agentex/lib/cli/templates/default-openai-agents/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile-uv.j2 new file mode 100644 index 000000000..582434ac9 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile-uv.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY 
--from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile.j2 b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile.j2 new file mode 100644 index 000000000..4d9f41d45 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile.j2 @@ -0,0 +1,43 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + node \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# 
Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-openai-agents/README.md.j2 b/src/agentex/lib/cli/templates/default-openai-agents/README.md.j2 new file mode 100644 index 000000000..9611e83bd --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/README.md.j2 @@ -0,0 +1,69 @@ +# {{ agent_name }} - AgentEx Async OpenAI Agents SDK Agent + +This template builds an **asynchronous** (non-Temporal) agent built on the +**OpenAI Agents SDK**, delivered through the unified harness surface on AgentEx: +- Defines an OpenAI Agents SDK `Agent` (with an example weather tool) inline in + `acp.py` +- Wraps the SDK run in an `OpenAITurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` + (the async Redis push path), so the UI receives output in real time +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- An `OPENAI_API_KEY` in your environment (or a `LITELLM_API_KEY`, which is + copied to `OPENAI_API_KEY` for LiteLLM-proxy compatibility) + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, agent + tool definitions, event handlers +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} 
+``` + +## Key Concepts + +### Async ACP with the harness +The async ACP model streams events over Redis instead of an HTTP response. The +`@acp.on_task_event_send` handler runs the OpenAI Agents SDK and pushes the +harness events to the task stream. + +### The unified harness surface +`OpenAITurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes the SDK's streamed run into canonical AgentEx events; the emitter +traces and delivers them. + +## Development + +### 1. Add Your Own Tools +Define new `@function_tool` functions in `project/acp.py` and add them to the +agent's `tools=[...]` list in `create_agent()`. + +### 2. Customize the Agent +Edit `MODEL_NAME` and `INSTRUCTIONS` in `project/acp.py` to change the model or +system prompt. + +### 3. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/default-openai-agents/dev.ipynb.j2 b/src/agentex/lib/cli/templates/default-openai-agents/dev.ipynb.j2 new file mode 100644 index 000000000..d8c10a65a --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/dev.ipynb.j2 @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# # (Optional) Create a new task. If you don't create a new task, each message will be sent to a new task. 
The server will create the task for you.\n", + "\n", + "# import uuid\n", + "\n", + "# TASK_ID = str(uuid.uuid4())[:8]\n", + "\n", + "# rpc_response = client.agents.rpc_by_name(\n", + "# agent_name=AGENT_NAME,\n", + "# method=\"task/create\",\n", + "# params={\n", + "# \"name\": f\"{TASK_ID}-task\",\n", + "# \"params\": {}\n", + "# }\n", + "# )\n", + "\n", + "# task = rpc_response.result\n", + "# print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Test non streaming response\n", + "from agentex.types import TextContent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_message(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": False\n", + " }\n", + ")\n", + "\n", + "if not rpc_response or not rpc_response.result:\n", + " raise ValueError(\"No result in response\")\n", + "\n", + "# Extract and print just the text content from the response\n", + "for task_message in rpc_response.result:\n", + " content = task_message.content\n", + " if isinstance(content, TextContent):\n", + " text = content.content\n", + " print(text)\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "79688331", + "metadata": {}, + "outputs": [], + "source": [ + "# Test streaming response\n", + "from agentex.types.task_message_update import StreamTaskMessageDelta, StreamTaskMessageFull\n", + "from agentex.types.text_delta import TextDelta\n", + "\n", + "\n", + "# The result object of message/send will be a TaskMessageUpdate which is a union of the following types:\n", + "# - StreamTaskMessageStart: \n", + "# - An indicator that a streaming message was started, doesn't contain any useful content\n", + "# - StreamTaskMessageDelta: \n", + "# - A delta of a streaming message, contains the text delta to aggregate\n", + "# - StreamTaskMessageDone: \n", + "# - An indicator that a streaming message was done, doesn't contain any useful content\n", + "# - StreamTaskMessageFull: \n", + "# - A non-streaming message, there is nothing to aggregate, since this contains the full message, not deltas\n", + "\n", + "# When processing StreamTaskMessageDelta, if you are expecting more than TextDeltas, such as DataDelta, ToolRequestDelta, or ToolResponseDelta, you can process them as well\n", + "# When processing StreamTaskMessageFull, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "for agent_rpc_response_chunk in client.agents.send_message_stream(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": True\n", + " }\n", + "):\n", + " # We know that the result of the message/send when stream is set to True will be a TaskMessageUpdate\n", + " task_message_update = agent_rpc_response_chunk.result\n", + " # Print only the text deltas as they arrive or any full messages\n", + " if isinstance(task_message_update, StreamTaskMessageDelta):\n", + " delta = task_message_update.delta\n", + " if isinstance(delta, TextDelta):\n", + " 
print(delta.text_delta, end=\"\", flush=True)\n", + " else:\n", + " print(f\"Found non-text {type(delta)} object in streaming message.\")\n", + " elif isinstance(task_message_update, StreamTaskMessageFull):\n", + " content = task_message_update.content\n", + " if isinstance(content, TextContent):\n", + " print(content.content)\n", + " else:\n", + " print(f\"Found non-text {type(content)} object in full message.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e7e042", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/default-openai-agents/environments.yaml.j2 b/src/agentex/lib/cli/templates/default-openai-agents/environments.yaml.j2 new file mode 100644 index 000000000..73924abdd --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/environments.yaml.j2 @@ -0,0 +1,53 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. 
+ +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + diff --git a/src/agentex/lib/cli/templates/default-openai-agents/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default-openai-agents/manifest.yaml.j2 new file mode 100644 index 000000000..deae08dee --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/manifest.yaml.j2 @@ -0,0 +1,115 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. 
All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: async + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description }} + + # Temporal workflow configuration + # Set enabled: true 
to use Temporal workflows for long-running tasks + temporal: + enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: [] # Update with your credentials + # - env_var_name: LITELLM_API_KEY + # secret_name: litellm-api-key + # secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: {} # Update with your environment variables + # LITELLM_API_KEY: "" + # OPENAI_BASE_URL: "" + # OPENAI_ORG_ID: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-openai-agents/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-openai-agents/project/acp.py.j2 new file mode 100644 index 000000000..b430fa07d --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/project/acp.py.j2 @@ -0,0 +1,135 @@ +"""ACP handler for {{ agent_name }} — an async OpenAI Agents SDK agent. + +Uses the async ACP model with Redis streaming instead of HTTP yields. The +OpenAI Agents SDK run is wrapped in an ``OpenAITurn`` and pushed to the task +stream via ``UnifiedEmitter.auto_send_turn`` — the async delivery path of the +unified harness surface. 
``auto_send_turn`` returns a ``TurnResult`` carrying +the accumulated final text and normalized usage. + +The agent and its tools are defined inline below so this template stays a +single, self-contained ``acp.py``. +""" + +from __future__ import annotations + +import os +from datetime import datetime + +from dotenv import load_dotenv + +load_dotenv() + +from agents import Agent, Runner, function_tool, set_tracing_disabled + +from agentex.lib import adk +from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.lib.types.fastacp import AsyncACPConfig +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.lib.adk._modules._openai_turn import OpenAITurn +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +# Disable the openai-agents SDK's native tracer so it doesn't ship traces to +# api.openai.com using OPENAI_API_KEY (which may be a LiteLLM proxy key). +# SGP tracing below still runs via the Agentex tracing manager. +set_tracing_disabled(True) + +logger = make_logger(__name__) + +# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client compatibility. +_litellm_key = os.environ.get("LITELLM_API_KEY") +if _litellm_key and not os.environ.get("OPENAI_API_KEY"): + os.environ["OPENAI_API_KEY"] = _litellm_key + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +acp = FastACP.create( + acp_type="async", + config=AsyncACPConfig(type="base"), +) + +MODEL_NAME = "gpt-4o" +INSTRUCTIONS = """You are a helpful AI assistant with access to tools. 
+ +Current date and time: {timestamp} + +Guidelines: +- Be concise and helpful +- Use the weather tool when the user asks about the weather +- Always report the real tool output back to the user +""" + + +@function_tool +def get_weather(city: str) -> str: + """Get the current weather for a city.""" + return f"The weather in {city} is sunny and 72°F" + + +def create_agent() -> Agent: + """Build and return the OpenAI Agents SDK agent with the weather tool.""" + return Agent( + name="{{ agent_name }}", + model=MODEL_NAME, + instructions=INSTRUCTIONS.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), + tools=[get_weather], + ) + + +_agent = None + + +def get_agent() -> Agent: + global _agent + if _agent is None: + _agent = create_agent() + return _agent + + +@acp.on_task_create +async def handle_task_create(params: CreateTaskParams): + logger.info(f"Task created: {params.task.id}") + + +@acp.on_task_event_send +async def handle_task_event_send(params: SendEventParams): + """Handle each user message: run the agent and auto-send its turn.""" + agent = get_agent() + task_id = params.task.id + user_message = params.event.content.content + + logger.info(f"Processing message for task {task_id}") + + # Echo the user's message into the task history. 
+ await adk.messages.create(task_id=task_id, content=params.event.content) + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name="message", + input={"message": user_message}, + data={"__span_type__": "AGENT_WORKFLOW"}, + ) as turn_span: + result = Runner.run_streamed(starting_agent=agent, input=user_message) + turn = OpenAITurn(result=result, model=MODEL_NAME) + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + turn_result = await emitter.auto_send_turn(turn) + if turn_span: + turn_span.output = {"final_output": turn_result.final_text} + + +@acp.on_task_cancel +async def handle_task_canceled(params: CancelTaskParams): + logger.info(f"Task canceled: {params.task.id}") diff --git a/src/agentex/lib/cli/templates/default-openai-agents/pyproject.toml.j2 b/src/agentex/lib/cli/templates/default-openai-agents/pyproject.toml.j2 new file mode 100644 index 000000000..4b9c7ed71 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/pyproject.toml.j2 @@ -0,0 +1,34 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "openai-agents", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "black", + "isort", + "flake8", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/default-openai-agents/requirements.txt.j2 b/src/agentex/lib/cli/templates/default-openai-agents/requirements.txt.j2 new file mode 100644 index 000000000..14779c089 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/requirements.txt.j2 @@ -0,0 +1,11 @@ +# Install agentex-sdk from 
local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# OpenAI Agents SDK +openai-agents + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 index 5692396b2..e5eabb20d 100644 --- a/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 @@ -19,21 +19,19 @@ from dotenv import load_dotenv load_dotenv() -from project.agent import create_agent +from project.agent import MODEL_NAME, create_agent from pydantic_ai.run import AgentRunResultEvent from pydantic_ai.messages import ModelMessagesTypeAdapter import agentex.lib.adk as adk -from agentex.lib.adk import ( - stream_pydantic_ai_events, - create_pydantic_ai_tracing_handler, -) from agentex.protocol.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.types.fastacp import AsyncACPConfig from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger from agentex.lib.utils.model_utils import BaseModel from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config logger = make_logger(__name__) @@ -125,15 +123,17 @@ async def handle_task_event_send(params: SendEventParams): input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - tracing_handler = create_pydantic_ai_tracing_handler( + # Construct the UnifiedEmitter from the ACP context so tracing is + # automatic and messages are auto-sent to the task stream (Redis). 
+ emitter = UnifiedEmitter( + task_id=task_id, trace_id=task_id, parent_span_id=turn_span.id if turn_span else None, - task_id=task_id, ) # Wrap the pydantic-ai event stream so we can capture the final # AgentRunResultEvent (which carries the full message list for the - # next turn) without changing the streaming-helper's signature. + # next turn) before forwarding events to the emitter. captured_messages: list[Any] = [] async def tee_messages(upstream) -> AsyncIterator[Any]: @@ -143,9 +143,8 @@ async def handle_task_event_send(params: SendEventParams): yield event async with agent.run_stream_events(user_message, message_history=previous_messages) as stream: - final_output = await stream_pydantic_ai_events( - tee_messages(stream), task_id, tracing_handler=tracing_handler - ) + turn = PydanticAITurn(tee_messages(stream), model=MODEL_NAME) + result = await emitter.auto_send_turn(turn) # Save the updated message history so the next turn picks up here. if captured_messages: @@ -158,7 +157,7 @@ async def handle_task_event_send(params: SendEventParams): ) if turn_span: - turn_span.output = {"final_output": final_output} + turn_span.output = {"final_output": result.final_text} @acp.on_task_cancel diff --git a/src/agentex/lib/cli/templates/sync-claude-code/.dockerignore.j2 b/src/agentex/lib/cli/templates/sync-claude-code/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/sync-claude-code/.env.example.j2 
b/src/agentex/lib/cli/templates/sync-claude-code/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile-uv.j2 new file mode 100644 index 000000000..582434ac9 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile-uv.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using 
uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile.j2 b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile.j2 new file mode 100644 index 000000000..4d9f41d45 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile.j2 @@ -0,0 +1,43 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-claude-code/README.md.j2 b/src/agentex/lib/cli/templates/sync-claude-code/README.md.j2 new file mode 100644 index 000000000..7e38eddec --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/README.md.j2 @@ -0,0 +1,64 @@ +# {{ agent_name }} - AgentEx Sync Claude Code Agent + +This template builds a **synchronous** agent that drives the **Claude Code CLI** +through the unified harness surface on AgentEx: +- Spawns `claude -p 
--output-format stream-json --verbose` as a local subprocess +- Wraps the CLI's stdout stream in a `ClaudeCodeTurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.yield_turn` + (the sync HTTP yield path) +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `claude` CLI installed and on your `PATH` +- An `ANTHROPIC_API_KEY` (or equivalent credential) in your environment + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, subprocess spawn, and message handler +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Sync ACP with the harness +The sync ACP model uses HTTP request/response. The `@acp.on_message_send` +handler spawns the Claude Code CLI and yields the harness events back to the +client as they arrive. + +### The unified harness surface +`ClaudeCodeTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Customize the subprocess +Edit `_spawn_claude` in `project/acp.py` to change the CLI flags, working +directory, or how the prompt is delivered. + +### 2. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 3. 
Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/sync-claude-code/dev.ipynb.j2 b/src/agentex/lib/cli/templates/sync-claude-code/dev.ipynb.j2 new file mode 100644 index 000000000..d8c10a65a --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/dev.ipynb.j2 @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# # (Optional) Create a new task. If you don't create a new task, each message will be sent to a new task. 
The server will create the task for you.\n", + "\n", + "# import uuid\n", + "\n", + "# TASK_ID = str(uuid.uuid4())[:8]\n", + "\n", + "# rpc_response = client.agents.rpc_by_name(\n", + "# agent_name=AGENT_NAME,\n", + "# method=\"task/create\",\n", + "# params={\n", + "# \"name\": f\"{TASK_ID}-task\",\n", + "# \"params\": {}\n", + "# }\n", + "# )\n", + "\n", + "# task = rpc_response.result\n", + "# print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Test non streaming response\n", + "from agentex.types import TextContent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_message(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": False\n", + " }\n", + ")\n", + "\n", + "if not rpc_response or not rpc_response.result:\n", + " raise ValueError(\"No result in response\")\n", + "\n", + "# Extract and print just the text content from the response\n", + "for task_message in rpc_response.result:\n", + " content = task_message.content\n", + " if isinstance(content, TextContent):\n", + " text = content.content\n", + " print(text)\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "79688331", + "metadata": {}, + "outputs": [], + "source": [ + "# Test streaming response\n", + "from agentex.types.task_message_update import StreamTaskMessageDelta, StreamTaskMessageFull\n", + "from agentex.types.text_delta import TextDelta\n", + "\n", + "\n", + "# The result object of message/send will be a TaskMessageUpdate which is a union of the following types:\n", + "# - StreamTaskMessageStart: \n", + "# - An indicator that a streaming message was started, doesn't contain any useful content\n", + "# - StreamTaskMessageDelta: \n", + "# - A delta of a streaming message, contains the text delta to aggregate\n", + "# - StreamTaskMessageDone: \n", + "# - An indicator that a streaming message was done, doesn't contain any useful content\n", + "# - StreamTaskMessageFull: \n", + "# - A non-streaming message, there is nothing to aggregate, since this contains the full message, not deltas\n", + "\n", + "# When processing StreamTaskMessageDelta, if you are expecting more than TextDeltas, such as DataDelta, ToolRequestDelta, or ToolResponseDelta, you can process them as well\n", + "# When processing StreamTaskMessageFull, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "for agent_rpc_response_chunk in client.agents.send_message_stream(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": True\n", + " }\n", + "):\n", + " # We know that the result of the message/send when stream is set to True will be a TaskMessageUpdate\n", + " task_message_update = agent_rpc_response_chunk.result\n", + " # Print only the text deltas as they arrive or any full messages\n", + " if isinstance(task_message_update, StreamTaskMessageDelta):\n", + " delta = task_message_update.delta\n", + " if isinstance(delta, TextDelta):\n", + " 
print(delta.text_delta, end=\"\", flush=True)\n", + " else:\n", + " print(f\"Found non-text {type(delta)} object in streaming message.\")\n", + " elif isinstance(task_message_update, StreamTaskMessageFull):\n", + " content = task_message_update.content\n", + " if isinstance(content, TextContent):\n", + " print(content.content)\n", + " else:\n", + " print(f\"Found non-text {type(content)} object in full message.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e7e042", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/sync-claude-code/environments.yaml.j2 b/src/agentex/lib/cli/templates/sync-claude-code/environments.yaml.j2 new file mode 100644 index 000000000..73924abdd --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/environments.yaml.j2 @@ -0,0 +1,53 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. 
+ +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# kubernetes: +# # OPTIONAL - Otherwise it will be derived from separately. However, this can be used to override the derived +# # namespace and deploy it with in the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + diff --git a/src/agentex/lib/cli/templates/sync-claude-code/manifest.yaml.j2 b/src/agentex/lib/cli/templates/sync-claude-code/manifest.yaml.j2 new file mode 100644 index 000000000..7bf2cb355 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/manifest.yaml.j2 @@ -0,0 +1,117 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. 
All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: sync + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description }} + + # Temporal workflow configuration + # Set enabled: true to 
use Temporal workflows for long-running tasks + temporal: + enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: LITELLM_API_KEY + secret_name: litellm-api-key + secret_key: api-key + - env_var_name: SGP_API_KEY + secret_name: sgp-api-key + secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: + LITELLM_API_KEY: "" # Set your LLM API key + # OPENAI_BASE_URL: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-claude-code/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-claude-code/project/acp.py.j2 new file mode 100644 index 000000000..95b370761 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/project/acp.py.j2 @@ -0,0 +1,135 @@ +"""ACP handler for {{ agent_name }} — a sync Claude Code agent. + +Spawns ``claude -p --output-format stream-json --verbose`` as a LOCAL +asyncio subprocess (no Scale sandbox — that is a production concern). Stdout +lines are fed into ``ClaudeCodeTurn``, which wraps +``convert_claude_code_to_agentex_events``. 
async def _spawn_claude(prompt: str) -> AsyncIterator[str]:
    """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines.

    This is a seam: tests can replace it with a fake async iterator of
    pre-recorded lines so no real CLI invocation is needed offline.

    Args:
        prompt: Text written to the CLI's stdin. stdin is closed immediately
            afterwards so the CLI sees end-of-input.

    Yields:
        Every non-empty stdout line, stripped of surrounding whitespace.
    """
    # Function-scope import so this block does not depend on a file-level import.
    import codecs

    proc = await asyncio.create_subprocess_exec(
        "claude",
        "-p",
        "--output-format",
        "stream-json",
        "--verbose",
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    assert proc.stdout is not None
    assert proc.stdin is not None

    proc.stdin.write(prompt.encode())
    proc.stdin.close()

    # Drain stderr concurrently. With --verbose, Claude Code can write enough to
    # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks
    # on its stderr write while we block reading stdout — a deadlock. A
    # background task keeps stderr flowing so stdout never stalls.
    async def _drain_stderr() -> None:
        assert proc.stderr is not None
        # Fixed-size reads instead of line iteration: iterating a StreamReader
        # goes through readline(), which raises ValueError once a single line
        # exceeds the reader's buffer limit.
        while await proc.stderr.read(65536):
            pass

    stderr_task = asyncio.create_task(_drain_stderr())

    # Incremental decoder: a multibyte UTF-8 character split across two reads
    # is decoded correctly instead of being replaced at the chunk boundary.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

    try:
        buffer = ""
        # read() has no per-line size cap, so arbitrarily long stream-json
        # lines (e.g. large tool results) cannot trip the readline() limit.
        while chunk := await proc.stdout.read(65536):
            buffer += decoder.decode(chunk)
            # Everything before the last newline is complete; the remainder
            # stays buffered until more data (or EOF) arrives.
            *complete_lines, buffer = buffer.split("\n")
            for raw_line in complete_lines:
                line = raw_line.strip()
                if line:
                    yield line

        # Flush bytes the decoder was still holding, then emit a final line
        # that was not newline-terminated.
        buffer += decoder.decode(b"", final=True)
        if buffer.strip():
            yield buffer.strip()

        await proc.wait()
    finally:
        # Release the subprocess and stderr drain task even if the consumer
        # abandons the generator early (task cancellation / client disconnect):
        # cancel the drain task and terminate+reap the process if it is still
        # running, so neither is leaked.
        stderr_task.cancel()
        try:
            await stderr_task
        except asyncio.CancelledError:
            pass
        if proc.returncode is None:
            try:
                proc.terminate()
            except ProcessLookupError:
                # The process exited between the returncode check and terminate().
                pass
            await proc.wait()


@acp.on_message_send
async def handle_message_send(
    params: SendMessageParams,
) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
    """Handle an incoming message: run Claude Code locally and stream events.

    Opens a tracing span named ``"message"`` for the turn, wraps the CLI line
    stream from ``_spawn_claude`` in a ``ClaudeCodeTurn``, and re-yields every
    event produced by ``UnifiedEmitter.yield_turn``.

    Args:
        params: Incoming request; ``params.task.id`` is used as both task and
            trace id, and ``params.content.content`` is sent as the prompt.

    Yields:
        The events produced by ``UnifiedEmitter.yield_turn`` for this turn.
    """
    task_id = params.task.id
    prompt = params.content.content
    logger.info("Processing message for task %s", task_id)

    async with adk.tracing.span(
        trace_id=task_id,
        task_id=task_id,
        name="message",
        input={"message": prompt},
        data={"__span_type__": "AGENT_WORKFLOW"},
    ) as turn_span:
        emitter = UnifiedEmitter(
            task_id=task_id,
            trace_id=task_id,
            # The span context may yield None; then no parent span is attached.
            parent_span_id=turn_span.id if turn_span else None,
        )
        turn = ClaudeCodeTurn(_spawn_claude(prompt))
        async for event in emitter.yield_turn(turn):
            yield event
a/src/agentex/lib/cli/templates/sync-claude-code/pyproject.toml.j2 b/src/agentex/lib/cli/templates/sync-claude-code/pyproject.toml.j2 new file mode 100644 index 000000000..e499b1dc1 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/pyproject.toml.j2 @@ -0,0 +1,33 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "black", + "isort", + "flake8", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/sync-claude-code/requirements.txt.j2 b/src/agentex/lib/cli/templates/sync-claude-code/requirements.txt.j2 new file mode 100644 index 000000000..8c0630384 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/requirements.txt.j2 @@ -0,0 +1,8 @@ +# Install agentex-sdk from local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/sync-codex/.dockerignore.j2 b/src/agentex/lib/cli/templates/sync-codex/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git 
a/src/agentex/lib/cli/templates/sync-codex/.env.example.j2 b/src/agentex/lib/cli/templates/sync-codex/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/sync-codex/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/sync-codex/Dockerfile-uv.j2 new file mode 100644 index 000000000..582434ac9 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/Dockerfile-uv.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV 
# syntax=docker/dockerfile:1.3
FROM python:3.12-slim
COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/

# Install system dependencies.
# The Node.js runtime is packaged as `nodejs` (not `node`); this matches the
# package name used by the sibling Dockerfile-uv template.
# NOTE: this image installs nodejs/npm only; no step here installs the codex
# CLI itself.
RUN apt-get update && apt-get install -y \
    htop \
    vim \
    curl \
    tar \
    python3-dev \
    postgresql-client \
    build-essential \
    libpq-dev \
    gcc \
    cmake \
    netcat-openbsd \
    nodejs \
    npm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN uv pip install --system --upgrade pip setuptools wheel

ENV UV_HTTP_TIMEOUT=1000

# Copy just the requirements file to optimize caching
COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt

WORKDIR /app/{{ project_path_from_build_root }}

# Install the required Python packages
RUN uv pip install --system -r requirements.txt

# Copy the project code
COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project


# Set environment variables
ENV PYTHONPATH=/app

# Run the agent using uvicorn
CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
--json` as a local subprocess +- Wraps the CLI's stdout stream in a `CodexTurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.yield_turn` + (the sync HTTP yield path) +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `codex` CLI installed and on your `PATH` (`npm install -g @openai/codex`) +- An `OPENAI_API_KEY` in your environment + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, subprocess spawn, and message handler +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Sync ACP with the harness +The sync ACP model uses HTTP request/response. The `@acp.on_message_send` +handler spawns the Codex CLI and yields the harness events back to the client +as they arrive. + +### The unified harness surface +`CodexTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Choose a model +Set `CODEX_MODEL` (defaults to `o4-mini`) to control which model codex uses. + +### 2. Customize the subprocess +Edit `_spawn_codex` in `project/acp.py` to change the CLI flags or how the +prompt is delivered. + +### 3. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4. 
Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/sync-codex/dev.ipynb.j2 b/src/agentex/lib/cli/templates/sync-codex/dev.ipynb.j2 new file mode 100644 index 000000000..d8c10a65a --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/dev.ipynb.j2 @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# # (Optional) Create a new task. If you don't create a new task, each message will be sent to a new task. 
The server will create the task for you.\n", + "\n", + "# import uuid\n", + "\n", + "# TASK_ID = str(uuid.uuid4())[:8]\n", + "\n", + "# rpc_response = client.agents.rpc_by_name(\n", + "# agent_name=AGENT_NAME,\n", + "# method=\"task/create\",\n", + "# params={\n", + "# \"name\": f\"{TASK_ID}-task\",\n", + "# \"params\": {}\n", + "# }\n", + "# )\n", + "\n", + "# task = rpc_response.result\n", + "# print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Test non streaming response\n", + "from agentex.types import TextContent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_message(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": False\n", + " }\n", + ")\n", + "\n", + "if not rpc_response or not rpc_response.result:\n", + " raise ValueError(\"No result in response\")\n", + "\n", + "# Extract and print just the text content from the response\n", + "for task_message in rpc_response.result:\n", + " content = task_message.content\n", + " if isinstance(content, TextContent):\n", + " text = content.content\n", + " print(text)\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "79688331", + "metadata": {}, + "outputs": [], + "source": [ + "# Test streaming response\n", + "from agentex.types.task_message_update import StreamTaskMessageDelta, StreamTaskMessageFull\n", + "from agentex.types.text_delta import TextDelta\n", + "\n", + "\n", + "# The result object of message/send will be a TaskMessageUpdate which is a union of the following types:\n", + "# - StreamTaskMessageStart: \n", + "# - An indicator that a streaming message was started, doesn't contain any useful content\n", + "# - StreamTaskMessageDelta: \n", + "# - A delta of a streaming message, contains the text delta to aggregate\n", + "# - StreamTaskMessageDone: \n", + "# - An indicator that a streaming message was done, doesn't contain any useful content\n", + "# - StreamTaskMessageFull: \n", + "# - A non-streaming message, there is nothing to aggregate, since this contains the full message, not deltas\n", + "\n", + "# Whenn processing StreamTaskMessageDelta, if you are expecting more than TextDeltas, such as DataDelta, ToolRequestDelta, or ToolResponseDelta, you can process them as well\n", + "# Whenn processing StreamTaskMessageFull, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "for agent_rpc_response_chunk in client.agents.send_message_stream(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": True\n", + " }\n", + "):\n", + " # We know that the result of the message/send when stream is set to True will be a TaskMessageUpdate\n", + " task_message_update = agent_rpc_response_chunk.result\n", + " # Print oly the text deltas as they arrive or any full messages\n", + " if isinstance(task_message_update, StreamTaskMessageDelta):\n", + " delta = task_message_update.delta\n", + " if isinstance(delta, TextDelta):\n", + " 
print(delta.text_delta, end=\"\", flush=True)\n", + " else:\n", + " print(f\"Found non-text {type(task_message)} object in streaming message.\")\n", + " elif isinstance(task_message_update, StreamTaskMessageFull):\n", + " content = task_message_update.content\n", + " if isinstance(content, TextContent):\n", + " print(content.content)\n", + " else:\n", + " print(f\"Found non-text {type(task_message)} object in full message.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e7e042", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/sync-codex/environments.yaml.j2 b/src/agentex/lib/cli/templates/sync-codex/environments.yaml.j2 new file mode 100644 index 000000000..73924abdd --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/environments.yaml.j2 @@ -0,0 +1,53 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. 
+ +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# kubernetes: +# # OPTIONAL - Otherwise it will be derived from separately. However, this can be used to override the derived +# # namespace and deploy it with in the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + diff --git a/src/agentex/lib/cli/templates/sync-codex/manifest.yaml.j2 b/src/agentex/lib/cli/templates/sync-codex/manifest.yaml.j2 new file mode 100644 index 000000000..7bf2cb355 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/manifest.yaml.j2 @@ -0,0 +1,117 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. 
The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: sync + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description }} + + # Temporal workflow configuration + # Set enabled: true to use Temporal workflows for long-running tasks + temporal: + enabled: 
false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: LITELLM_API_KEY + secret_name: litellm-api-key + secret_key: api-key + - env_var_name: SGP_API_KEY + secret_name: sgp-api-key + secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: + LITELLM_API_KEY: "" # Set your LLM API key + # OPENAI_BASE_URL: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-codex/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-codex/project/acp.py.j2 new file mode 100644 index 000000000..931385328 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/project/acp.py.j2 @@ -0,0 +1,174 @@ +"""Sync ACP handler for {{ agent_name }} — a Codex CLI harness agent. + +Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` + +``UnifiedEmitter`` for a sync (HTTP-yield) ACP agent. + +The handler: +1. Spawns ``codex exec --json`` as a LOCAL asyncio subprocess (no sandbox). + This is correct for local development; production isolation is a separate + concern. +2. Wraps the stdout line stream in a ``CodexTurn``. +3. 
async def _spawn_codex(model: str) -> asyncio.subprocess.Process:
    """Spawn ``codex exec --json`` locally and return the live process.

    Injection seam: tests replace this function with a fake that returns a
    mock process whose stdout yields pre-recorded event lines.

    The flags:
      --json                    machine-readable newline-delimited events
      --skip-git-repo-check     safe to run outside a git repo
      --dangerously-bypass-approvals-and-sandbox
                                skip interactive approval prompts in a
                                non-interactive (server) context
      --model                   which OpenAI model to use

    The caller writes the prompt to stdin after the process starts, then
    closes stdin so codex knows input is complete.

    Args:
        model: Model name passed to ``--model``.

    Returns:
        The started subprocess, with stdin and stdout connected to pipes.
    """
    cmd = [
        "codex",
        "exec",
        "--json",
        "--skip-git-repo-check",
        "--dangerously-bypass-approvals-and-sandbox",
        "--model",
        model,
        "-",  # read prompt from stdin
    ]
    return await asyncio.create_subprocess_exec(
        *cmd,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        # Discard stderr: codex --json writes events to stdout; its stderr is
        # progress/debug noise. Capturing it with PIPE but never reading it
        # would deadlock once codex fills the OS pipe buffer (~64 KB).
        stderr=asyncio.subprocess.DEVNULL,
        env={**os.environ},
    )


async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]:
    """Yield newline-delimited JSON lines from the process stdout.

    Uses an incremental UTF-8 decoder so a multibyte character split across two
    4 KB reads is decoded correctly instead of being corrupted at the boundary.

    Args:
        process: A process whose ``stdout`` is a readable pipe.

    Yields:
        Every non-empty line, stripped of surrounding whitespace. A final line
        without a trailing newline is yielded once stdout reaches EOF.
    """
    assert process.stdout is not None
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    buffer = ""
    while chunk := await process.stdout.read(4096):
        buffer += decoder.decode(chunk)
        # Split once per chunk: all parts but the last are complete lines, the
        # last part is the (possibly empty) unfinished tail.
        *complete_lines, buffer = buffer.split("\n")
        for raw_line in complete_lines:
            line = raw_line.strip()
            if line:
                yield line
    # Flush any bytes the decoder was still holding at EOF.
    buffer += decoder.decode(b"", final=True)
    if buffer.strip():
        yield buffer.strip()


@acp.on_message_send
async def handle_message_send(
    params: SendMessageParams,
) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
    """Handle each message by running ``codex exec`` locally and streaming events.

    Opens a tracing span named ``"message"``, spawns codex, feeds it the user
    message on stdin, and re-yields every event produced by
    ``UnifiedEmitter.yield_turn``. After the stream completes, the turn's
    wall-clock duration and token usage are recorded on the span.

    Args:
        params: Incoming request; ``params.task.id`` is used as both task and
            trace id, and ``params.content.content`` is sent as the prompt.

    Yields:
        The events produced by ``UnifiedEmitter.yield_turn`` for this turn.
    """
    task_id = params.task.id
    user_message = params.content.content
    logger.info("Processing message for task %s", task_id)

    start_ms = int(time.monotonic() * 1000)

    async with adk.tracing.span(
        trace_id=task_id,
        task_id=task_id,
        name="message",
        input={"message": user_message},
        data={"__span_type__": "AGENT_WORKFLOW"},
    ) as turn_span:
        process = await _spawn_codex(MODEL)

        try:
            # Write prompt to stdin then close it so codex knows input is done.
            assert process.stdin is not None
            process.stdin.write(user_message.encode("utf-8"))
            await process.stdin.drain()
            process.stdin.close()

            turn = CodexTurn(
                events=_process_stdout(process),
                model=MODEL,
            )

            emitter = UnifiedEmitter(
                task_id=task_id,
                trace_id=task_id,
                parent_span_id=turn_span.id if turn_span else None,
            )

            async for event in emitter.yield_turn(turn):
                yield event

            await process.wait()
        finally:
            # Release the subprocess even if the consumer abandons the stream
            # early (task cancellation / client disconnect) or an error is
            # raised while writing stdin or streaming: terminate and reap it
            # if it is still running so it is not leaked.
            if process.returncode is None:
                try:
                    process.terminate()
                except ProcessLookupError:
                    # The process exited between the check and terminate().
                    pass
                await process.wait()

        # Record the real wall-clock duration AFTER streaming completes; setting
        # it before the stream ran would capture only subprocess spawn overhead.
        turn.duration_ms = int(time.monotonic() * 1000) - start_ms

        if turn_span:
            usage = turn.usage()
            turn_span.output = {
                "model": usage.model,
                "input_tokens": usage.input_tokens,
                "output_tokens": usage.output_tokens,
            }
from typing import AsyncGenerator import agentex.lib.adk as adk -from agentex.lib.adk import create_langgraph_tracing_handler, convert_langgraph_to_agentex_events +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config from agentex.lib.sdk.fastacp.fastacp import FastACP from agentex.protocol.acp import SendMessageParams from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn from agentex.types.task_message_content import TaskMessageContent from agentex.types.task_message_delta import TextDelta from agentex.types.task_message_update import TaskMessageUpdate @@ -72,22 +73,21 @@ async def handle_message_send( input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - callback = create_langgraph_tracing_handler( - trace_id=thread_id, - parent_span_id=turn_span.id if turn_span else None, - ) - stream = graph.astream( {"messages": [{"role": "user", "content": user_message}]}, - config={ - "configurable": {"thread_id": thread_id}, - "callbacks": [callback], - }, + config={"configurable": {"thread_id": thread_id}}, stream_mode=["messages", "updates"], ) + turn = LangGraphTurn(stream, model=None) + emitter = UnifiedEmitter( + task_id=thread_id, + trace_id=thread_id, + parent_span_id=turn_span.id if turn_span else None, + ) + final_text = "" - async for event in convert_langgraph_to_agentex_events(stream): + async for event in emitter.yield_turn(turn): # Accumulate text deltas for span output delta = getattr(event, "delta", None) if isinstance(delta, TextDelta) and delta.text_delta: diff --git a/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 index 4925e847f..f82dadcb6 100644 --- a/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 
+++ b/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 @@ -15,19 +15,17 @@ from dotenv import load_dotenv load_dotenv() -from project.agent import create_agent +from project.agent import MODEL_NAME, create_agent import agentex.lib.adk as adk -from agentex.lib.adk import ( - create_pydantic_ai_tracing_handler, - convert_pydantic_ai_to_agentex_events, -) from agentex.protocol.acp import SendMessageParams +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger from agentex.lib.sdk.fastacp.fastacp import FastACP from agentex.types.task_message_update import TaskMessageUpdate from agentex.types.task_message_content import TaskMessageContent +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config logger = make_logger(__name__) @@ -73,7 +71,7 @@ async def handle_message_send( logger.info(f"Processing message for task {task_id}") # Open a per-message turn span. Tool calls below nest underneath this - # span via the tracing handler's parent_span_id wiring. + # span via the emitter's parent_span_id wiring. async with adk.tracing.span( trace_id=task_id, task_id=task_id, @@ -81,13 +79,14 @@ async def handle_message_send( input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - tracing_handler = create_pydantic_ai_tracing_handler( + # Construct the UnifiedEmitter from the ACP/streaming context so tracing + # is automatic: tool spans nest under this turn's span. 
+ emitter = UnifiedEmitter( + task_id=task_id, trace_id=task_id, parent_span_id=turn_span.id if turn_span else None, - task_id=task_id, ) async with agent.run_stream_events(user_message) as stream: - async for event in convert_pydantic_ai_to_agentex_events( - stream, tracing_handler=tracing_handler - ): - yield event + turn = PydanticAITurn(stream, model=MODEL_NAME) + async for ev in emitter.yield_turn(turn): + yield ev diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/.dockerignore.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/.env.example.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile-uv.j2 new file mode 100644 index 000000000..2a3f1108b --- /dev/null +++ 
# syntax=docker/dockerfile:1.3
FROM python:3.12-slim
COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/

# Install system dependencies.
# NOTE(review): nodejs + npm are installed but this image never installs the
# `claude` CLI itself, which the README lists as a prerequisite - confirm
# whether an install step is expected here.
RUN apt-get update && apt-get install -y \
    htop \
    vim \
    curl \
    tar \
    python3-dev \
    postgresql-client \
    build-essential \
    libpq-dev \
    gcc \
    cmake \
    netcat-openbsd \
    nodejs \
    npm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install tctl (Temporal CLI) for the architecture the image is built for
# (amd64 or arm64) rather than a hard-coded one. `curl -f` fails the build on
# an HTTP error instead of handing an error page to tar.
RUN arch="$(dpkg --print-architecture)" && \
    curl -fsSL "https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_${arch}.tar.gz" -o /tmp/tctl.tar.gz && \
    tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \
    chmod +x /usr/local/bin/tctl && \
    rm /tmp/tctl.tar.gz

ENV UV_COMPILE_BYTECODE=1
ENV UV_LINK_MODE=copy
ENV UV_HTTP_TIMEOUT=1000

WORKDIR /app/{{ project_path_from_build_root }}

# Copy dependency files for layer caching
COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./

# Install dependencies (without project itself, for layer caching)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --locked --no-install-project --no-dev

# Copy the project code
COPY {{ project_path_from_build_root }}/project ./project

# Install the project
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --locked --no-dev

ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH"

# Run the ACP server using uvicorn
CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]

# When we deploy the worker, we will replace the CMD with the following
# CMD ["python", "-m", "run_worker"]
# syntax=docker/dockerfile:1.3
FROM python:3.12-slim
COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/

# Install system dependencies.
# NOTE(review): nodejs + npm are installed but this image never installs the
# `claude` CLI itself, which the README lists as a prerequisite - confirm
# whether an install step is expected here.
RUN apt-get update && apt-get install -y \
    htop \
    vim \
    curl \
    tar \
    python3-dev \
    postgresql-client \
    build-essential \
    libpq-dev \
    gcc \
    cmake \
    netcat-openbsd \
    nodejs \
    npm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install tctl (Temporal CLI) for the architecture the image is built for
# (amd64 or arm64) rather than a hard-coded one. `curl -f` fails the build on
# an HTTP error instead of handing an error page to tar.
RUN arch="$(dpkg --print-architecture)" && \
    curl -fsSL "https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_${arch}.tar.gz" -o /tmp/tctl.tar.gz && \
    tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \
    chmod +x /usr/local/bin/tctl && \
    rm /tmp/tctl.tar.gz

RUN uv pip install --system --upgrade pip setuptools wheel

ENV UV_HTTP_TIMEOUT=1000

# Copy just the requirements file to optimize caching
COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt

WORKDIR /app/{{ project_path_from_build_root }}

# Install the required Python packages
RUN uv pip install --system -r requirements.txt

# Copy the project code
COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project

# Run the ACP server using uvicorn
CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]

# When we deploy the worker, we will replace the CMD with the following
# CMD ["python", "-m", "run_worker"]
state (the Claude Code `session_id`) + durably across worker crashes +- Each turn delegates to the `run_claude_code_turn` activity, which spawns the + CLI (subprocess I/O is not permitted on the workflow event loop) +- The activity wraps the CLI's stdout stream in a `ClaudeCodeTurn` and delivers + canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `claude` CLI installed and on your `PATH` +- An `ANTHROPIC_API_KEY` (or equivalent credential) in your environment +- A running Temporal service (provided automatically by the local dev stack) + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +This starts both the ACP HTTP server and the Temporal worker. + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ ├── acp.py # Thin ACP server; FastACP auto-wires to the workflow +│ ├── workflow.py # Temporal workflow (durable conversation state) +│ ├── activities.py # run_claude_code_turn activity (CLI subprocess) +│ └── run_worker.py # Temporal worker entrypoint +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Subprocess must run in an activity +Temporal runs workflow + signal-handler bodies on a deterministic sandbox event +loop that does not implement `subprocess_exec`. The workflow therefore delegates +each turn to the `run_claude_code_turn` activity, which also gains Temporal's +retry + timeout guarantees. + +### The unified harness surface +`ClaudeCodeTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Customize the subprocess +Edit `_spawn_claude` in `project/activities.py` to change the CLI flags, working +directory, or how the prompt is delivered. + +### 2. 
Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 3. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/dev.ipynb.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/dev.ipynb.j2 new file mode 100644 index 000000000..d3a68303f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/dev.ipynb.j2 @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# (REQUIRED) Create a new task. 
For Async agents, you must create a task for messages to be associated with.\n", + "import uuid\n", + "\n", + "rpc_response = client.agents.create_task(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", + " \"params\": {}\n", + " }\n", + ")\n", + "\n", + "task = rpc_response.result\n", + "print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Send an event to the agent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_event(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"task_id\": task.id,\n", + " }\n", + ")\n", + "\n", + "event = rpc_response.result\n", + "print(event)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6927cc0", + "metadata": {}, + "outputs": [], + "source": [ + "# Subscribe to the async task messages produced by the agent\n", + "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", + "\n", + "task_messages = subscribe_to_async_task_messages(\n", + " client=client,\n", + " task=task, \n", + " only_after_timestamp=event.created_at, \n", + " 
print_messages=True,\n", + " rich_print=True,\n", + " timeout=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4864e354", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/environments.yaml b/src/agentex/lib/cli/templates/temporal-claude-code/environments.yaml.j2 similarity index 97% rename from examples/tutorials/10_async/10_temporal/130_langgraph/environments.yaml rename to src/agentex/lib/cli/templates/temporal-claude-code/environments.yaml.j2 index d54d8e5ff..a3df5e228 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/environments.yaml +++ b/src/agentex/lib/cli/templates/temporal-claude-code/environments.yaml.j2 @@ -32,7 +32,7 @@ # kubernetes: # # OPTIONAL - Otherwise it will be derived from separately. However, this can be used to override the derived # # namespace and deploy it with in the same namespace that already exists for a separate agent. 
-# namespace: "team-at130-langgraph" +# namespace: "team-{{agent_name}}" # ********** END EXAMPLE ********** schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/manifest.yaml.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/manifest.yaml.j2 new file mode 100644 index 000000000..18cffd54a --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/manifest.yaml.j2 @@ -0,0 +1,140 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. 
The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + # Path to temporal worker file + # Examples: + # project/run_worker.py (standard) + # workers/temporal.py (custom structure) + # ../shared/worker.py (shared across projects) + worker: project/run_worker.py + + +# Agent Configuration +# ----------------- +agent: + # Type of agent - either sync or async + acp_type: async + + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: "{{ description 
}}" + + # Temporal workflow configuration + # This enables your agent to run as a Temporal workflow for long-running tasks + temporal: + enabled: true + workflows: + # Name of the workflow class + # Must match the @workflow.defn name in your workflow.py + - name: {{ workflow_name }} + + # Queue name for task distribution + # Used by Temporal to route tasks to your agent + # Convention: _task_queue + queue_name: {{ queue_name }} + + # Optional: Health check port for temporal worker + # Defaults to 80 if not specified + # health_check_port: 80 + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + # - env_var_name: LITELLM_API_KEY + # secret_name: litellm-api-key + # secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: {} + # LITELLM_API_KEY: "" + # OPENAI_BASE_URL: "" + # OPENAI_ORG_ID: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret name + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/project/acp.py.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/project/acp.py.j2 new file mode 100644 index 
000000000..0515efeeb --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/project/acp.py.j2 @@ -0,0 +1,31 @@ +"""ACP server for {{ agent_name }} — a Temporal Claude Code agent. + +This file is intentionally thin. When ``acp_type="async"`` is combined +with ``TemporalACPConfig``, FastACP auto-wires: + + HTTP task/create -> @workflow.run on the workflow class + HTTP task/event/send -> @workflow.signal(SignalName.RECEIVE_EVENT) + HTTP task/cancel -> workflow cancellation via the Temporal client + +The actual agent code lives in ``project/workflow.py`` and is executed by +the Temporal worker (``project/run_worker.py``), not by this HTTP process. +""" + +from __future__ import annotations + +import os + +from dotenv import load_dotenv + +load_dotenv() + +from agentex.lib.types.fastacp import TemporalACPConfig +from agentex.lib.sdk.fastacp.fastacp import FastACP + +acp = FastACP.create( + acp_type="async", + config=TemporalACPConfig( + type="temporal", + temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), + ), +) diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/project/activities.py.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/project/activities.py.j2 new file mode 100644 index 000000000..b3e9b0f09 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/project/activities.py.j2 @@ -0,0 +1,139 @@ +"""Temporal activity for {{ agent_name }} — Claude Code harness. + +Subprocess spawning (and any other I/O) must run inside a Temporal *activity*, +not in workflow code. Temporal runs workflow + signal-handler bodies on a +deterministic sandbox event loop that does not implement ``subprocess_exec`` +(or threads / sockets), so spawning the CLI directly in the signal handler +raises ``NotImplementedError``. This activity runs the Claude Code CLI, drives +the ``ClaudeCodeTurn`` through ``UnifiedEmitter.auto_send_turn`` (the async +Redis push path), and returns the turn result to the workflow. 
class RunClaudeCodeTurnParams(BaseModel):
    """Arguments for one Claude Code turn run inside an activity."""

    # Task the turn belongs to; passed to UnifiedEmitter as task_id.
    task_id: str
    # Prompt text written to the CLI's stdin.
    prompt: str
    # Tracing identifiers forwarded to UnifiedEmitter.
    trace_id: str | None = None
    parent_span_id: str | None = None
    # Claude Code session to resume (passed to the CLI as ``-r``), if any.
    session_id: str | None = None
    # Timestamp forwarded to ``auto_send_turn(created_at=...)``.
    created_at: datetime | None = None


class RunClaudeCodeTurnResult(BaseModel):
    """Result returned from the activity to the workflow."""

    # ``final_text`` of the value returned by ``auto_send_turn``.
    final_text: str
    # ``session_id`` read from the turn after it ran.
    session_id: str | None = None


async def _spawn_claude(prompt: str, session_id: str | None = None) -> AsyncIterator[str]:
    """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines.

    Pass ``session_id`` to resume a previous Claude Code session (multi-turn
    memory via ``-r``).

    Injectable seam: tests can monkeypatch this with a fake async iterator so no
    real CLI invocation is needed offline.

    Both pipes are read in fixed-size chunks rather than with ``async for`` over
    the stream. Iterating a ``StreamReader`` reads line by line and raises
    ``ValueError`` when a single line exceeds the reader's limit (64 KiB by
    default), which a large stream-json event can do.

    Args:
        prompt: Prompt text written to the CLI's stdin.
        session_id: Optional session to resume.

    Yields:
        Each non-empty stdout line with surrounding whitespace stripped.
    """
    # Local import: this module does not import codecs at file level.
    import codecs

    cmd = [
        "claude",
        "-p",
        "--output-format",
        "stream-json",
        "--verbose",
    ]
    if session_id:
        cmd.extend(["-r", session_id])

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    assert proc.stdout is not None
    assert proc.stdin is not None

    # Drain stderr concurrently. With --verbose, Claude Code can write enough to
    # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks
    # on its stderr write while we block reading stdout - a deadlock. A
    # background task keeps stderr flowing so stdout never stalls.
    async def _drain_stderr() -> None:
        assert proc.stderr is not None
        # Chunked reads: a very long stderr line must not kill the drain task.
        while await proc.stderr.read(4096):
            pass

    stderr_task = asyncio.create_task(_drain_stderr())

    try:
        # The write is buffered and flushed in the background while stdout is
        # being read below; close() signals end of input once it is flushed.
        proc.stdin.write(prompt.encode("utf-8"))
        proc.stdin.close()

        # Incremental decoder: a multibyte character split across two reads is
        # decoded correctly instead of being replaced at the chunk boundary.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        buffer = ""
        while chunk := await proc.stdout.read(4096):
            buffer += decoder.decode(chunk)
            # Everything before the last newline is complete; keep the rest.
            *complete, buffer = buffer.split("\n")
            for raw in complete:
                line = raw.strip()
                if line:
                    yield line

        buffer += decoder.decode(b"", final=True)
        tail = buffer.strip()
        if tail:
            yield tail

        await proc.wait()
    finally:
        # Release the subprocess and stderr drain task even if the consumer
        # abandons the generator early (task cancellation / client disconnect):
        # cancel the drain task and terminate+reap the process if it is still
        # running, so neither is leaked.
        stderr_task.cancel()
        try:
            await stderr_task
        except asyncio.CancelledError:
            pass
        if proc.returncode is None:
            try:
                proc.terminate()
            except ProcessLookupError:
                pass
            await proc.wait()


@activity.defn(name=RUN_CLAUDE_CODE_TURN_ACTIVITY)
async def run_claude_code_turn(params: RunClaudeCodeTurnParams) -> dict[str, Any]:
    """Run one Claude Code turn end-to-end and stream events to the task.

    Runs in an activity (real asyncio loop) so subprocess I/O is permitted.

    Args:
        params: Task, prompt, tracing ids, optional session to resume and the
            timestamp forwarded to ``auto_send_turn``.

    Returns:
        A ``RunClaudeCodeTurnResult`` dumped to a plain dict, carrying the
        turn's final text and the session id read from the turn.
    """
    emitter = UnifiedEmitter(
        task_id=params.task_id,
        trace_id=params.trace_id,
        parent_span_id=params.parent_span_id,
    )
    turn = ClaudeCodeTurn(_spawn_claude(params.prompt, session_id=params.session_id))
    result = await emitter.auto_send_turn(turn, created_at=params.created_at)

    return RunClaudeCodeTurnResult(final_text=result.final_text, session_id=turn.session_id).model_dump()
""" import asyncio -from temporalio.contrib.langgraph import LangGraphPlugin - -from project.graph import GRAPH_NAME, build_graph -from project.workflow import AtHarnessLanggraphWorkflow +from project.workflow import {{ workflow_class }} +from project.activities import run_claude_code_turn from agentex.lib.utils.debug import setup_debug_if_enabled from agentex.lib.utils.logging import make_logger from agentex.lib.environment_variables import EnvironmentVariables @@ -31,14 +29,11 @@ async def main(): if task_queue_name is None: raise ValueError("WORKFLOW_TASK_QUEUE is not set") - worker = AgentexWorker( - task_queue=task_queue_name, - plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})], - ) + worker = AgentexWorker(task_queue=task_queue_name) await worker.run( - activities=get_all_activities(), - workflow=AtHarnessLanggraphWorkflow, + activities=[run_claude_code_turn, *get_all_activities()], + workflow={{ workflow_class }}, ) diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/project/workflow.py.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/project/workflow.py.j2 new file mode 100644 index 000000000..06d68a7b5 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/project/workflow.py.j2 @@ -0,0 +1,135 @@ +"""Temporal workflow for {{ agent_name }} — Claude Code harness. + +Holds conversation state (session_id for multi-turn resume) durably across +crashes. Each user message triggers ``on_task_event_send``, which delegates the +turn to the ``run_claude_code_turn`` activity. The activity spawns the Claude +Code CLI, wraps its stdout in ``ClaudeCodeTurn``, and delivers the turn via +``UnifiedEmitter.auto_send_turn`` (the async Redis push path). + +Note on subprocess inside Temporal +------------------------------------ +Subprocess (and all other) I/O must run in a Temporal *activity*, never in +workflow code. 
Temporal runs workflow + signal-handler bodies on a +deterministic sandbox event loop that does not implement ``subprocess_exec`` +(spawning the CLI there raises ``NotImplementedError``). The activity also gets +Temporal's retry + timeout guarantees. +""" + +from __future__ import annotations + +import os +import json +from datetime import timedelta + +from temporalio import workflow + +from agentex.lib import adk +from agentex.lib.types.acp import SendEventParams, CreateTaskParams +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.types.text_content import TextContent +from agentex.lib.environment_variables import EnvironmentVariables +from agentex.lib.core.temporal.types.workflow import SignalName +from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +with workflow.unsafe.imports_passed_through(): + from project.activities import RunClaudeCodeTurnParams, run_claude_code_turn + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +environment_variables = EnvironmentVariables.refresh() + +if environment_variables.WORKFLOW_NAME is None: + raise ValueError("Environment variable WORKFLOW_NAME is not set") +if environment_variables.AGENT_NAME is None: + raise ValueError("Environment variable AGENT_NAME is not set") + +logger = make_logger(__name__) + + +@workflow.defn(name=environment_variables.WORKFLOW_NAME) +class {{ workflow_class }}(BaseWorkflow): + """Temporal workflow that runs Claude Code locally for each user message. + + Persists the Claude Code session_id across turns so the CLI can resume + the conversation (``-r ``). Temporal's durable state ensures + the session_id survives worker crashes. 
+ """ + + def __init__(self): + super().__init__(display_name=environment_variables.AGENT_NAME) + self._complete_task = False + self._turn_number = 0 + # Claude Code session_id for multi-turn resume. + self._session_id: str | None = None + + @workflow.signal(name=SignalName.RECEIVE_EVENT) + async def on_task_event_send(self, params: SendEventParams) -> None: + """Handle a user message: spawn Claude Code and push events to the task stream.""" + self._turn_number += 1 + task_id = params.task.id + prompt = params.event.content.content + logger.info("Turn %d for task %s", self._turn_number, task_id) + + await adk.messages.create(task_id=task_id, content=params.event.content) + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name=f"Turn {self._turn_number}", + input={"message": prompt}, + ) as span: + # Delegate the subprocess turn to an activity: subprocess I/O is not + # permitted on the Temporal workflow event loop. The activity streams + # events to the task and returns the final text + session_id. + # workflow.now() gives a deterministic timestamp under replay. + result = await workflow.execute_activity( + run_claude_code_turn, + RunClaudeCodeTurnParams( + task_id=task_id, + prompt=prompt, + trace_id=task_id, + parent_span_id=span.id if span else None, + session_id=self._session_id, + created_at=workflow.now(), + ), + start_to_close_timeout=timedelta(minutes=5), + ) + + # Capture session_id to enable Claude Code resume on the next turn. 
+ sid = result.get("session_id") + if sid: + self._session_id = sid + + if span: + span.output = {"final_text": result.get("final_text")} + + @workflow.run + async def on_task_create(self, params: CreateTaskParams) -> str: + logger.info("Task created: %s", params.task.id) + + await adk.messages.create( + task_id=params.task.id, + content=TextContent( + author="agent", + content=( + f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n" + "Send me a message and I'll run it through Claude Code locally." + ), + ), + ) + + await workflow.wait_condition(lambda: self._complete_task, timeout=None) + return "Task completed" + + @workflow.signal + async def complete_task_signal(self) -> None: + logger.info("Received complete_task signal") + self._complete_task = True diff --git a/examples/tutorials/00_sync/harness_langgraph/pyproject.toml b/src/agentex/lib/cli/templates/temporal-claude-code/pyproject.toml.j2 similarity index 74% rename from examples/tutorials/00_sync/harness_langgraph/pyproject.toml rename to src/agentex/lib/cli/templates/temporal-claude-code/pyproject.toml.j2 index deecd08b3..2c6ec9c2f 100644 --- a/examples/tutorials/00_sync/harness_langgraph/pyproject.toml +++ b/src/agentex/lib/cli/templates/temporal-claude-code/pyproject.toml.j2 @@ -3,16 +3,15 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "s-harness-langgraph" +name = "{{ project_name }}" version = "0.1.0" -description = "A sync LangGraph agent using the unified harness surface" -readme = "README.md" +description = "{{ description }}" requires-python = ">=3.12" dependencies = [ "agentex-sdk", "scale-gp", - "langgraph", - "langchain-openai", + "temporalio>=1.18.2", + "python-dotenv>=1.0,<2", ] [project.optional-dependencies] @@ -23,6 +22,7 @@ dev = [ "black", "isort", "flake8", + "debugpy>=1.8.15", ] [tool.hatch.build.targets.wheel] diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/requirements.txt.j2 
b/src/agentex/lib/cli/templates/temporal-claude-code/requirements.txt.j2 new file mode 100644 index 000000000..a060d2331 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/requirements.txt.j2 @@ -0,0 +1,11 @@ +# Agentex SDK +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Temporal workflow engine +temporalio>=1.18.2 + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/temporal-codex/.dockerignore.j2 b/src/agentex/lib/cli/templates/temporal-codex/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/temporal-codex/.env.example.j2 b/src/agentex/lib/cli/templates/temporal-codex/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/temporal-codex/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile-uv.j2 new file mode 100644 index 000000000..2a3f1108b --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile-uv.j2 @@ -0,0 +1,55 @@ +# 
syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +# Install tctl (Temporal CLI) +RUN curl -L https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_arm64.tar.gz -o /tmp/tctl.tar.gz && \ + tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ + chmod +x /usr/local/bin/tctl && \ + rm /tmp/tctl.tar.gz + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" + +# Run the ACP server using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] + +# When we deploy the worker, we will replace the CMD with the following +# CMD ["python", "-m", "run_worker"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-codex/Dockerfile.j2 b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile.j2 new file mode 100644 index 000000000..ba47485a9 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile.j2 @@ -0,0 +1,48 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY 
--from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install tctl (Temporal CLI) +RUN curl -L https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_arm64.tar.gz -o /tmp/tctl.tar.gz && \ + tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ + chmod +x /usr/local/bin/tctl && \ + rm /tmp/tctl.tar.gz + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + +# Run the ACP server using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] + +# When we deploy the worker, we will replace the CMD with the following +# CMD ["python", "-m", "run_worker"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-codex/README.md.j2 b/src/agentex/lib/cli/templates/temporal-codex/README.md.j2 new file mode 100644 index 000000000..794109ff3 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/README.md.j2 @@ -0,0 +1,80 @@ +# {{ agent_name }} — AgentEx Temporal + Codex + +This template builds a **Temporal-durable** agent that drives the **Codex CLI** +through the unified harness surface on AgentEx: +- A Temporal workflow holds conversation state (the codex thread ID) durably + across worker crashes — no external state store needed +- Each turn 
delegates to the `run_codex_turn` activity, which spawns the CLI + (subprocess I/O is not permitted on the workflow event loop) +- The activity wraps the CLI's stdout stream in a `CodexTurn` and delivers + canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `codex` CLI installed and on your `PATH` (`npm install -g @openai/codex`) +- An `OPENAI_API_KEY` in your environment +- A running Temporal service (provided automatically by the local dev stack) + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +This starts both the ACP HTTP server and the Temporal worker. + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ ├── acp.py # Thin ACP server; FastACP auto-wires to the workflow +│ ├── workflow.py # Temporal workflow (durable conversation state) +│ ├── activities.py # run_codex_turn activity (CLI subprocess) +│ └── run_worker.py # Temporal worker entrypoint +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Subprocess must run in an activity +Temporal runs workflow + signal-handler bodies on a deterministic sandbox event +loop that does not implement `subprocess_exec`. The workflow therefore delegates +each turn to the `run_codex_turn` activity, which also gains Temporal's retry + +timeout guarantees. + +### Durable multi-turn memory +The codex thread ID is kept on the workflow instance; Temporal's durable replay +reconstructs it after a crash, so the next turn resumes the same codex session. + +### The unified harness surface +`CodexTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. 
Choose a model +Set `CODEX_MODEL` (defaults to `o4-mini`) to control which model codex uses. + +### 2. Customize the subprocess +Edit `_spawn_codex` in `project/activities.py` to change the CLI flags or how +the prompt is delivered. + +### 3. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/temporal-codex/dev.ipynb.j2 b/src/agentex/lib/cli/templates/temporal-codex/dev.ipynb.j2 new file mode 100644 index 000000000..d3a68303f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/dev.ipynb.j2 @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# (REQUIRED) Create a new task. 
For Async agents, you must create a task for messages to be associated with.\n", + "import uuid\n", + "\n", + "rpc_response = client.agents.create_task(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", + " \"params\": {}\n", + " }\n", + ")\n", + "\n", + "task = rpc_response.result\n", + "print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Send an event to the agent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_event(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"task_id\": task.id,\n", + " }\n", + ")\n", + "\n", + "event = rpc_response.result\n", + "print(event)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6927cc0", + "metadata": {}, + "outputs": [], + "source": [ + "# Subscribe to the async task messages produced by the agent\n", + "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", + "\n", + "task_messages = subscribe_to_async_task_messages(\n", + " client=client,\n", + " task=task, \n", + " only_after_timestamp=event.created_at, \n", + " 
print_messages=True,\n", + " rich_print=True,\n", + " timeout=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4864e354", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/environments.yaml b/src/agentex/lib/cli/templates/temporal-codex/environments.yaml.j2 similarity index 97% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/environments.yaml rename to src/agentex/lib/cli/templates/temporal-codex/environments.yaml.j2 index f90511911..a3df5e228 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/environments.yaml +++ b/src/agentex/lib/cli/templates/temporal-codex/environments.yaml.j2 @@ -32,7 +32,7 @@ # kubernetes: # # OPTIONAL - Otherwise it will be derived from separately. However, this can be used to override the derived # # namespace and deploy it with in the same namespace that already exists for a separate agent. 
-# namespace: "team-example-tutorial" +# namespace: "team-{{agent_name}}" # ********** END EXAMPLE ********** schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI diff --git a/src/agentex/lib/cli/templates/temporal-codex/manifest.yaml.j2 b/src/agentex/lib/cli/templates/temporal-codex/manifest.yaml.j2 new file mode 100644 index 000000000..18cffd54a --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/manifest.yaml.j2 @@ -0,0 +1,140 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. 
The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + # Path to temporal worker file + # Examples: + # project/run_worker.py (standard) + # workers/temporal.py (custom structure) + # ../shared/worker.py (shared across projects) + worker: project/run_worker.py + + +# Agent Configuration +# ----------------- +agent: + # Type of agent - either sync or async + acp_type: async + + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: "{{ description 
}}" + + # Temporal workflow configuration + # This enables your agent to run as a Temporal workflow for long-running tasks + temporal: + enabled: true + workflows: + # Name of the workflow class + # Must match the @workflow.defn name in your workflow.py + - name: {{ workflow_name }} + + # Queue name for task distribution + # Used by Temporal to route tasks to your agent + # Convention: <agent_name>_task_queue + queue_name: {{ queue_name }} + + # Optional: Health check port for temporal worker + # Defaults to 80 if not specified + # health_check_port: 80 + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + # - env_var_name: LITELLM_API_KEY + # secret_name: litellm-api-key + # secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: {} + # LITELLM_API_KEY: "" + # OPENAI_BASE_URL: "" + # OPENAI_ORG_ID: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret name + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/acp.py b/src/agentex/lib/cli/templates/temporal-codex/project/acp.py.j2 similarity index 65% rename from 
examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/acp.py rename to src/agentex/lib/cli/templates/temporal-codex/project/acp.py.j2 index c142dcf70..7ef5744f0 100644 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/acp.py +++ b/src/agentex/lib/cli/templates/temporal-codex/project/acp.py.j2 @@ -1,11 +1,11 @@ -"""ACP server for the Temporal harness Pydantic AI test agent. +"""ACP server for {{ agent_name }} — a Temporal Codex harness agent. This file is intentionally thin. When ``acp_type="async"`` is combined with ``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires: - HTTP task/create → @workflow.run on the workflow class - HTTP task/event/send → @workflow.signal(SignalName.RECEIVE_EVENT) - HTTP task/cancel → workflow cancellation via the Temporal client + HTTP task/create -> @workflow.run on the workflow class + HTTP task/event/send -> @workflow.signal(SignalName.RECEIVE_EVENT) + HTTP task/cancel -> workflow cancellation via the Temporal client so we don't define any handlers here. The actual agent code lives in ``project/workflow.py`` and is executed by the Temporal worker @@ -20,8 +20,6 @@ load_dotenv() -from pydantic_ai.durable_exec.temporal import PydanticAIPlugin - from agentex.lib.types.fastacp import TemporalACPConfig from agentex.lib.sdk.fastacp.fastacp import FastACP @@ -30,6 +28,5 @@ config=TemporalACPConfig( type="temporal", temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), - plugins=[PydanticAIPlugin()], ), ) diff --git a/src/agentex/lib/cli/templates/temporal-codex/project/activities.py.j2 b/src/agentex/lib/cli/templates/temporal-codex/project/activities.py.j2 new file mode 100644 index 000000000..8d48164fc --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/project/activities.py.j2 @@ -0,0 +1,145 @@ +"""Temporal activity for {{ agent_name }} — Codex harness. + +Subprocess spawning (and any other I/O) must run inside a Temporal *activity*, +not in workflow code. 
Temporal runs workflow + signal-handler bodies on a +deterministic sandbox event loop that does not implement ``subprocess_exec`` +(or threads / sockets), so spawning ``codex exec`` directly in the signal +handler raises ``NotImplementedError``. This activity runs codex, drives the +``CodexTurn`` through ``UnifiedEmitter.auto_send_turn`` (the async Redis push +path), and returns the turn result to the workflow. + +The ``_spawn_codex`` / ``_process_stdout`` seams are injectable: offline tests +can replace them with fakes that yield pre-recorded event lines so no real CLI +runs. +""" + +from __future__ import annotations + +import os +import codecs +import asyncio +from typing import Any +from datetime import datetime +from collections.abc import AsyncIterator + +from temporalio import activity + +from agentex.lib.adk import CodexTurn +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.utils.logging import make_logger +from agentex.lib.utils.model_utils import BaseModel + +logger = make_logger(__name__) + +RUN_CODEX_TURN_ACTIVITY = "run_codex_turn" + + +class RunCodexTurnParams(BaseModel): + """Arguments for one codex turn run inside an activity.""" + + task_id: str + prompt: str + model: str + trace_id: str | None = None + parent_span_id: str | None = None + thread_id: str | None = None + created_at: datetime | None = None + + +class RunCodexTurnResult(BaseModel): + """Result returned from the activity to the workflow.""" + + final_text: str + session_id: str | None = None + model: str | None = None + + +async def _spawn_codex( + model: str, + thread_id: str | None = None, +) -> asyncio.subprocess.Process: + """Spawn ``codex exec --json`` locally and return the live process. + + Injection seam: tests replace this function with a fake that returns a + mock process whose stdout yields pre-recorded event lines. + + The caller writes the prompt to stdin after the process starts, then + closes stdin so codex knows input is complete. 
+ """ + base_flags = [ + "--json", + "--skip-git-repo-check", + "--dangerously-bypass-approvals-and-sandbox", + "--model", + model, + ] + + if thread_id: + cmd = ["codex", "exec", *base_flags, "resume", thread_id, "-"] + else: + cmd = ["codex", "exec", *base_flags, "-"] + + return await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + # Discard stderr: codex --json writes events to stdout; its stderr is + # progress/debug noise. Capturing it with PIPE but never reading it + # would deadlock once codex fills the OS pipe buffer (~64 KB). + stderr=asyncio.subprocess.DEVNULL, + env={**os.environ}, + ) + + +async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]: + """Yield newline-delimited JSON lines from the process stdout. + + Uses an incremental UTF-8 decoder so a multibyte character split across two + 4 KB reads is decoded correctly instead of being corrupted at the boundary. + """ + assert process.stdout is not None + decoder = codecs.getincrementaldecoder("utf-8")(errors="replace") + buffer = "" + while True: + chunk = await process.stdout.read(4096) + if not chunk: + break + buffer += decoder.decode(chunk) + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if line: + yield line + buffer += decoder.decode(b"", final=True) + if buffer.strip(): + yield buffer.strip() + + +@activity.defn(name=RUN_CODEX_TURN_ACTIVITY) +async def run_codex_turn(params: RunCodexTurnParams) -> dict[str, Any]: + """Run one codex turn end-to-end and stream events to the task. + + Runs in an activity (real asyncio loop) so subprocess I/O is permitted. 
+ """ + process = await _spawn_codex(params.model, thread_id=params.thread_id) + + assert process.stdin is not None + process.stdin.write(params.prompt.encode("utf-8")) + await process.stdin.drain() + process.stdin.close() + + turn = CodexTurn(events=_process_stdout(process), model=params.model) + emitter = UnifiedEmitter( + task_id=params.task_id, + trace_id=params.trace_id, + parent_span_id=params.parent_span_id, + ) + result = await emitter.auto_send_turn(turn, created_at=params.created_at) + + await process.wait() + + return RunCodexTurnResult( + final_text=result.final_text, + session_id=turn.session_id, + model=turn.usage().model, + ).model_dump() diff --git a/src/agentex/lib/cli/templates/temporal-codex/project/run_worker.py.j2 b/src/agentex/lib/cli/templates/temporal-codex/project/run_worker.py.j2 new file mode 100644 index 000000000..d86519977 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/project/run_worker.py.j2 @@ -0,0 +1,41 @@ +"""Temporal worker for {{ agent_name }} — Codex harness. + +Run as a separate long-lived process alongside the ACP HTTP server. The +worker polls Temporal for workflow + activity tasks and executes them. + +The codex CLI subprocess runs in the ``run_codex_turn`` activity (registered +below alongside the built-in Agentex activities), because subprocess I/O is not +permitted on the Temporal workflow event loop. 
+""" + +import asyncio + +from project.workflow import {{ workflow_class }} +from project.activities import run_codex_turn +from agentex.lib.utils.debug import setup_debug_if_enabled +from agentex.lib.utils.logging import make_logger +from agentex.lib.environment_variables import EnvironmentVariables +from agentex.lib.core.temporal.activities import get_all_activities +from agentex.lib.core.temporal.workers.worker import AgentexWorker + +environment_variables = EnvironmentVariables.refresh() +logger = make_logger(__name__) + + +async def main(): + setup_debug_if_enabled() + + task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE + if task_queue_name is None: + raise ValueError("WORKFLOW_TASK_QUEUE is not set") + + worker = AgentexWorker(task_queue=task_queue_name) + + await worker.run( + activities=[run_codex_turn, *get_all_activities()], + workflow={{ workflow_class }}, + ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/src/agentex/lib/cli/templates/temporal-codex/project/workflow.py.j2 b/src/agentex/lib/cli/templates/temporal-codex/project/workflow.py.j2 new file mode 100644 index 000000000..6833a5701 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/project/workflow.py.j2 @@ -0,0 +1,145 @@ +"""Temporal workflow for {{ agent_name }} — Codex harness. + +Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` + +``UnifiedEmitter`` for a Temporal-durable ACP agent. + +KEY CONCEPTS DEMONSTRATED: +- Running ``codex exec --json`` in the ``run_codex_turn`` activity. Subprocess + I/O is not permitted on the Temporal workflow event loop (the deterministic + sandbox loop does not implement ``subprocess_exec``), so the signal handler + delegates the turn to an activity, which also gets Temporal's retry + timeout + guarantees. +- Wrapping the stdout line stream in a ``CodexTurn`` (inside the activity). 
+- Delivering events via ``UnifiedEmitter.auto_send_turn``, which pushes + ``StreamTaskMessage*`` events to Redis so the UI sees tokens in real time. +- Passing ``created_at=workflow.now()`` for deterministic timestamps under + Temporal replay (required for Temporal-safe delivery). +- Persisting the codex thread ID on the workflow instance itself — Temporal's + workflow state is durable, so no external ``adk.state`` round-trip is needed. +""" + +from __future__ import annotations + +import os +from datetime import timedelta + +from temporalio import workflow + +from agentex.lib import adk +from agentex.lib.types.acp import SendEventParams, CreateTaskParams +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.types.text_content import TextContent +from agentex.lib.environment_variables import EnvironmentVariables +from agentex.lib.core.temporal.types.workflow import SignalName +from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +with workflow.unsafe.imports_passed_through(): + from project.activities import RunCodexTurnParams, run_codex_turn + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +environment_variables = EnvironmentVariables.refresh() + +if environment_variables.WORKFLOW_NAME is None: + raise ValueError("Environment variable WORKFLOW_NAME is not set") +if environment_variables.AGENT_NAME is None: + raise ValueError("Environment variable AGENT_NAME is not set") + +logger = make_logger(__name__) + +MODEL = os.environ.get("CODEX_MODEL", "o4-mini") + + +@workflow.defn(name=environment_variables.WORKFLOW_NAME) +class {{ workflow_class }}(BaseWorkflow): + """Long-running Temporal workflow that 
runs codex exec for each turn. + + Conversation state (codex thread ID + turn counter) is kept on the + workflow instance. Temporal's durable replay reconstructs this state if + the worker crashes, so no external ``adk.state`` round-trip is needed. + """ + + def __init__(self): + super().__init__(display_name=environment_variables.AGENT_NAME) + self._complete_task = False + self._turn_number = 0 + self._codex_thread_id: str | None = None + + @workflow.signal(name=SignalName.RECEIVE_EVENT) + async def on_task_event_send(self, params: SendEventParams) -> None: + """Handle a new user message: spawn codex, stream events via UnifiedEmitter.""" + logger.info("Received task event: %s", params.task.id) + self._turn_number += 1 + + await adk.messages.create(task_id=params.task.id, content=params.event.content) + + user_message = params.event.content.content + + async with adk.tracing.span( + trace_id=params.task.id, + task_id=params.task.id, + name=f"Turn {self._turn_number}", + input={"message": user_message}, + ) as span: + # Delegate the subprocess turn to an activity: subprocess I/O is not + # permitted on the Temporal workflow event loop. The activity streams + # events to the task and returns the final text + codex thread id. + # workflow.now() gives a deterministic timestamp under replay. + result = await workflow.execute_activity( + run_codex_turn, + RunCodexTurnParams( + task_id=params.task.id, + prompt=user_message, + model=MODEL, + trace_id=params.task.id, + parent_span_id=span.id if span else None, + thread_id=self._codex_thread_id, + created_at=workflow.now(), + ), + start_to_close_timeout=timedelta(minutes=5), + ) + + # Persist the codex thread id so the next turn resumes the session. 
+ session_id = result.get("session_id") + if session_id: + self._codex_thread_id = session_id + + if span: + span.output = { + "final_text": result.get("final_text"), + "model": result.get("model"), + } + + @workflow.run + async def on_task_create(self, params: CreateTaskParams) -> str: + """Workflow entry point — keep the conversation alive for incoming signals.""" + logger.info("Task created: %s", params.task.id) + + await adk.messages.create( + task_id=params.task.id, + content=TextContent( + author="agent", + content=( + "Task initialized.\n" + "Send me a message and I'll run codex (local subprocess) " + "to answer, streaming events via the unified harness surface." + ), + ), + ) + + await workflow.wait_condition(lambda: self._complete_task, timeout=None) + return "Task completed" + + @workflow.signal + async def complete_task_signal(self) -> None: + """Graceful workflow shutdown signal.""" + logger.info("Received complete_task signal") + self._complete_task = True diff --git a/src/agentex/lib/cli/templates/temporal-codex/pyproject.toml.j2 b/src/agentex/lib/cli/templates/temporal-codex/pyproject.toml.j2 new file mode 100644 index 000000000..2c6ec9c2f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/pyproject.toml.j2 @@ -0,0 +1,37 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "temporalio>=1.18.2", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "pytest-asyncio", + "httpx", + "black", + "isort", + "flake8", + "debugpy>=1.8.15", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/temporal-codex/requirements.txt.j2 
b/src/agentex/lib/cli/templates/temporal-codex/requirements.txt.j2 new file mode 100644 index 000000000..a060d2331 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/requirements.txt.j2 @@ -0,0 +1,11 @@ +# Agentex SDK +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Temporal workflow engine +temporalio>=1.18.2 + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2 b/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2 index 0aa958118..82b1db269 100644 --- a/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2 +++ b/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2 @@ -11,9 +11,9 @@ moves into recorded activities. Streaming back to Agentex happens via ``event_stream_handler``, which receives Pydantic AI ``AgentStreamEvent``s from inside the model activity -and forwards them to Redis using the ``stream_pydantic_ai_events`` helper. -The ``task_id`` and tracing parent span ID are threaded into the handler -via ``deps``. +and forwards them through the unified harness surface +(``UnifiedEmitter.auto_send_turn`` + ``PydanticAITurn``). The ``task_id`` and +tracing parent span ID are threaded into the handler via ``deps``. """ from __future__ import annotations @@ -27,10 +27,8 @@ from project.tools import get_weather from pydantic_ai.messages import AgentStreamEvent from pydantic_ai.durable_exec.temporal import TemporalAgent -from agentex.lib.adk import ( - stream_pydantic_ai_events, - create_pydantic_ai_tracing_handler, -) +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn # Swap this for any Pydantic AI-supported model identifier # (e.g. "anthropic:claude-3-5-sonnet-latest", "openai:gpt-4o"). 
@@ -92,17 +90,18 @@ async def event_handler( activity (not the workflow), it can freely make non-deterministic Redis writes — including the tracing HTTP calls that record per-tool-call spans under the workflow's per-turn span (when ``parent_span_id`` is set). + + The UnifiedEmitter is constructed from ``deps`` (task_id + parent_span_id), + so tool spans nest under the workflow's per-turn span and messages auto-send + to the task stream. """ - tracing_handler = create_pydantic_ai_tracing_handler( + emitter = UnifiedEmitter( + task_id=run_context.deps.task_id, trace_id=run_context.deps.task_id, parent_span_id=run_context.deps.parent_span_id, - task_id=run_context.deps.task_id, - ) - await stream_pydantic_ai_events( - events, - run_context.deps.task_id, - tracing_handler=tracing_handler, ) + turn = PydanticAITurn(events, model=MODEL_NAME) + await emitter.auto_send_turn(turn) # Construct the durable agent at module load time so that the diff --git a/src/agentex/lib/core/harness/auto_send.py b/src/agentex/lib/core/harness/auto_send.py index 2ecd6b583..b645a4aae 100644 --- a/src/agentex/lib/core/harness/auto_send.py +++ b/src/agentex/lib/core/harness/auto_send.py @@ -52,11 +52,11 @@ async def auto_send( final_text_parts so that multi-step turns return the LAST text segment. Full(TextContent) also overwrites final_text_parts (same semantics). - AGX1-378: created_at is forwarded to every streaming_task_message_context - call so callers can back-date message timestamps. + created_at is forwarded to every streaming_task_message_context call so + callers can back-date message timestamps. 
Mirrors the open/close/stream_update pattern from - src/agentex/lib/adk/_modules/_langgraph_async.py: + src/agentex/lib/adk/_modules/_langgraph_turn.py: - context opened via streaming_task_message_context(...).__aenter__() - context closed via ctx.close() (not __aexit__) - deltas pushed as StreamTaskMessageDelta with parent_task_message set @@ -110,8 +110,8 @@ async def _close_all() -> None: ctx = ctx_map.get(event.index) if ctx is not None and event.delta is not None: # Reconstruct the delta with parent_task_message set from - # the context's task_message (mirrors _langgraph_async.py - # lines 72-78 and 117-127). + # the context's task_message (mirrors the legacy + # _langgraph_async streaming helper, now in _langgraph_turn.py). delta_with_parent = StreamTaskMessageDelta( parent_task_message=ctx.task_message, delta=event.delta, diff --git a/src/agentex/lib/core/harness/tracer.py b/src/agentex/lib/core/harness/tracer.py index 4ca4d628b..0c6167b76 100644 --- a/src/agentex/lib/core/harness/tracer.py +++ b/src/agentex/lib/core/harness/tracer.py @@ -24,7 +24,7 @@ class SpanTracer: The real TracingModule.end_span does NOT accept an output kwarg — output is recorded by mutating span.output before calling end_span, matching the pattern - used throughout the codebase (see _langgraph_tracing.py on_tool_end etc.). + used throughout the codebase. Span-lifecycle contract: the `_open` dict (span key -> span object) is scoped to a single turn. 
Pairing is by `key`: diff --git a/src/agentex/lib/core/services/adk/providers/openai.py b/src/agentex/lib/core/services/adk/providers/openai.py index 1ae29589d..a2513ea01 100644 --- a/src/agentex/lib/core/services/adk/providers/openai.py +++ b/src/agentex/lib/core/services/adk/providers/openai.py @@ -742,11 +742,10 @@ async def run_agent_streamed_auto_send( ) as span: heartbeat_if_in_workflow("run agent streamed auto send") - # AGX1-378 restored: created_at is now threaded through - # UnifiedEmitter.auto_send_turn -> auto_send -> every - # streaming_task_message_context call, so the first agent message of - # the turn is stamped with the workflow-supplied timestamp (e.g. - # workflow.now()) just as the original inline loop did. + # created_at is threaded through UnifiedEmitter.auto_send_turn -> + # auto_send -> every streaming_task_message_context call, so the + # first agent message of the turn is stamped with the + # workflow-supplied timestamp (e.g. workflow.now()). # The dispenser is still used below for guardrail-rejection messages, # which open their own streaming contexts directly. 
_take_created_at = _make_created_at_dispenser(created_at) diff --git a/tests/lib/adk/providers/test_openai_activities.py b/tests/lib/adk/providers/test_openai_activities.py index 2f89308a9..964b24545 100644 --- a/tests/lib/adk/providers/test_openai_activities.py +++ b/tests/lib/adk/providers/test_openai_activities.py @@ -653,7 +653,7 @@ def _assert_tools_conversion(self, starting_agent, tools_case, _original_tools): @patch("agents.Runner.run_streamed") async def test_run_agent_streamed_auto_send_forwards_created_at(self, mock_runner_run_streamed): - """created_at is forwarded to every streaming context opened by auto_send_turn (AGX1-378).""" + """created_at is forwarded to every streaming context opened by auto_send_turn.""" from datetime import datetime, timezone from agentex.lib.core.temporal.activities.adk.providers.openai_activities import ( diff --git a/tests/lib/adk/providers/test_openai_turn.py b/tests/lib/adk/providers/test_openai_turn.py index 47a9ba9fe..d5ad2b5c8 100644 --- a/tests/lib/adk/providers/test_openai_turn.py +++ b/tests/lib/adk/providers/test_openai_turn.py @@ -25,7 +25,7 @@ def _import_target(): - from agentex.lib.adk.providers._modules.openai_turn import ( + from agentex.lib.adk._modules._openai_turn import ( OpenAITurn, _aggregate_usage, openai_usage_to_turn_usage, @@ -219,7 +219,7 @@ def stream_events(self): # monkeypatch that converter below so this can yield canonical events. 
return _canonical_stream(canonical) - import agentex.lib.adk.providers._modules.openai_turn as mod + import agentex.lib.adk._modules._openai_turn as mod async def _passthrough(stream): async for e in stream: diff --git a/tests/lib/adk/test_langgraph_async.py b/tests/lib/adk/test_langgraph_async.py index 682bd43bc..ebe215a15 100644 --- a/tests/lib/adk/test_langgraph_async.py +++ b/tests/lib/adk/test_langgraph_async.py @@ -26,7 +26,7 @@ from agentex.types.text_content import TextContent from agentex.types.task_message_delta import TextDelta from agentex.types.task_message_update import StreamTaskMessageDelta -from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events +from agentex.lib.adk._modules._langgraph_turn import stream_langgraph_events TASK_ID = "task-test" diff --git a/tests/lib/adk/test_langgraph_sync.py b/tests/lib/adk/test_langgraph_sync.py index 248d18f68..9e8c6e4f0 100644 --- a/tests/lib/adk/test_langgraph_sync.py +++ b/tests/lib/adk/test_langgraph_sync.py @@ -1,10 +1,12 @@ -"""Tests for the sync LangGraph -> Agentex stream event converter. +"""Tests for the sync LangGraph -> Agentex path. 
Covers: -- Basic text, tool call, and tool response emission -- on_final_ai_message callback for usage capture -- create_langgraph_tracing_handler symbol is importable and functional - (runtime DeprecationWarning removed; deprecation is docstring-only) +- The bare converter ``convert_langgraph_to_agentex_events``: + * Basic text, tool call, and tool response emission + * on_final_ai_message callback for usage capture +- The unified sync (HTTP ACP) path ``UnifiedEmitter.yield_turn(LangGraphTurn(...))``: + * Passthrough: yield_turn events equal LangGraphTurn(stream).events + * Span derivation from Full tool events with a fake tracer NOTE: langchain_core imports must be deferred to test-function scope because conftest.py stubs out ``langchain_core.messages`` with MagicMock for ADK @@ -15,15 +17,20 @@ import sys from typing import Any, AsyncIterator +from datetime import datetime, timezone +from dataclasses import field, dataclass import pytest +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.types.task_message_update import ( StreamTaskMessageFull, ) from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn # --------------------------------------------------------------------------- # Helpers @@ -227,21 +234,160 @@ def _cb(msg): assert yield_order.index("event") < yield_order.index("callback") -class TestLangGraphTracingHandlerBackwardCompat: - def test_create_langgraph_tracing_handler_no_runtime_warning(self): - """Deprecated symbol remains importable and emits no runtime DeprecationWarning. 
+# --------------------------------------------------------------------------- +# Unified sync path: LangGraphTurn + UnifiedEmitter.yield_turn +# +# Verifies the sync (HTTP ACP) delivery surface: +# 1. Passthrough: events from emitter.yield_turn(LangGraphTurn(stream)) equal +# LangGraphTurn(stream).events collected directly. +# 2. Span derivation: with trace_id + fake tracer, tool spans are derived from +# the event stream. +# --------------------------------------------------------------------------- + + +@dataclass +class _FakeTracingBackend: + spans_started: list[dict[str, Any]] = field(default_factory=list) + spans_ended: list[str] = field(default_factory=list) + + async def start_span(self, **kw) -> Any: + from agentex.types.span import Span + + sp = Span( + id=f"span-{len(self.spans_started) + 1}", + trace_id=kw.get("trace_id", "trace1"), + name=kw.get("name", ""), + start_time=datetime.now(tz=timezone.utc), + ) + self.spans_started.append(kw) + return sp + + async def end_span(self, *, trace_id: str, span: Any) -> None: + self.spans_ended.append(span.id if span else "") + + +class TestUnifiedSyncPathPassthrough: + async def test_yield_turn_events_equal_direct_events(self): + """Events from emitter.yield_turn(LangGraphTurn(stream)) must equal + LangGraphTurn(stream).events collected directly — the emitter must not + add, drop, or reorder events in yield mode.""" + from langchain_core.messages import AIMessage, AIMessageChunk + + chunk = AIMessageChunk(content="Hello!") + ai_msg = AIMessage(content="Hello!") - The runtime warnings.warn was removed (docstring-only deprecation) to - align with PR 4/6 and avoid breaking callers under warnings-as-errors. - Using ``warnings.simplefilter("error", DeprecationWarning)`` verifies - that calling the function is safe under -W error conditions. 
+ events_raw = [ + ("messages", (chunk, {})), + ("updates", {"agent": {"messages": [ai_msg]}}), + ] + + direct = [e async for e in LangGraphTurn(_make_stream(events_raw)).events] + + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + via_emitter = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + + assert len(direct) == len(via_emitter), "yield_turn must not add or drop events relative to direct iteration" + for a, b in zip(direct, via_emitter, strict=True): + assert type(a) == type(b), f"Event type mismatch: {type(a).__name__} vs {type(b).__name__}" + + async def test_yield_turn_passes_all_event_types(self): + """Start, Delta, Done, Full — each type is preserved.""" + from langchain_core.messages import AIMessage, AIMessageChunk + + chunk = AIMessageChunk(content="hi") + tc = {"id": "c1", "name": "t", "args": {}} + ai_msg = AIMessage(content="hi", tool_calls=[tc]) + + events_raw = [ + ("messages", (chunk, {})), + ("updates", {"agent": {"messages": [ai_msg]}}), + ] + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + types = {type(e).__name__ for e in out} + # text chunk emits Start + Delta + assert "StreamTaskMessageStart" in types + assert "StreamTaskMessageDelta" in types + # tool call emits Full + assert "StreamTaskMessageFull" in types + + async def test_empty_stream_yields_no_events(self): + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream([])))] + assert out == [] + + +class TestUnifiedSyncPathSpanDerivation: + @pytest.fixture + def fake_tracer(self): + backend = _FakeTracingBackend() + tracer = SpanTracer( + trace_id="trace1", + parent_span_id=None, + task_id="t", + tracing=backend, # type: ignore[arg-type] + ) + return tracer, backend + + async def test_tool_span_derived_from_full_events(self, 
fake_tracer): + """SpanDeriver handles Full tool events for LangGraph. + + Full(ToolRequestContent) opens a tool span keyed by tool_call_id; + Full(ToolResponseContent) closes it, aligning LangGraph's Full-event + path with the Start+Done harnesses (pydantic-ai, openai-agents). """ - import warnings + from langchain_core.messages import AIMessage, ToolMessage - from agentex.lib.adk._modules._langgraph_tracing import create_langgraph_tracing_handler + tracer, backend = fake_tracer + tc = {"id": "c1", "name": "get_weather", "args": {"city": "Paris"}} + ai_msg = AIMessage(content="", tool_calls=[tc]) + tool_msg = ToolMessage(content="Sunny", tool_call_id="c1", name="get_weather") + + events_raw = [ + ("updates", {"agent": {"messages": [ai_msg]}}), + ("updates", {"tools": {"messages": [tool_msg]}}), + ] - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("error", DeprecationWarning) - create_langgraph_tracing_handler(trace_id="t1", parent_span_id="p1") + emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer) + _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + + assert len(backend.spans_started) == 1, "Full(ToolRequestContent) opens one tool span" + started = backend.spans_started[0] + assert started["name"] == "get_weather" + assert started["input"] == {"city": "Paris"} + + async def test_no_spans_when_no_tool_calls(self, fake_tracer): + """yield_turn with tracer but no tool calls emits no spans.""" + from langchain_core.messages import AIMessage, AIMessageChunk + + tracer, backend = fake_tracer + chunk = AIMessageChunk(content="Hello!") + ai_msg = AIMessage(content="Hello!") + + events_raw = [ + ("messages", (chunk, {})), + ("updates", {"agent": {"messages": [ai_msg]}}), + ] + + emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer) + _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + + assert 
backend.spans_started == [], "No tool spans when there are no tool calls" + + async def test_tracer_none_means_no_spans(self): + """With tracer=False, no spans should be emitted.""" + from langchain_core.messages import AIMessage, ToolMessage + + tc = {"id": "c1", "name": "t", "args": {}} + ai_msg = AIMessage(content="", tool_calls=[tc]) + tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t") + + events_raw = [ + ("updates", {"agent": {"messages": [ai_msg]}}), + ("updates", {"tools": {"messages": [tool_msg]}}), + ] - assert w == [], "create_langgraph_tracing_handler must NOT emit a runtime DeprecationWarning" + emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=False) + _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + # No assertion on spans since tracer=False means emitter.tracer is None + assert emitter.tracer is None diff --git a/tests/lib/adk/test_langgraph_sync_unified.py b/tests/lib/adk/test_langgraph_sync_unified.py deleted file mode 100644 index cfd522828..000000000 --- a/tests/lib/adk/test_langgraph_sync_unified.py +++ /dev/null @@ -1,214 +0,0 @@ -"""Unified sync path tests for LangGraphTurn + UnifiedEmitter. - -Verifies: -1. Passthrough: events from emitter.yield_turn(LangGraphTurn(stream)) equal - LangGraphTurn(stream).events collected directly. -2. Span derivation: with trace_id + fake tracer, tool spans are derived from - the event stream. - -NOTE: langchain_core imports are deferred to test scope because conftest.py -stubs ``langchain_core.messages`` with MagicMock. 
-""" - -from __future__ import annotations - -import sys -from typing import Any -from datetime import datetime, timezone -from dataclasses import field, dataclass - -import pytest - -from agentex.lib.core.harness.tracer import SpanTracer -from agentex.lib.core.harness.emitter import UnifiedEmitter -from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn - -# --------------------------------------------------------------------------- -# Remove conftest stubs so real langchain_core types are used -# --------------------------------------------------------------------------- - - -@pytest.fixture(autouse=True) -def _real_langchain_core(): - stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")] - saved = {k: sys.modules.pop(k) for k in stub_keys} - import importlib - - importlib.import_module("langchain_core.messages") - yield - sys.modules.update(saved) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def _make_stream(events: list[tuple[str, Any]]): - async def _gen(): - for e in events: - yield e - - return _gen() - - -# --------------------------------------------------------------------------- -# Fake SpanTracer -# --------------------------------------------------------------------------- - - -@dataclass -class _FakeTracingBackend: - spans_started: list[dict[str, Any]] = field(default_factory=list) - spans_ended: list[str] = field(default_factory=list) - - async def start_span(self, **kw) -> Any: - from agentex.types.span import Span - - sp = Span( - id=f"span-{len(self.spans_started) + 1}", - trace_id=kw.get("trace_id", "trace1"), - name=kw.get("name", ""), - start_time=datetime.now(tz=timezone.utc), - ) - self.spans_started.append(kw) - return sp - - async def end_span(self, *, trace_id: str, span: Any) -> None: - self.spans_ended.append(span.id if span else "") - - -# 
--------------------------------------------------------------------------- -# Tests -# --------------------------------------------------------------------------- - - -class TestPassthrough: - async def test_yield_turn_events_equal_direct_events(self): - """Events from emitter.yield_turn(LangGraphTurn(stream)) must equal - LangGraphTurn(stream).events collected directly — the emitter must not - add, drop, or reorder events in yield mode.""" - from langchain_core.messages import AIMessage, AIMessageChunk - - chunk = AIMessageChunk(content="Hello!") - ai_msg = AIMessage(content="Hello!") - - # Build two identical streams - events_raw = [ - ("messages", (chunk, {})), - ("updates", {"agent": {"messages": [ai_msg]}}), - ] - - # Direct collection - direct = [e async for e in LangGraphTurn(_make_stream(events_raw)).events] - - # Via emitter.yield_turn - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - via_emitter = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - - assert len(direct) == len(via_emitter), "yield_turn must not add or drop events relative to direct iteration" - for a, b in zip(direct, via_emitter, strict=True): - assert type(a) == type(b), f"Event type mismatch: {type(a).__name__} vs {type(b).__name__}" - - async def test_yield_turn_passes_all_event_types(self): - """Start, Delta, Done, Full — each type is preserved.""" - from langchain_core.messages import AIMessage, AIMessageChunk - - chunk = AIMessageChunk(content="hi") - tc = {"id": "c1", "name": "t", "args": {}} - ai_msg = AIMessage(content="hi", tool_calls=[tc]) - - events_raw = [ - ("messages", (chunk, {})), - ("updates", {"agent": {"messages": [ai_msg]}}), - ] - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - types = {type(e).__name__ for e in out} - # text chunk emits Start + Delta - assert "StreamTaskMessageStart" in types - 
assert "StreamTaskMessageDelta" in types - # tool call emits Full - assert "StreamTaskMessageFull" in types - - async def test_empty_stream_yields_no_events(self): - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream([])))] - assert out == [] - - -class TestSpanDerivation: - @pytest.fixture - def fake_tracer(self): - backend = _FakeTracingBackend() - tracer = SpanTracer( - trace_id="trace1", - parent_span_id=None, - task_id="t", - tracing=backend, # type: ignore[arg-type] - ) - return tracer, backend - - async def test_tool_span_derived_from_full_events(self, fake_tracer): - """AGX1-377: SpanDeriver now handles Full tool events for LangGraph. - - Full(ToolRequestContent) opens a tool span keyed by tool_call_id; - Full(ToolResponseContent) closes it. This bridges the previous gap where - LangGraph's Full-event path produced no spans, aligning it with - Start+Done harnesses (pydantic-ai, openai-agents). 
- """ - from langchain_core.messages import AIMessage, ToolMessage - - tracer, backend = fake_tracer - tc = {"id": "c1", "name": "get_weather", "args": {"city": "Paris"}} - ai_msg = AIMessage(content="", tool_calls=[tc]) - tool_msg = ToolMessage(content="Sunny", tool_call_id="c1", name="get_weather") - - events_raw = [ - ("updates", {"agent": {"messages": [ai_msg]}}), - ("updates", {"tools": {"messages": [tool_msg]}}), - ] - - emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer) - _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - - assert len(backend.spans_started) == 1, "Full(ToolRequestContent) opens one tool span" - started = backend.spans_started[0] - assert started["name"] == "get_weather" - assert started["input"] == {"city": "Paris"} - - async def test_no_spans_when_no_tool_calls(self, fake_tracer): - """yield_turn with tracer but no tool calls emits no spans.""" - from langchain_core.messages import AIMessage, AIMessageChunk - - tracer, backend = fake_tracer - chunk = AIMessageChunk(content="Hello!") - ai_msg = AIMessage(content="Hello!") - - events_raw = [ - ("messages", (chunk, {})), - ("updates", {"agent": {"messages": [ai_msg]}}), - ] - - emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer) - _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - - assert backend.spans_started == [], "No tool spans when there are no tool calls" - - async def test_tracer_none_means_no_spans(self): - """With tracer=False, no spans should be emitted.""" - from langchain_core.messages import AIMessage, ToolMessage - - tc = {"id": "c1", "name": "t", "args": {}} - ai_msg = AIMessage(content="", tool_calls=[tc]) - tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t") - - events_raw = [ - ("updates", {"agent": {"messages": [ai_msg]}}), - ("updates", {"tools": {"messages": [tool_msg]}}), - ] - - emitter = 
UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=False) - _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - # No assertion on spans since tracer=False means emitter.tracer is None - assert emitter.tracer is None diff --git a/tests/lib/adk/test_pydantic_ai_async.py b/tests/lib/adk/test_pydantic_ai_async.py index 49cb6054c..4ab468152 100644 --- a/tests/lib/adk/test_pydantic_ai_async.py +++ b/tests/lib/adk/test_pydantic_ai_async.py @@ -36,7 +36,7 @@ from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent from agentex.types.reasoning_content_delta import ReasoningContentDelta -from agentex.lib.adk._modules._pydantic_ai_async import stream_pydantic_ai_events +from agentex.lib.adk._modules._pydantic_ai_turn import stream_pydantic_ai_events TASK_ID = "task_test" @@ -262,8 +262,8 @@ async def test_tool_call_opens_streaming_context_with_identity( ) -> None: """Tool requests are delivered as a streaming context (Start+Delta+Done). - AGX1-377 fix: auto_send now delivers streamed tool-request messages - natively (Start+ToolRequestDelta+Done). The streaming context is opened + auto_send delivers streamed tool-request messages natively + (Start+ToolRequestDelta+Done). The streaming context is opened at the Start event with the initial ToolRequestContent (tool_call_id + name + empty arguments), argument tokens are streamed as deltas, and the context is closed on Done. 
@@ -304,7 +304,7 @@ async def test_tool_call_opens_streaming_context_with_identity( assert content.tool_call_id == "c1" assert content.name == "get_weather" assert content.author == "agent" - # AGX1-377 streamed shape: initial_content has empty args (args come via delta) + # Streamed shape: initial_content has empty args (args come via delta) assert content.arguments == {} # The arg delta is delivered as a stream_update assert len(ctx.updates) == 1 @@ -657,292 +657,6 @@ async def test_part_delta_without_matching_start_is_ignored( assert final == "" -class TestTracingHandler: - """Tracing handler hooks fire alongside streaming for each tool call.""" - - @dataclass - class _RecordingHandler: - starts: list[dict[str, Any]] = field(default_factory=list) - ends: list[dict[str, Any]] = field(default_factory=list) - - async def on_tool_start(self, tool_call_id: str, tool_name: str, arguments: Any) -> None: - self.starts.append({"tool_call_id": tool_call_id, "tool_name": tool_name, "arguments": arguments}) - - async def on_tool_end(self, tool_call_id: str, result: Any) -> None: - self.ends.append({"tool_call_id": tool_call_id, "result": result}) - - async def test_handler_records_start_and_end_for_each_tool_call( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - streaming, messages = fake_adk - handler = self._RecordingHandler() - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - ] - await stream_pydantic_ai_events( - _aiter(events), - TASK_ID, - tracing_handler=handler, # type: ignore[arg-type] - ) - - # AGX1-373: tool messages arrive via streaming_task_message_context. 
- # Tracing is still additive — both messages are delivered AND hooks fire. - assert messages.created == [] - assert len(streaming.contexts) == 2 - assert isinstance(streaming.contexts[0].initial_content, ToolRequestContent) - assert isinstance(streaming.contexts[1].initial_content, ToolResponseContent) - # And both lifecycle hooks fired exactly once with the right payload. - assert handler.starts == [ - { - "tool_call_id": "c1", - "tool_name": "get_weather", - "arguments": {"city": "Paris"}, - } - ] - assert handler.ends == [{"tool_call_id": "c1", "result": "Sunny"}] - - async def test_handler_not_called_when_no_tool_calls_in_stream( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - handler = self._RecordingHandler() - events = [ - PartStartEvent(index=0, part=TextPart(content="")), - PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="Hello")), - PartEndEvent(index=0, part=TextPart(content="Hello")), - ] - await stream_pydantic_ai_events( - _aiter(events), - TASK_ID, - tracing_handler=handler, # type: ignore[arg-type] - ) - assert handler.starts == [] - assert handler.ends == [] - - async def test_handler_records_each_tool_in_multi_tool_run( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - """A turn with two tool calls must produce two start/end pairs in order.""" - handler = self._RecordingHandler() - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args="{}", tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="lookup_city", args=None, tool_call_id="c2"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="lookup_city", args="{}", tool_call_id="c2"), - ), - FunctionToolResultEvent( - 
part=ToolReturnPart(tool_name="lookup_city", content="Paris, FR", tool_call_id="c2"), - ), - ] - await stream_pydantic_ai_events( - _aiter(events), - TASK_ID, - tracing_handler=handler, # type: ignore[arg-type] - ) - - assert [s["tool_call_id"] for s in handler.starts] == ["c1", "c2"] - assert [e["tool_call_id"] for e in handler.ends] == ["c1", "c2"] - assert handler.starts[0]["tool_name"] == "get_weather" - assert handler.starts[1]["tool_name"] == "lookup_city" - - async def test_omitting_handler_is_a_no_op_for_existing_behavior( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - """Regression: passing no tracing handler preserves streaming behavior. - - AGX1-373: tool messages arrive via streaming_task_message_context - regardless of whether tracing_handler is passed. - """ - streaming, messages = fake_adk - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args="{}", tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - ] - await stream_pydantic_ai_events(_aiter(events), TASK_ID) - # AGX1-373: tool messages via streaming_task_message_context. - assert messages.created == [] - assert len(streaming.contexts) == 2 - content_types = [type(ctx.initial_content).__name__ for ctx in streaming.contexts] - assert content_types == ["ToolRequestContent", "ToolResponseContent"] - - -class TestPydanticAITracingHandlerDeterministicIds: - """Regression coverage for ``AgentexPydanticAITracingHandler``. - - pydantic-ai's ``TemporalAgent`` splits a single agent run across several - Temporal activities. The event_stream_handler is invoked once per - activity, with a fresh handler instance each time. 
So ``on_tool_start`` - (during the model activity that issued the tool call) and ``on_tool_end`` - (during the next model activity, after the tool ran) end up in DIFFERENT - handler instances — an in-memory dict can't pair them. - - The fix is deterministic span IDs derived from ``(trace_id, tool_call_id)``. - These tests lock that in. - """ - - class _RecordingClient: - """Stand-in for ``AsyncAgentex`` capturing spans.create / spans.update calls.""" - - def __init__(self) -> None: - self.creates: list[dict[str, Any]] = [] - self.updates: list[tuple[str, dict[str, Any]]] = [] - self.spans = self # so .spans.create / .spans.update resolve back here - - async def create(self, **kwargs: Any) -> Any: - self.creates.append(kwargs) - return None - - async def update(self, span_id: str, **kwargs: Any) -> Any: - self.updates.append((span_id, kwargs)) - return None - - async def test_same_tool_call_id_yields_same_span_id_across_handler_instances( - self, - ) -> None: - """The whole point of the design: two handler instances with the same - trace_id and tool_call_id resolve to the same span ID — otherwise - ``on_tool_end`` patches a different (non-existent) record and the span - in the DB never gets ``end_time`` / ``output``.""" - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client_a = self._RecordingClient() - client_b = self._RecordingClient() - - # Two independent handler instances — simulates the cross-activity - # invocation pattern in TemporalAgent. 
- handler_a = AgentexPydanticAITracingHandler( - trace_id="trace-1", - parent_span_id="parent-1", - task_id="task-1", - client=client_a, # type: ignore[arg-type] - ) - handler_b = AgentexPydanticAITracingHandler( - trace_id="trace-1", - parent_span_id="parent-1", - task_id="task-1", - client=client_b, # type: ignore[arg-type] - ) - - await handler_a.on_tool_start(tool_call_id="call_abc", tool_name="get_weather", arguments={"city": "Paris"}) - await handler_b.on_tool_end(tool_call_id="call_abc", result="Sunny, 72F") - - assert len(client_a.creates) == 1 - assert len(client_b.updates) == 1 - - created_span_id = client_a.creates[0]["id"] - updated_span_id = client_b.updates[0][0] - assert created_span_id == updated_span_id, ( - "on_tool_start and on_tool_end must address the same span across handler " - "instances; mismatch means tool spans will be left open and the AgentEx UI " - "will hide their trace." - ) - - async def test_different_tool_call_ids_yield_different_span_ids(self) -> None: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler = AgentexPydanticAITracingHandler( - trace_id="trace-1", - client=client, # type: ignore[arg-type] - ) - - await handler.on_tool_start("call_a", "get_weather", {"city": "Paris"}) - await handler.on_tool_start("call_b", "get_weather", {"city": "Tokyo"}) - - ids = {c["id"] for c in client.creates} - assert len(ids) == 2, "Distinct tool_call_ids must map to distinct span IDs" - - async def test_same_tool_call_id_in_different_traces_yields_different_span_ids( - self, - ) -> None: - """Span IDs are namespaced by trace_id so two unrelated runs with the - same provider-issued tool_call_id don't collide.""" - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler_t1 = AgentexPydanticAITracingHandler(trace_id="trace-1", client=client) # type: 
ignore[arg-type] - handler_t2 = AgentexPydanticAITracingHandler(trace_id="trace-2", client=client) # type: ignore[arg-type] - - await handler_t1.on_tool_start("call_abc", "t", None) - await handler_t2.on_tool_start("call_abc", "t", None) - - ids = {c["id"] for c in client.creates} - assert len(ids) == 2 - - async def test_on_tool_end_patches_only_end_time_and_output(self) -> None: - """Don't overwrite start_time, name, parent_id, etc. on close — only patch - the fields we have new values for. Sending start_time again could clobber - what was set at create time.""" - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler = AgentexPydanticAITracingHandler(trace_id="trace-1", client=client) # type: ignore[arg-type] - - await handler.on_tool_end("call_abc", "Sunny") - - assert len(client.updates) == 1 - _, patch_kwargs = client.updates[0] - assert set(patch_kwargs.keys()) == {"end_time", "output"}, ( - f"Unexpected fields in tool span PATCH: {set(patch_kwargs.keys())}" - ) - assert patch_kwargs["output"] == {"result": "Sunny"} - - async def test_on_tool_error_patches_error_output(self) -> None: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler = AgentexPydanticAITracingHandler(trace_id="trace-1", client=client) # type: ignore[arg-type] - - await handler.on_tool_error("call_abc", RuntimeError("boom")) - - assert len(client.updates) == 1 - _, patch_kwargs = client.updates[0] - assert "error" in patch_kwargs["output"] - assert "boom" in patch_kwargs["output"]["error"] - - class TestCleanupOnException: async def test_open_contexts_are_closed_on_iterator_failure( self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] diff --git a/tests/lib/adk/test_pydantic_ai_sync.py b/tests/lib/adk/test_pydantic_ai_sync.py index 080bc5be8..ac9986f2b 100644 --- 
a/tests/lib/adk/test_pydantic_ai_sync.py +++ b/tests/lib/adk/test_pydantic_ai_sync.py @@ -1,4 +1,12 @@ -"""Tests for the Pydantic AI -> Agentex stream event converter.""" +"""Tests for the sync Pydantic AI -> Agentex path. + +Covers: +- The bare converter ``convert_pydantic_ai_to_agentex_events`` (text/thinking/ + tool-call streaming and arg-delta handling). +- The unified sync (HTTP ACP) path ``UnifiedEmitter.yield_turn(PydanticAITurn(...))``: + * Passthrough: yield_turn events equal PydanticAITurn(stream).events + * Span derivation (tool + reasoning) with a fake tracing backend +""" from __future__ import annotations @@ -25,6 +33,7 @@ FunctionToolResultEvent, ) +from agentex.lib.core.harness import UnifiedEmitter from agentex.types.reasoning_content import ReasoningContent from agentex.types.task_message_delta import TextDelta from agentex.types.tool_request_delta import ToolRequestDelta @@ -42,6 +51,9 @@ _args_delta_to_str, convert_pydantic_ai_to_agentex_events, ) +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn + +from ..core.harness._fakes import FakeTracing async def _aiter(events: list[Any]) -> AsyncIterator[Any]: @@ -290,90 +302,6 @@ async def test_tool_retry_prompt_surfaces_as_response(self): assert out[0].content.content == "bad arguments" -class TestTracingHandlerSync: - """The sync converter has the same opt-in tracing-handler contract as the - async streamer: pass a handler and the converter calls ``on_tool_start`` / - ``on_tool_end`` for each tool call. 
Streaming yields are unchanged when - omitted.""" - - class _RecordingHandler: - def __init__(self) -> None: - self.starts: list[dict[str, Any]] = [] - self.ends: list[dict[str, Any]] = [] - - async def on_tool_start(self, tool_call_id: str, tool_name: str, arguments: Any) -> None: - self.starts.append({"tool_call_id": tool_call_id, "tool_name": tool_name, "arguments": arguments}) - - async def on_tool_end(self, tool_call_id: str, result: Any) -> None: - self.ends.append({"tool_call_id": tool_call_id, "result": result}) - - async def test_handler_records_start_and_end_for_a_tool_call(self): - handler = self._RecordingHandler() - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - ] - out = await _collect( - convert_pydantic_ai_to_agentex_events(_aiter(events), tracing_handler=handler) # type: ignore[arg-type] - ) - - # Streaming output is unchanged. 
- assert any(isinstance(e, StreamTaskMessageStart) for e in out) - assert any(isinstance(e, StreamTaskMessageFull) for e in out) - - assert handler.starts == [ - { - "tool_call_id": "c1", - "tool_name": "get_weather", - "arguments": {"city": "Paris"}, - } - ] - assert handler.ends == [{"tool_call_id": "c1", "result": "Sunny"}] - - async def test_handler_not_called_when_no_tool_calls(self): - handler = self._RecordingHandler() - events = [ - PartStartEvent(index=0, part=TextPart(content="")), - PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hi")), - PartEndEvent(index=0, part=TextPart(content="hi")), - ] - await _collect( - convert_pydantic_ai_to_agentex_events(_aiter(events), tracing_handler=handler) # type: ignore[arg-type] - ) - assert handler.starts == [] - assert handler.ends == [] - - async def test_omitting_handler_preserves_pre_tracing_behavior(self): - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="t", args=None, tool_call_id="c"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="t", args="{}", tool_call_id="c"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="t", content="ok", tool_call_id="c"), - ), - ] - out = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events))) - # Same emit shape as before: Start, Done, Full - types = [type(e).__name__ for e in out] - assert "StreamTaskMessageStart" in types - assert "StreamTaskMessageDone" in types - assert "StreamTaskMessageFull" in types - - class TestMultiStepRun: async def test_text_then_tool_then_text_assigns_distinct_indices(self): """A multi-step run: model emits text + tool call → tool runs → model emits more text. 
@@ -555,3 +483,157 @@ async def on_result_async(event: AgentRunResultEvent) -> None: assert len(awaited) == 1 assert awaited[0].result.output == "async_output" + + +# --------------------------------------------------------------------------- +# Unified sync path: PydanticAITurn + UnifiedEmitter.yield_turn +# +# Exercises the path documented in _pydantic_ai_sync.py under +# "Recommended: unified surface": +# - events forwarded by yield_turn equal PydanticAITurn(stream).events (passthrough) +# - with a trace context + fake tracing backend, tool / reasoning spans are derived +# --------------------------------------------------------------------------- + + +class TestUnifiedSyncPathPassthrough: + """The events forwarded by yield_turn are identical to PydanticAITurn.events.""" + + async def test_text_stream_passthrough(self): + raw_events = [ + PartStartEvent(index=0, part=TextPart(content="")), + PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hello")), + PartEndEvent(index=0, part=TextPart(content="hello")), + ] + + turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + direct = await _collect(turn_a.events) + + turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + via_emitter = await _collect(emitter.yield_turn(turn_b)) + + assert len(via_emitter) == len(direct) + for a, b in zip(via_emitter, direct): + assert type(a) is type(b) + assert a.model_dump() == b.model_dump() + + async def test_tool_call_stream_passthrough(self): + raw_events = [ + PartStartEvent(index=0, part=ToolCallPart(tool_name="Bash", args=None, tool_call_id="c1")), + PartDeltaEvent(index=0, delta=ToolCallPartDelta(args_delta='{"cmd":"ls"}')), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c1"), + ), + ] + + turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + direct = await _collect(turn_a.events) + + turn_b = 
PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + via_emitter = await _collect(emitter.yield_turn(turn_b)) + + assert len(via_emitter) == len(direct) + for a, b in zip(via_emitter, direct): + assert type(a) is type(b) + assert a.model_dump() == b.model_dump() + + +class TestUnifiedSyncPathSpanDerivation: + """With trace context + fake tracing, spans are derived from the stream.""" + + async def test_tool_span_opened_and_closed(self): + """A tool call produces start_span + end_span on the fake tracing backend.""" + tool_events = [ + PartStartEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="call_1"), + ), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="call_1"), + ), + FunctionToolResultEvent( + part=ToolReturnPart(tool_name="Bash", content="files", tool_call_id="call_1"), + ), + ] + + fake = FakeTracing() + turn = PydanticAITurn(_aiter(tool_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) + + events = await _collect(emitter.yield_turn(turn)) + + assert len(events) >= 2, "at least Start(tool) + Done + Full(response)" + assert len(fake.started) == 1, "one tool span opened" + assert len(fake.ended) == 1, "one tool span closed" + span_name, parent_id, span_input = fake.started[0] + assert span_name == "Bash" + assert parent_id == "p" + closed_name, closed_output = fake.ended[0] + assert closed_name == "Bash" + + async def test_reasoning_span_opened_and_closed(self): + """A thinking/reasoning block produces start_span + end_span.""" + reasoning_events = [ + PartStartEvent(index=0, part=ThinkingPart(content="")), + PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta="let me think")), + PartEndEvent(index=0, part=ThinkingPart(content="let me think")), + ] + + fake = FakeTracing() + turn = 
PydanticAITurn(_aiter(reasoning_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) + + await _collect(emitter.yield_turn(turn)) + + assert len(fake.started) == 1, "one reasoning span opened" + assert len(fake.ended) == 1, "one reasoning span closed" + span_name, parent_id, _ = fake.started[0] + assert span_name == "reasoning" + assert parent_id == "p" + + async def test_no_trace_id_means_no_spans(self): + """When trace_id is None, no spans are derived even with a fake tracing backend.""" + raw_events = [ + PartStartEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c2"), + ), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c2"), + ), + ] + + fake = FakeTracing() + turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, tracing=fake) + + await _collect(emitter.yield_turn(turn)) + + assert fake.started == [], "no spans when trace_id is absent" + assert fake.ended == [] + + async def test_tracer_false_suppresses_spans_even_with_trace_id(self): + """tracer=False disables span derivation regardless of trace_id.""" + raw_events = [ + PartStartEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c3"), + ), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c3"), + ), + ] + + fake = FakeTracing() + turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracer=False, tracing=fake) + + await _collect(emitter.yield_turn(turn)) + + assert fake.started == [] + assert fake.ended == [] diff --git a/tests/lib/adk/test_pydantic_ai_sync_unified.py b/tests/lib/adk/test_pydantic_ai_sync_unified.py deleted file mode 100644 index f920418de..000000000 --- 
a/tests/lib/adk/test_pydantic_ai_sync_unified.py +++ /dev/null @@ -1,209 +0,0 @@ -"""Tests for the unified sync (HTTP ACP) path: PydanticAITurn + UnifiedEmitter. - -Exercises the path documented in _pydantic_ai_sync.py under "Recommended: unified surface": -- events forwarded by yield_turn equal PydanticAITurn(stream).events (passthrough) -- with a trace context + fake tracing backend, tool spans are derived (start_span / end_span called) -- with a trace context + fake tracing backend, reasoning spans are derived -""" - -from __future__ import annotations - -from typing import Any, AsyncIterator - -from pydantic_ai.run import AgentRunResult, AgentRunResultEvent -from pydantic_ai.usage import RunUsage -from pydantic_ai.messages import ( - TextPart, - PartEndEvent, - ThinkingPart, - ToolCallPart, - TextPartDelta, - PartDeltaEvent, - PartStartEvent, - ThinkingPartDelta, - ToolCallPartDelta, -) - -from agentex.lib.core.harness import UnifiedEmitter -from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn - - -async def _aiter(events: list[Any]) -> AsyncIterator[Any]: - for e in events: - yield e - - -async def _collect(stream: AsyncIterator[Any]) -> list[Any]: - return [e async for e in stream] - - -class _FakeSpan: - def __init__(self, name: str): - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, str | None, Any]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None): - self.started.append((name, parent_id, input)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id, span): - self.ended.append((span.name, span.output)) - - -def _make_result_event(usage: RunUsage | None = None) -> AgentRunResultEvent: - result = AgentRunResult(output="done", _output_tool_name=None) - if usage is not None: - result._state.usage = usage - return AgentRunResultEvent(result=result) - - 
-class TestUnifiedSyncPathPassthrough: - """The events forwarded by yield_turn are identical to PydanticAITurn.events.""" - - async def test_text_stream_passthrough(self): - raw_events = [ - PartStartEvent(index=0, part=TextPart(content="")), - PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hello")), - PartEndEvent(index=0, part=TextPart(content="hello")), - ] - - turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - direct = await _collect(turn_a.events) - - turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - via_emitter = await _collect(emitter.yield_turn(turn_b)) - - assert len(via_emitter) == len(direct) - for a, b in zip(via_emitter, direct): - assert type(a) is type(b) - assert a.model_dump() == b.model_dump() - - async def test_tool_call_stream_passthrough(self): - raw_events = [ - PartStartEvent(index=0, part=ToolCallPart(tool_name="Bash", args=None, tool_call_id="c1")), - PartDeltaEvent(index=0, delta=ToolCallPartDelta(args_delta='{"cmd":"ls"}')), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c1"), - ), - ] - - turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - direct = await _collect(turn_a.events) - - turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - via_emitter = await _collect(emitter.yield_turn(turn_b)) - - assert len(via_emitter) == len(direct) - for a, b in zip(via_emitter, direct): - assert type(a) is type(b) - assert a.model_dump() == b.model_dump() - - -class TestUnifiedSyncPathSpanDerivation: - """With trace context + fake tracing, spans are derived from the stream.""" - - async def test_tool_span_opened_and_closed(self): - """A tool call produces start_span + end_span on the fake tracing backend.""" - from pydantic_ai.messages import ToolReturnPart, 
FunctionToolResultEvent - - tool_events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="call_1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="call_1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="Bash", content="files", tool_call_id="call_1"), - ), - ] - - fake = _FakeTracing() - turn = PydanticAITurn(_aiter(tool_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) - - events = await _collect(emitter.yield_turn(turn)) - - assert len(events) >= 2, "at least Start(tool) + Done + Full(response)" - assert len(fake.started) == 1, "one tool span opened" - assert len(fake.ended) == 1, "one tool span closed" - span_name, parent_id, span_input = fake.started[0] - assert span_name == "Bash" - assert parent_id == "p" - closed_name, closed_output = fake.ended[0] - assert closed_name == "Bash" - - async def test_reasoning_span_opened_and_closed(self): - """A thinking/reasoning block produces start_span + end_span.""" - reasoning_events = [ - PartStartEvent(index=0, part=ThinkingPart(content="")), - PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta="let me think")), - PartEndEvent(index=0, part=ThinkingPart(content="let me think")), - ] - - fake = _FakeTracing() - turn = PydanticAITurn(_aiter(reasoning_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) - - await _collect(emitter.yield_turn(turn)) - - assert len(fake.started) == 1, "one reasoning span opened" - assert len(fake.ended) == 1, "one reasoning span closed" - span_name, parent_id, _ = fake.started[0] - assert span_name == "reasoning" - assert parent_id == "p" - - async def test_no_trace_id_means_no_spans(self): - """When trace_id is None, no spans are derived even with a fake tracing backend.""" - raw_events = [ - PartStartEvent( - 
index=0, - part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c2"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c2"), - ), - ] - - fake = _FakeTracing() - turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, tracing=fake) - - await _collect(emitter.yield_turn(turn)) - - assert fake.started == [], "no spans when trace_id is absent" - assert fake.ended == [] - - async def test_tracer_false_suppresses_spans_even_with_trace_id(self): - """tracer=False disables span derivation regardless of trace_id.""" - raw_events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c3"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c3"), - ), - ] - - fake = _FakeTracing() - turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracer=False, tracing=fake) - - await _collect(emitter.yield_turn(turn)) - - assert fake.started == [] - assert fake.ended == [] diff --git a/tests/lib/adk/test_pydantic_ai_turn.py b/tests/lib/adk/test_pydantic_ai_turn.py index 46bf247a3..c57251db6 100644 --- a/tests/lib/adk/test_pydantic_ai_turn.py +++ b/tests/lib/adk/test_pydantic_ai_turn.py @@ -233,8 +233,8 @@ async def test_no_usage_event_leaves_default_usage(self): class TestToolRequestStreaming: """PydanticAITurn.events equals the bare converter output unconditionally. - The foundation auto_send delivers Start+ToolRequestDelta+Done natively - (AGX1-377), so no coalescing is needed on either channel. + The foundation auto_send delivers Start+ToolRequestDelta+Done natively, so + no coalescing is needed on either channel. 
""" async def test_events_match_bare_converter_for_streamed_tool_call(self): diff --git a/tests/lib/core/harness/_fakes.py b/tests/lib/core/harness/_fakes.py new file mode 100644 index 000000000..f9fd34a45 --- /dev/null +++ b/tests/lib/core/harness/_fakes.py @@ -0,0 +1,63 @@ +"""Shared test doubles for the unified harness test suites. + +A single superset implementation of the in-memory tracing backend used across +the harness tests. Three recording shapes were previously duplicated: + +- Shape-1 (richest): ``started`` = ``(name, parent_id, input)`` 3-tuples, + ``ended`` = ``(name, output)`` 2-tuples, plus an ``ended_spans`` list of the + closed ``FakeSpan`` objects (which carry ``.name``, ``.output``, ``.data``). +- Shape-2: ``started`` = ``(name, parent_id)`` 2-tuples, ``ended`` = + ``(name, output)``. +- Shape-3: ``started`` = bare names, ``ended`` = bare outputs. + +``FakeTracing`` records the richest (shape-1) form and exposes read-only +convenience properties (``started_names``, ``started_pairs``, +``ended_outputs``) so shape-2 and shape-3 assertions stay clean. 
+""" + +from __future__ import annotations + +from typing import Any + + +class FakeSpan: + def __init__(self, name: str) -> None: + self.name = name + self.output: Any = None + self.data: Any = None + + +class FakeTracing: + def __init__(self) -> None: + self.started: list[tuple[str, Any, Any]] = [] + self.ended: list[tuple[str, Any]] = [] + self.ended_spans: list[FakeSpan] = [] + + async def start_span( + self, + *, + trace_id: str, + name: str, + input: Any = None, + parent_id: Any = None, + data: Any = None, + task_id: Any = None, + ) -> FakeSpan: + self.started.append((name, parent_id, input)) + return FakeSpan(name) + + async def end_span(self, *, trace_id: str, span: FakeSpan) -> None: + self.ended.append((span.name, span.output)) + self.ended_spans.append(span) + + @property + def started_names(self) -> list[str]: + return [name for (name, _parent, _input) in self.started] + + @property + def started_pairs(self) -> list[tuple[str, Any]]: + return [(name, parent) for (name, parent, _input) in self.started] + + @property + def ended_outputs(self) -> list[Any]: + return [output for (_name, output) in self.ended] diff --git a/tests/lib/core/harness/conformance/conftest.py b/tests/lib/core/harness/conformance/conftest.py new file mode 100644 index 000000000..e4da7f1e2 --- /dev/null +++ b/tests/lib/core/harness/conformance/conftest.py @@ -0,0 +1,21 @@ +"""Conformance-suite test setup. + +Eagerly import every per-harness conformance module so each one's module-level +``register(...)`` calls run before any test executes. This makes +``all_fixtures()`` complete and independent of pytest's collection/import order +(the runner documents that cross-module registration order is not guaranteed), +so the cross-harness ``test_span_derivation_is_deterministic`` guard in +``test_conformance.py`` covers the full fixture set even when this directory is +run in isolation. +""" + +from __future__ import annotations + +# Importing these for their registration side effects only. 
+from . import ( + test_codex_conformance, # noqa: F401 + test_openai_conformance, # noqa: F401 + test_langgraph_conformance, # noqa: F401 + test_claude_code_conformance, # noqa: F401 + test_pydantic_ai_conformance, # noqa: F401 +) diff --git a/tests/lib/core/harness/conformance/runner.py b/tests/lib/core/harness/conformance/runner.py index 84e84fa51..e6928669a 100644 --- a/tests/lib/core/harness/conformance/runner.py +++ b/tests/lib/core/harness/conformance/runner.py @@ -43,8 +43,8 @@ because: - StreamingTaskMessageContext.close() persists initial_content when no deltas have been streamed, so the message IS correctly persisted. - - It mirrors the pattern already used by the real _langgraph_async.py harness, - keeping behavioural parity. + - It mirrors the pattern already used by the real langgraph streaming helper + (now in _langgraph_turn.py), keeping behavioural parity. - Switching to adk.messages.create would require an additional injectable dependency, adding surface area for no observable benefit. The conformance test treats this as an ACCEPTABLE envelope difference: at the @@ -53,18 +53,14 @@ identical because both adapters drive the same SpanDeriver.observe() call sequence and forward every signal to their tracer. -AGX1-377 fix: auto_send now DELIVERS streamed tool-request messages (Start+Done) -instead of dropping them. The conformance normaliser previously suppressed the -delivery for Start(tool_request)+Done on the yield channel to match auto_send's -old drop behaviour. That suppression is now removed: both channels produce a -LogicalDelivery for a streamed tool_request, and the cross-channel assertion -verifies it is delivered on both. +auto_send DELIVERS streamed tool-request messages (Start+Done): both channels +produce a LogicalDelivery for a streamed tool_request, and the cross-channel +assertion verifies it is delivered on both. 
""" from __future__ import annotations import json -import types as _types from typing import Any, NamedTuple, override from dataclasses import dataclass @@ -81,6 +77,8 @@ from agentex.types.reasoning_content_delta import ReasoningContentDelta from agentex.lib.core.harness.span_derivation import SpanDeriver +from .._fakes import FakeTracing + @dataclass class Fixture: @@ -145,8 +143,8 @@ def _yield_logical_deliveries(events: list[StreamTaskMessage]) -> list[LogicalDe - reasoning: initial_content.summary joined (from Start) prepended to accumulated reasoning-content deltas (this catches a channel that drops the summary) - - tool_request: JSON-sorted arguments from the Start content (AGX1-377: now - delivered on both channels, no longer suppressed) + - tool_request: JSON-sorted arguments from the Start content (delivered on + both channels) - tool_response: str(content) from Full event """ from agentex.types.text_content import TextContent @@ -191,9 +189,9 @@ def _yield_logical_deliveries(events: list[StreamTaskMessage]) -> list[LogicalDe ) ) elif ctype == "tool_request" and isinstance(content, ToolRequestContent): - # AGX1-377 fix: auto_send now delivers streamed tool-request - # messages. Emit a delivery here so the cross-channel - # assertion verifies it is present on both channels. + # auto_send delivers streamed tool-request messages. Emit a + # delivery here so the cross-channel assertion verifies it is + # present on both channels. 
deliveries.append( LogicalDelivery( content_type=ctype, @@ -296,30 +294,6 @@ def streaming_task_message_context( return _FakeCtx(self.sink, ctype, initial_content) -class _FakeTracing: - """Minimal tracing backend: records started/ended span names + outputs.""" - - def __init__(self) -> None: - self.started: list[str] = [] - self.ended: list[Any] = [] - - async def start_span( - self, - *, - trace_id: str, - name: str, - input: Any = None, - parent_id: Any = None, - data: Any = None, - task_id: Any = None, - ) -> Any: - self.started.append(name) - return _types.SimpleNamespace() - - async def end_span(self, *, trace_id: str, span: Any) -> None: - self.ended.append(getattr(span, "output", None)) - - class _RecordingTracer(SpanTracer): """SpanTracer that records every SpanSignal it actually receives. @@ -486,7 +460,7 @@ async def run_cross_channel_conformance( from agentex.lib.core.harness.yield_delivery import yield_events # --- yield channel --- - tracer_yield = _RecordingTracer(tracing=_FakeTracing()) + tracer_yield = _RecordingTracer(tracing=FakeTracing()) yield_out = [e async for e in yield_events(_gen(fixture.events), tracer=tracer_yield)] # Span signals the yield channel actually emitted to its tracer @@ -496,7 +470,7 @@ async def run_cross_channel_conformance( yield_deliveries = _yield_text_reasoning_seq(_yield_logical_deliveries(yield_out)) # --- auto_send channel --- - tracer_auto = _RecordingTracer(tracing=_FakeTracing()) + tracer_auto = _RecordingTracer(tracing=FakeTracing()) fake_streaming = _FakeStreaming() await auto_send( _gen(fixture.events), diff --git a/tests/lib/core/harness/conformance/test_codex_conformance.py b/tests/lib/core/harness/conformance/test_codex_conformance.py index b00ed2970..b3db4f56e 100644 --- a/tests/lib/core/harness/conformance/test_codex_conformance.py +++ b/tests/lib/core/harness/conformance/test_codex_conformance.py @@ -19,7 +19,7 @@ from agentex.lib.core.harness.types import StreamTaskMessage from 
agentex.lib.adk._modules._codex_sync import convert_codex_to_agentex_events -from .runner import Fixture, register, derive_all +from .runner import Fixture, register async def _aiter(items: list[Any]) -> AsyncIterator[Any]: @@ -208,17 +208,6 @@ def _build(events: list[Any]) -> list[StreamTaskMessage]: _LOCAL_FIXTURES = [_CODEX_TEXT, _CODEX_TOOL, _CODEX_REASONING, _CODEX_MULTI] -@pytest.mark.parametrize("fixture", _LOCAL_FIXTURES, ids=lambda f: f.name) -def test_codex_span_derivation_is_deterministic(fixture: Fixture) -> None: - """Span derivation over codex events is deterministic (cross-channel guarantee). - - Deriving twice over the same events yields identical signals. This is the - invariant that makes ``yield`` and ``auto_send`` delivery equivalent: both - observe the same event stream, so their tracing side effects are identical. - """ - assert derive_all(fixture.events) == derive_all(fixture.events) - - @pytest.mark.parametrize("fixture", _LOCAL_FIXTURES, ids=lambda f: f.name) def test_codex_events_are_non_empty(fixture: Fixture) -> None: """Every codex fixture yields at least one StreamTaskMessage*.""" diff --git a/tests/lib/core/harness/conformance/test_conformance.py b/tests/lib/core/harness/conformance/test_conformance.py index 6d5f8ca66..7c79f9397 100644 --- a/tests/lib/core/harness/conformance/test_conformance.py +++ b/tests/lib/core/harness/conformance/test_conformance.py @@ -24,11 +24,9 @@ Full vs Start+Done envelope difference is a documented, acceptable choice in auto_send — see runner.py for the rationale). -AGX1-377 fix: auto_send now delivers streamed tool-request messages. The -suppression that previously prevented the yield normaliser from emitting a -LogicalDelivery for Start(tool_request)+Done is removed. Both channels now -produce a delivery for streamed tool_request, verified by the -"streamed-tool-request" fixture. 
+auto_send delivers streamed tool-request messages: both channels produce a +delivery for streamed tool_request, verified by the "streamed-tool-request" +fixture. """ from __future__ import annotations @@ -134,9 +132,8 @@ StreamTaskMessageDone(type="done", index=0), ], ), - # fixture 4: streamed tool_request (AGX1-377 fix) — tool_request delivered - # via Start+Done (no Full). auto_send now delivers this instead of dropping - # it. Both channels must produce a LogicalDelivery for this fixture. + # fixture 4: streamed tool_request — tool_request delivered via Start+Done + # (no Full). Both channels must produce a LogicalDelivery for this fixture. Fixture( name="streamed-tool-request", events=[ @@ -275,11 +272,28 @@ async def test_cross_channel_equivalence(fixture: Fixture) -> None: # --------------------------------------------------------------------------- -@pytest.mark.parametrize("fixture", all_fixtures(), ids=lambda f: f.name) -def test_span_derivation_is_deterministic(fixture: Fixture) -> None: - """Span derivation over the same event list is idempotent. +def test_span_derivation_is_deterministic() -> None: + """Span derivation over the same event list is idempotent, for EVERY + registered fixture across all harnesses. + + ``all_fixtures()`` is read at run time (not at collection/parametrize time) + so it sees fixtures registered by every conformance module, regardless of + import/collection order. The per-harness conformance modules are imported + eagerly via ``conftest.py`` in this directory, so this test covers the full + cross-harness fixture set even when run in isolation. (Parametrizing on + ``all_fixtures()`` at import time would freeze the set to whatever happened + to be registered before this module was collected.) Retained as a lightweight regression guard. The primary cross-channel guarantee is asserted in test_cross_channel_equivalence above. 
""" - assert derive_all(fixture.events) == derive_all(fixture.events) + fixtures = all_fixtures() + assert len(fixtures) > len(_FIXTURES), ( + "expected per-harness fixtures to be registered in addition to the " + f"{len(_FIXTURES)} generic ones; got {len(fixtures)} total — a conformance " + "module's fixtures are not being registered (check conftest imports)" + ) + for fixture in fixtures: + assert derive_all(fixture.events) == derive_all(fixture.events), ( + f"[{fixture.name}] span derivation is not deterministic" + ) diff --git a/tests/lib/core/harness/conformance/test_langgraph_conformance.py b/tests/lib/core/harness/conformance/test_langgraph_conformance.py index 721d6aac5..a8d43aef6 100644 --- a/tests/lib/core/harness/conformance/test_langgraph_conformance.py +++ b/tests/lib/core/harness/conformance/test_langgraph_conformance.py @@ -32,7 +32,7 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.types.reasoning_content_delta import ReasoningContentDelta -from .runner import Fixture, register, derive_all, run_cross_channel_conformance +from .runner import Fixture, register, run_cross_channel_conformance # --------------------------------------------------------------------------- # Fixtures @@ -216,14 +216,3 @@ async def test_cross_channel_equivalence(fixture: Fixture) -> None: assert yield_spans == auto_spans, ( f"[{fixture.name}] span signals differ:\n yield: {yield_spans}\n auto_send: {auto_spans}" ) - - -# --------------------------------------------------------------------------- -# Backward-compatible determinism guard -# --------------------------------------------------------------------------- - - -@pytest.mark.parametrize("fixture", _LANGGRAPH_FIXTURES, ids=lambda f: f.name) -def test_span_derivation_is_deterministic(fixture: Fixture) -> None: - """Span derivation over the same event list is idempotent.""" - assert derive_all(fixture.events) == derive_all(fixture.events) diff --git 
a/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py b/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py index ca8234fda..3594de474 100644 --- a/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py +++ b/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py @@ -8,8 +8,8 @@ Streamed tool requests ---------------------- The pydantic-ai stream emits a tool REQUEST as Start + ToolRequestDelta + Done (not a -Full event). AGX1-377 has landed: both the conformance runner and auto_send now deliver -the Start+Delta+Done(tool_request) shape, so the cross-channel test asserts full +Full event). Both the conformance runner and auto_send deliver the +Start+Delta+Done(tool_request) shape, so the cross-channel test asserts full delivery-equivalence for streamed tool requests. The fixtures below retain the ToolRequestDelta events as the streamed tool-request inputs. """ @@ -39,7 +39,6 @@ from .runner import ( Fixture, register, - derive_all, run_cross_channel_conformance, ) @@ -78,8 +77,8 @@ def _build_fixtures() -> list[Fixture]: # ------------------------------------------------------------------ # # 2. Single tool call + tool response. # The canonical stream emits Start+ToolRequestDelta+Done for the request - # and Full(ToolResponseContent) for the response. See AGX1-377 note above - # for why the request delivery is not yet asserted cross-channel. + # and Full(ToolResponseContent) for the response. Both are asserted + # delivery-equivalent cross-channel (see the module docstring). # ------------------------------------------------------------------ # tool_call_pydantic = [ PartStartEvent( @@ -170,8 +169,8 @@ async def test_cross_channel_equivalence(fixture: Fixture) -> None: """Assert that yield_events and auto_send produce equivalent logical deliveries and identical span signals for each pydantic-ai fixture. - See runner.py for the full contract. 
The AGX1-377 note at the top of this - module explains why streamed-tool-request delivery is not yet asserted. + See runner.py for the full contract, including streamed-tool-request + delivery equivalence. """ yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture) @@ -181,14 +180,3 @@ async def test_cross_channel_equivalence(fixture: Fixture) -> None: assert yield_spans == auto_spans, ( f"[{fixture.name}] span signals differ:\n yield: {yield_spans}\n auto_send: {auto_spans}" ) - - -# --------------------------------------------------------------------------- -# Backward-compatible determinism guard -# --------------------------------------------------------------------------- - - -@pytest.mark.parametrize("fixture", _FIXTURES, ids=lambda f: f.name) -def test_span_derivation_is_deterministic(fixture: Fixture) -> None: - """Span derivation over the same event list is idempotent.""" - assert derive_all(fixture.events) == derive_all(fixture.events) diff --git a/tests/lib/core/harness/test_auto_send.py b/tests/lib/core/harness/test_auto_send.py index 1948e9196..764dae8b3 100644 --- a/tests/lib/core/harness/test_auto_send.py +++ b/tests/lib/core/harness/test_auto_send.py @@ -9,7 +9,6 @@ This mirrors _langgraph_async.py lines 62-78 and 100-127. 
""" -import types as _types from datetime import datetime import pytest @@ -29,6 +28,8 @@ from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent +from ._fakes import FakeTracing + class _FakeCtx: """Mirrors StreamingTaskMessageContext: __aenter__ opens (returns self with task_message set), @@ -181,21 +182,9 @@ async def test_auto_send_posts_full_tool_messages(): # --------------------------------------------------------------------------- -class _RecordTracing: - def __init__(self): - self.started, self.ended = [], [] - - async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None): - self.started.append(name) - return _types.SimpleNamespace() - - async def end_span(self, *, trace_id, span): - self.ended.append(getattr(span, "output", None)) - - @pytest.mark.asyncio async def test_auto_send_derives_tool_spans_via_tracer(): - fake_tracing = _RecordTracing() + fake_tracing = FakeTracing() tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake_tracing) streaming = _FakeStreaming() @@ -228,8 +217,8 @@ async def test_auto_send_derives_tool_spans_via_tracer(): result = await auto_send(_gen(events), task_id="task1", tracer=tracer, streaming=streaming) assert result.final_text == "" - assert fake_tracing.started == ["Bash"] - assert fake_tracing.ended == ["ok"] + assert fake_tracing.started_names == ["Bash"] + assert fake_tracing.ended_outputs == ["ok"] # --------------------------------------------------------------------------- @@ -301,13 +290,13 @@ async def _exploding_gen(): # --------------------------------------------------------------------------- -# Test 6: streamed tool_request delivered (AGX1-377 core) +# Test 6: streamed tool_request delivered # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_auto_send_streams_tool_request(): - """A Start(ToolRequestContent) MUST open a 
streaming context (AGX1-377).""" + """A Start(ToolRequestContent) MUST open a streaming context.""" streaming = _FakeStreaming() events = [ StreamTaskMessageStart( @@ -457,7 +446,7 @@ async def test_auto_send_full_text_content_sets_final_text(): # --------------------------------------------------------------------------- -# Test 10: created_at is forwarded to streaming context (AGX1-378) +# Test 10: created_at is forwarded to streaming context # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_emitter.py b/tests/lib/core/harness/test_emitter.py index df155ec44..3f70660ec 100644 --- a/tests/lib/core/harness/test_emitter.py +++ b/tests/lib/core/harness/test_emitter.py @@ -11,13 +11,7 @@ StreamTaskMessageStart, ) - -class _FakeTracing: - async def start_span(self, **kw): - return None - - async def end_span(self, **kw): - pass +from ._fakes import FakeTracing class _FakeCtx: @@ -84,7 +78,7 @@ async def test_emitter_yield_mode_passes_through(): async def test_emitter_tracing_default_on_when_trace_id_present(): # Inject a fake tracing backend so the test env doesn't need temporalio. # This exercises the default-on path (tracer=None) when trace_id is truthy. - emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id="p", tracing=_FakeTracing()) + emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id="p", tracing=FakeTracing()) assert emitter.tracer is not None diff --git a/tests/lib/core/harness/test_harness_claude_code_async.py b/tests/lib/core/harness/test_harness_claude_code_async.py new file mode 100644 index 000000000..c622d25c1 --- /dev/null +++ b/tests/lib/core/harness/test_harness_claude_code_async.py @@ -0,0 +1,248 @@ +"""Integration test: async (Redis-streaming) channel with a claude-code turn. 
+ +Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + ClaudeCodeTurn) +with hand-built claude-code ``stream-json`` envelopes and a fake streaming +backend so the test runs fully offline (no claude-code CLI subprocess, no +Redis, no Agentex server). + +Native envelope shapes are copied verbatim from the claude-code turn test and +conformance fixtures (assistant tool_use -> Start(ToolRequestContent)+Done; +user tool_result -> Full(ToolResponseContent); assistant text -> +Start(TextContent)+Delta+Done; result envelope -> usage). + +What is tested +-------------- +- auto_send pushes the correct message contexts: tool_request + tool_response + + text (in that order). +- TurnResult.final_text equals the final assistant text. +- TurnResult.usage reflects the claude-code ``result`` envelope (input/output + tokens, cost, num_llm_calls from num_turns). +- With a SpanTracer + fake tracing, a tool span is derived on the async path. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual Redis streaming. +- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle. +- A real claude-code CLI subprocess / live model behaviour. + +See also: test_harness_claude_code_sync.py and test_harness_claude_code_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, AsyncIterator + +import pytest + +from agentex.types.task_message import TaskMessage +from agentex.lib.core.harness.types import TurnResult +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._claude_code_turn import ClaudeCodeTurn + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Native claude-code envelope fixtures +# --------------------------------------------------------------------------- + + +def _tool_then_text_envelopes() -> list[dict[str, Any]]: + return [ + { + "type": "assistant", + "message": { + "content": [ + { + "type": "tool_use", + "id": "call_read", + "name": "Read", + "input": {"path": "/workspace/README.md"}, + } + ] + }, + }, + { + "type": "user", + "message": { + "content": [ + { + "type": "tool_result", + "tool_use_id": "call_read", + "content": "# My Project — temperature 72F", + } + ] + }, + }, + { + "type": "assistant", + "message": {"content": [{"type": "text", "text": "The project file says 72F."}]}, + }, + { + "type": "result", + "usage": {"input_tokens": 200, "output_tokens": 80}, + "cost_usd": 0.015, + "num_turns": 2, + }, + ] + + +async def _aiter(envelopes: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in envelopes: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None: + self.sink = sink + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def 
__aenter__(self) -> "_FakeCtx": + self.sink.append(("open", self.ctype, self.task_message.content)) + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + self.sink.append(("close", self.ctype)) + + async def stream_update(self, update: Any) -> Any: + self.sink.append(("delta", self.ctype, update)) + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.sink: list[Any] = [] + self.messages_opened: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + return _FakeCtx(self.sink, ctype, initial_content) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_auto_send_turn( + envelopes: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> tuple[TurnResult, _FakeStreaming]: + fake_streaming = _FakeStreaming() + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = ClaudeCodeTurn(_aiter(envelopes)) + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class 
TestAsyncAutoSendMessageOrder: + async def test_tool_request_pushed_before_tool_response(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + + async def test_text_pushed_last(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert types[-1] == "text", f"Expected last type=text, got {types}" + + +class TestAsyncAutoSendContent: + async def test_tool_request_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + tool_reqs = [m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)] + assert len(tool_reqs) == 1 + assert tool_reqs[0].name == "Read" + + async def test_tool_response_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)] + assert len(tool_resps) == 1 + assert "72F" in str(tool_resps[0].content) + + async def test_tool_call_ids_match(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id == "call_read" + + +class TestAsyncAutoSendFinalTextAndUsage: + async def test_final_text_matches_last_text(self) -> None: + result, _ = await _run_auto_send_turn(_tool_then_text_envelopes()) + assert result.final_text == "The project file says 72F." 
+ + async def test_usage_from_result_envelope(self) -> None: + """TurnResult.usage reflects the claude-code result envelope.""" + result, _ = await _run_auto_send_turn(_tool_then_text_envelopes()) + assert result.usage is not None + assert result.usage.input_tokens == 200 + assert result.usage.output_tokens == 80 + assert result.usage.total_tokens == 280 + assert result.usage.cost_usd == pytest.approx(0.015) + assert result.usage.num_llm_calls == 2 + + async def test_context_lifecycle_open_then_close(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + opens = [e for e in fake_streaming.sink if e[0] == "open"] + closes = [e for e in fake_streaming.sink if e[0] == "close"] + assert len(opens) == len(closes) + assert len(opens) == len(fake_streaming.messages_opened) + + +class TestAsyncAutoSendSpanDerivation: + async def test_tool_span_derived_on_async_path(self) -> None: + fake_tracing = FakeTracing() + await _run_auto_send_turn( + _tool_then_text_envelopes(), + trace_id="trace1", + parent_span_id="parent", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert fake_tracing.started[0][0] == "Read" + assert len(fake_tracing.ended) == 1 + assert "72F" in str(fake_tracing.ended[0][1]) diff --git a/tests/lib/core/harness/test_harness_claude_code_sync.py b/tests/lib/core/harness/test_harness_claude_code_sync.py new file mode 100644 index 000000000..b53485499 --- /dev/null +++ b/tests/lib/core/harness/test_harness_claude_code_sync.py @@ -0,0 +1,303 @@ +"""Integration test: sync (HTTP-yield) channel with a claude-code turn. + +Exercises the unified harness surface (UnifiedEmitter.yield_turn + ClaudeCodeTurn) +with hand-built claude-code ``stream-json`` envelopes so the test runs fully +offline (no claude-code CLI subprocess, no API keys, no Agentex server). + +Native stream shapes +--------------------- +``ClaudeCodeTurn`` consumes an async iterator of raw claude-code stream-json +envelopes (str | dict). 
The envelope shapes used here are copied verbatim from +the claude-code turn test (tests/lib/adk/test_claude_code_turn.py) and the +claude-code conformance fixtures +(tests/lib/core/harness/conformance/test_claude_code_conformance.py): + + assistant text block -> Start(TextContent) + Delta + Done + assistant tool_use -> Start(ToolRequestContent) + Done + user tool_result -> Full(ToolResponseContent) + assistant thinking -> Start(ReasoningContent) + Delta + Done + +What is tested +-------------- +- The sync handler forwards StreamTaskMessage* events in canonical order: + tool_request (Start+Done) -> tool_response (Full) -> text. +- The tool_response carries the tool_result content, keyed by tool_use_id. +- With a trace_id + fake tracing, the SpanDeriver opens a tool span on + Done(tool_request) and closes it on the matching Full(tool_response), and + opens/closes a reasoning span for a thinking block. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual HTTP streaming over the ACP sync endpoint. +- A real claude-code CLI subprocess / live model behaviour. +- The full FastACP request/response lifecycle. + +See also: test_harness_claude_code_async.py and test_harness_claude_code_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, AsyncIterator, override + +from agentex.lib.core.harness.types import OpenSpan, CloseSpan +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._claude_code_turn import ClaudeCodeTurn + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Native claude-code envelope fixtures (copied from the turn + conformance tests) +# --------------------------------------------------------------------------- + + +def _tool_then_text_envelopes() -> list[dict[str, Any]]: + """tool_use -> tool_result -> final text, then a result envelope with usage.""" + return [ + { + "type": "assistant", + "message": { + "content": [ + { + "type": "tool_use", + "id": "call_read", + "name": "Read", + "input": {"path": "/workspace/README.md"}, + } + ] + }, + }, + { + "type": "user", + "message": { + "content": [ + { + "type": "tool_result", + "tool_use_id": "call_read", + "content": "# My Project — temperature 72F", + } + ] + }, + }, + { + "type": "assistant", + "message": {"content": [{"type": "text", "text": "The project file says 72F."}]}, + }, + { + "type": "result", + "usage": {"input_tokens": 100, "output_tokens": 50}, + "cost_usd": 0.01, + "num_turns": 2, + }, + ] + + +def _thinking_envelopes() -> list[dict[str, Any]]: + return [ + { + "type": "assistant", + "message": { + "content": [ + {"type": "thinking", "thinking": "Let me think.\nStep 1: check the facts."}, + {"type": "text", "text": "Here is my answer."}, + ] + }, + }, + {"type": "result", "usage": {"input_tokens": 10, "output_tokens": 5}}, + ] + + 
+async def _aiter(envelopes: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in envelopes: + yield e + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_yield_turn( + envelopes: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> list[Any]: + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = ClaudeCodeTurn(_aiter(envelopes)) + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + ) + return [ev async for ev in emitter.yield_turn(turn)] + + +# --------------------------------------------------------------------------- +# Tests: event order and content +# --------------------------------------------------------------------------- + + +class TestSyncYieldEventOrder: + async def test_tool_request_precedes_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + content_types = [ + getattr(getattr(ev, "content", None), "type", None) + for ev in events + if isinstance(ev, (StreamTaskMessageStart, StreamTaskMessageFull)) + ] + assert "tool_request" in content_types + assert "tool_response" in content_types + assert content_types.index("tool_request") < content_types.index("tool_response") + + async def test_text_appears_after_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + tool_resp_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageFull) + and getattr(getattr(ev, "content", None), "type", None) == "tool_response" + ) + text_start_pos = next( + i + for i, ev 
in enumerate(events) + if isinstance(ev, StreamTaskMessageStart) and getattr(getattr(ev, "content", None), "type", None) == "text" + ) + assert tool_resp_pos < text_start_pos + + async def test_tool_response_carries_result_keyed_by_tool_use_id(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + full_responses = [ + ev.content + for ev in events + if isinstance(ev, StreamTaskMessageFull) and isinstance(getattr(ev, "content", None), ToolResponseContent) + ] + assert len(full_responses) == 1 + tool_response = full_responses[0] + assert isinstance(tool_response, ToolResponseContent) + assert tool_response.tool_call_id == "call_read" + assert "72F" in str(tool_response.content) + + async def test_tool_request_is_read(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + tool_reqs = [ + ev.content + for ev in events + if isinstance(getattr(ev, "content", None), ToolRequestContent) + ] + assert any(isinstance(c, ToolRequestContent) and c.name == "Read" for c in tool_reqs) + + async def test_every_start_has_matching_done(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + starts = {ev.index for ev in events if isinstance(ev, StreamTaskMessageStart)} + dones = {ev.index for ev in events if isinstance(ev, StreamTaskMessageDone)} + assert starts == dones, f"Unmatched Start/Done indices: starts={starts} dones={dones}" + + +# --------------------------------------------------------------------------- +# Tests: span derivation on the yield path +# --------------------------------------------------------------------------- + + +class TestSyncYieldSpanDerivation: + async def test_tool_span_opened_and_closed(self) -> None: + """Done(tool_request) opens a tool span; Full(tool_response) closes it.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_envelopes(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) 
== 1 + assert len(fake_tracing.ended) == 1 + name, parent_id, _ = fake_tracing.started[0] + assert name == "Read" + assert parent_id == "parent-span" + + async def test_tool_span_output_is_tool_result(self) -> None: + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_envelopes(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + name, output = fake_tracing.ended[0] + assert name == "Read" + assert "72F" in str(output) + + async def test_reasoning_span_for_thinking_block(self) -> None: + """A thinking block opens and closes a reasoning span.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _thinking_envelopes(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + assert fake_tracing.started_names == ["reasoning"] + assert len(fake_tracing.ended) == 1 + + async def test_no_trace_id_means_no_spans(self) -> None: + fake_tracing = FakeTracing() + turn = ClaudeCodeTurn(_aiter(_tool_then_text_envelopes())) + emitter = UnifiedEmitter(task_id="task1", trace_id=None, parent_span_id=None, tracing=fake_tracing) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_tracer_false_suppresses_spans(self) -> None: + fake_tracing = FakeTracing() + turn = ClaudeCodeTurn(_aiter(_tool_then_text_envelopes())) + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent-span", + tracer=False, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_span_signal_types(self) -> None: + received_signals: list[Any] = [] + + class _RecordingTracer(SpanTracer): + @override + async def handle(self, signal: Any) -> None: + received_signals.append(signal) + await super().handle(signal) + + fake_tracing = FakeTracing() + tracer = _RecordingTracer( + trace_id="trace1", + 
parent_span_id="parent", + task_id="task1", + tracing=fake_tracing, + ) + turn = ClaudeCodeTurn(_aiter(_tool_then_text_envelopes())) + emitter = UnifiedEmitter(task_id="task1", trace_id="trace1", parent_span_id="parent", tracer=tracer) + [_ async for _ in emitter.yield_turn(turn)] + + tool_signals = [s for s in received_signals if getattr(s, "name", None) == "Read"] + assert len(tool_signals) >= 1 + assert isinstance(received_signals[0], OpenSpan) + assert any(isinstance(s, CloseSpan) for s in received_signals) diff --git a/tests/lib/core/harness/test_harness_claude_code_temporal.py b/tests/lib/core/harness/test_harness_claude_code_temporal.py new file mode 100644 index 000000000..b643f0d20 --- /dev/null +++ b/tests/lib/core/harness/test_harness_claude_code_temporal.py @@ -0,0 +1,183 @@ +"""Integration test: Temporal channel with a claude-code turn, offline. + +The claude-code tap is a pure library adapter (no Temporal-specific helper such +as langgraph's ``stream_langgraph_events``). In a Temporal deployment the +claude-code CLI runs inside a Temporal activity and the resulting canonical +stream is delivered via the SAME ``UnifiedEmitter.auto_send_turn`` path used by +the non-temporal async channel. The only temporal-specific concern at the +harness boundary is that the activity stamps messages with a deterministic +``created_at`` (e.g. ``workflow.now()``) for replay determinism. + +This suite therefore exercises the auto_send path inside an activity-style call +plus the temporal-only contract: ``created_at`` is threaded through to every +streaming context. The native claude-code envelope shapes are copied verbatim +from the claude-code turn test / conformance fixtures. + +What is tested +-------------- +- The canonical message sequence (tool_request -> tool_response -> text) is + delivered via auto_send_turn, exactly as inside a Temporal activity. 
+- ``created_at`` passed to ``auto_send_turn`` is forwarded to every + ``streaming_task_message_context`` call (deterministic timestamping). +- Final text + usage from the result envelope are returned. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Temporal scheduling / durability / replay behaviour. +- Redis streaming (requires a running Redis instance). +- A real claude-code CLI subprocess / live model behaviour. + +See also: test_harness_claude_code_sync.py and test_harness_claude_code_async.py. +""" + +from __future__ import annotations + +from typing import Any, AsyncIterator +from datetime import datetime, timezone + +from agentex.types.task_message import TaskMessage +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._claude_code_turn import ClaudeCodeTurn + + +def _tool_then_text_envelopes() -> list[dict[str, Any]]: + return [ + { + "type": "assistant", + "message": { + "content": [ + { + "type": "tool_use", + "id": "call_read", + "name": "Read", + "input": {"path": "/workspace/README.md"}, + } + ] + }, + }, + { + "type": "user", + "message": { + "content": [ + {"type": "tool_result", "tool_use_id": "call_read", "content": "# My Project — 72F"} + ] + }, + }, + { + "type": "assistant", + "message": {"content": [{"type": "text", "text": "The project file says 72F."}]}, + }, + {"type": "result", "usage": {"input_tokens": 50, "output_tokens": 20}, "num_turns": 2}, + ] + + +async def _aiter(envelopes: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in envelopes: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend that records created_at +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def 
__init__(self, ctype: str, initial_content: Any) -> None: + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + pass + + async def stream_update(self, update: Any) -> Any: + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.messages_opened: list[Any] = [] + self.created_ats: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + self.created_ats.append(created_at) + return _FakeCtx(ctype, initial_content) + + +async def _run_activity( + envelopes: list[dict[str, Any]], created_at: datetime | None +) -> tuple[Any, _FakeStreaming]: + fake_streaming = _FakeStreaming() + turn = ClaudeCodeTurn(_aiter(envelopes)) + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn, created_at=created_at) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTemporalActivityDelivery: + async def test_canonical_sequence_delivered(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=None) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + assert types[-1] == "text" + + async def 
test_tool_round_trip_keyed_correctly(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=None) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id == "call_read" + + async def test_final_text_and_usage(self) -> None: + result, _ = await _run_activity(_tool_then_text_envelopes(), created_at=None) + assert result.final_text == "The project file says 72F." + assert result.usage.input_tokens == 50 + assert result.usage.num_llm_calls == 2 + + +class TestTemporalCreatedAtThreading: + async def test_created_at_threaded_to_all_contexts(self) -> None: + fixed = datetime(2026, 6, 22, 12, 0, 0, tzinfo=timezone.utc) + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=fixed) + assert len(fake_streaming.created_ats) == len(fake_streaming.messages_opened) + assert all(ts == fixed for ts in fake_streaming.created_ats), ( + f"Expected every context stamped with {fixed}, got {fake_streaming.created_ats}" + ) + + async def test_default_created_at_is_none(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=None) + assert all(ts is None for ts in fake_streaming.created_ats) + + async def test_created_at_deterministic_across_runs(self) -> None: + fixed = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + _, first = await _run_activity(_tool_then_text_envelopes(), created_at=fixed) + _, second = await _run_activity(_tool_then_text_envelopes(), created_at=fixed) + assert first.created_ats == second.created_ats diff --git a/tests/lib/core/harness/test_harness_codex_async.py b/tests/lib/core/harness/test_harness_codex_async.py new file mode 100644 index 000000000..c31ebfa49 --- /dev/null +++ b/tests/lib/core/harness/test_harness_codex_async.py @@ -0,0 +1,228 @@ 
+"""Integration test: async (Redis-streaming) channel with a codex turn. + +Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + CodexTurn) +with hand-built codex ``exec --json`` event dicts and a fake streaming backend +so the test runs fully offline (no codex CLI subprocess, no Redis, no Agentex +server). + +Native event shapes are copied verbatim from the codex turn test / conformance +fixtures (command_execution -> tool round-trip; agent_message -> text; +turn.completed -> usage). + +What is tested +-------------- +- auto_send pushes the correct message contexts: tool_request + tool_response + + text (in that order). +- TurnResult.final_text equals the final agent_message text. +- TurnResult.usage reflects the codex ``turn.completed`` usage (input/output/ + total tokens) plus the locally-counted num_tool_calls. +- With a SpanTracer + fake tracing, a tool span is derived on the async path. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual Redis streaming. +- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle. +- A real codex CLI subprocess / live model behaviour. + +See also: test_harness_codex_sync.py and test_harness_codex_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, AsyncIterator + +from agentex.types.task_message import TaskMessage +from agentex.lib.core.harness.types import TurnResult +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._codex_turn import CodexTurn + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Native codex event fixtures +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[dict[str, Any]]: + return [ + {"type": "thread.started", "thread_id": "thread-abc"}, + { + "type": "item.started", + "item": {"id": "tool1", "type": "command_execution", "command": "cat weather.txt"}, + }, + { + "type": "item.completed", + "item": { + "id": "tool1", + "type": "command_execution", + "command": "cat weather.txt", + "aggregated_output": "sunny and 72F", + "exit_code": 0, + }, + }, + {"type": "item.started", "item": {"id": "msg1", "type": "agent_message", "text": ""}}, + { + "type": "item.completed", + "item": {"id": "msg1", "type": "agent_message", "text": "The weather is sunny and 72F."}, + }, + { + "type": "turn.completed", + "usage": {"input_tokens": 20, "output_tokens": 8, "total_tokens": 28}, + }, + ] + + +async def _aiter(events: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None: + self.sink = sink + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", 
task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + self.sink.append(("open", self.ctype, self.task_message.content)) + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + self.sink.append(("close", self.ctype)) + + async def stream_update(self, update: Any) -> Any: + self.sink.append(("delta", self.ctype, update)) + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.sink: list[Any] = [] + self.messages_opened: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + return _FakeCtx(self.sink, ctype, initial_content) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_auto_send_turn( + events: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> tuple[TurnResult, _FakeStreaming]: + fake_streaming = _FakeStreaming() + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = CodexTurn(_aiter(events), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# 
--------------------------------------------------------------------------- + + +class TestAsyncAutoSendMessageOrder: + async def test_tool_request_pushed_before_tool_response(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + + async def test_text_pushed_last(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert types[-1] == "text", f"Expected last type=text, got {types}" + + +class TestAsyncAutoSendContent: + async def test_tool_response_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)] + assert len(tool_resps) == 1 + assert "72F" in str(tool_resps[0].content) + + async def test_tool_call_ids_match(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id + + +class TestAsyncAutoSendFinalTextAndUsage: + async def test_final_text_matches_last_text(self) -> None: + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.final_text == "The weather is sunny and 72F." 
+ + async def test_usage_from_turn_completed(self) -> None: + """TurnResult.usage reflects the codex turn.completed usage + tool count.""" + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.usage is not None + assert result.usage.input_tokens == 20 + assert result.usage.output_tokens == 8 + assert result.usage.total_tokens == 28 + assert result.usage.model == "o4-mini" + assert result.usage.num_tool_calls == 1 + assert result.usage.num_llm_calls == 1 + + async def test_context_lifecycle_open_then_close(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + opens = [e for e in fake_streaming.sink if e[0] == "open"] + closes = [e for e in fake_streaming.sink if e[0] == "close"] + assert len(opens) == len(closes) + assert len(opens) == len(fake_streaming.messages_opened) + + +class TestAsyncAutoSendSpanDerivation: + async def test_tool_span_derived_on_async_path(self) -> None: + fake_tracing = FakeTracing() + await _run_auto_send_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert len(fake_tracing.ended) == 1 + assert "72F" in str(fake_tracing.ended[0][1]) diff --git a/tests/lib/core/harness/test_harness_codex_sync.py b/tests/lib/core/harness/test_harness_codex_sync.py new file mode 100644 index 000000000..0209e1e3d --- /dev/null +++ b/tests/lib/core/harness/test_harness_codex_sync.py @@ -0,0 +1,278 @@ +"""Integration test: sync (HTTP-yield) channel with a codex turn. + +Exercises the unified harness surface (UnifiedEmitter.yield_turn + CodexTurn) +with hand-built codex ``exec --json`` event dicts so the test runs fully offline +(no codex CLI subprocess, no API keys, no Agentex server). + +Native stream shapes +--------------------- +``CodexTurn`` consumes an async iterator of raw codex events (str | dict). 
The +event shapes used here are copied verbatim from the codex turn test +(tests/lib/adk/test_codex_turn.py) and the codex conformance fixtures +(tests/lib/core/harness/conformance/test_codex_conformance.py): + + command_execution item -> Start(ToolRequestContent) + Done + Full(ToolResponseContent) + agent_message item -> Start(TextContent) + ... + Full/Done + reasoning item -> Start(ReasoningContent) + Full(ReasoningContent) + turn.completed -> usage + +Reasoning note +-------------- +The codex converter emits reasoning as Start(ReasoningContent) + Full(ReasoningContent) +with NO Done event. The SpanDeriver opens a reasoning span on Start but only +closes it on a Done; with no Done, the reasoning span is closed by flush() at +end of stream (is_complete=False). This is asserted explicitly below rather than +glossed over — it is a real codex-specific quirk, not a missing channel. + +What is tested +-------------- +- The sync handler forwards StreamTaskMessage* events in canonical order: + tool_request (Start+Done) -> tool_response (Full) -> text. +- The tool_response carries the command output, keyed by item id. +- With a trace_id + fake tracing, a tool span is opened on Done(tool_request) + and closed on the matching Full(tool_response), and a reasoning span is + opened (closed-by-flush) for a reasoning item. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual HTTP streaming over the ACP sync endpoint. +- A real codex CLI subprocess / live model behaviour. +- The full FastACP request/response lifecycle. + +See also: test_harness_codex_async.py and test_harness_codex_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, AsyncIterator, override + +from agentex.lib.core.harness.types import OpenSpan, CloseSpan +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageFull, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._codex_turn import CodexTurn + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Native codex event fixtures (copied from the turn + conformance tests) +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[dict[str, Any]]: + """A command_execution tool round-trip followed by a final text reply.""" + return [ + {"type": "thread.started", "thread_id": "thread-abc"}, + {"type": "turn.started"}, + { + "type": "item.started", + "item": {"id": "tool1", "type": "command_execution", "command": "cat weather.txt"}, + }, + { + "type": "item.completed", + "item": { + "id": "tool1", + "type": "command_execution", + "command": "cat weather.txt", + "aggregated_output": "sunny and 72F", + "exit_code": 0, + }, + }, + {"type": "item.started", "item": {"id": "msg1", "type": "agent_message", "text": ""}}, + { + "type": "item.completed", + "item": {"id": "msg1", "type": "agent_message", "text": "The weather is sunny and 72F."}, + }, + { + "type": "turn.completed", + "usage": {"input_tokens": 20, "output_tokens": 8, "total_tokens": 28}, + }, + ] + + +def _reasoning_events() -> list[dict[str, Any]]: + return [ + {"type": "thread.started", "thread_id": "thread-reason"}, + {"type": "item.started", "item": {"id": "r1", "type": "reasoning", "text": ""}}, + { + "type": "item.completed", + "item": {"id": "r1", "type": 
"reasoning", "text": "Step 1: analyze\nStep 2: solve"}, + }, + {"type": "item.started", "item": {"id": "msg2", "type": "agent_message", "text": ""}}, + {"type": "item.completed", "item": {"id": "msg2", "type": "agent_message", "text": "42"}}, + {"type": "turn.completed", "usage": {"input_tokens": 30, "output_tokens": 20, "total_tokens": 50}}, + ] + + +async def _aiter(events: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_yield_turn( + events: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> list[Any]: + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = CodexTurn(_aiter(events), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + ) + return [ev async for ev in emitter.yield_turn(turn)] + + +# --------------------------------------------------------------------------- +# Tests: event order and content +# --------------------------------------------------------------------------- + + +class TestSyncYieldEventOrder: + async def test_tool_request_precedes_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + content_types = [ + getattr(getattr(ev, "content", None), "type", None) + for ev in events + if isinstance(ev, (StreamTaskMessageStart, StreamTaskMessageFull)) + ] + assert "tool_request" in content_types + assert "tool_response" in content_types + assert content_types.index("tool_request") < content_types.index("tool_response") + + 
async def test_text_appears_after_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + tool_resp_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageFull) + and getattr(getattr(ev, "content", None), "type", None) == "tool_response" + ) + text_start_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageStart) and getattr(getattr(ev, "content", None), "type", None) == "text" + ) + assert tool_resp_pos < text_start_pos + + async def test_tool_response_carries_command_output(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + full_responses = [ + ev.content + for ev in events + if isinstance(ev, StreamTaskMessageFull) and isinstance(getattr(ev, "content", None), ToolResponseContent) + ] + assert len(full_responses) == 1 + tool_response = full_responses[0] + assert isinstance(tool_response, ToolResponseContent) + assert "72F" in str(tool_response.content) + + async def test_tool_request_present(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + tool_reqs = [ + ev.content for ev in events if isinstance(getattr(ev, "content", None), ToolRequestContent) + ] + assert len(tool_reqs) == 1 + + +# --------------------------------------------------------------------------- +# Tests: span derivation on the yield path +# --------------------------------------------------------------------------- + + +class TestSyncYieldSpanDerivation: + async def test_tool_span_opened_and_closed(self) -> None: + """Done(tool_request) opens a tool span; Full(tool_response) closes it.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert len(fake_tracing.ended) == 1 + _name, parent_id, _input = fake_tracing.started[0] + assert parent_id == "parent-span" + + async def 
test_tool_span_output_is_command_output(self) -> None: + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + _name, output = fake_tracing.ended[0] + assert "72F" in str(output) + + async def test_reasoning_span_opened_then_flush_closed(self) -> None: + """A codex reasoning item emits Start+Full (no Done): the reasoning span + opens and is closed by flush() at end of stream (is_complete=False).""" + received_signals: list[Any] = [] + + class _RecordingTracer(SpanTracer): + @override + async def handle(self, signal: Any) -> None: + received_signals.append(signal) + await super().handle(signal) + + fake_tracing = FakeTracing() + tracer = _RecordingTracer( + trace_id="trace1", + parent_span_id="parent-span", + task_id="task1", + tracing=fake_tracing, + ) + turn = CodexTurn(_aiter(_reasoning_events()), model="o4-mini") + emitter = UnifiedEmitter(task_id="task1", trace_id="trace1", parent_span_id="parent-span", tracer=tracer) + [_ async for _ in emitter.yield_turn(turn)] + + opens = [s for s in received_signals if isinstance(s, OpenSpan) and s.kind == "reasoning"] + closes = [s for s in received_signals if isinstance(s, CloseSpan) and str(s.key).startswith("reasoning:")] + assert len(opens) == 1, "Reasoning Start must open exactly one reasoning span" + assert len(closes) == 1, "Reasoning span must be closed (by flush) at end of stream" + assert closes[0].is_complete is False, "No Done event, so the reasoning span is flush-closed as incomplete" + + async def test_no_trace_id_means_no_spans(self) -> None: + fake_tracing = FakeTracing() + turn = CodexTurn(_aiter(_tool_then_text_events()), model="o4-mini") + emitter = UnifiedEmitter(task_id="task1", trace_id=None, parent_span_id=None, tracing=fake_tracing) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def 
test_tracer_false_suppresses_spans(self) -> None: + fake_tracing = FakeTracing() + turn = CodexTurn(_aiter(_tool_then_text_events()), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent-span", + tracer=False, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] diff --git a/tests/lib/core/harness/test_harness_codex_temporal.py b/tests/lib/core/harness/test_harness_codex_temporal.py new file mode 100644 index 000000000..0af0b862b --- /dev/null +++ b/tests/lib/core/harness/test_harness_codex_temporal.py @@ -0,0 +1,180 @@ +"""Integration test: Temporal channel with a codex turn, offline. + +The codex tap is a pure library adapter (subprocess/sandbox provisioning lives +in the golden agent; there is no codex-specific temporal helper like langgraph's +``stream_langgraph_events``). In a Temporal deployment the codex CLI runs inside +a Temporal activity and the resulting canonical stream is delivered via the SAME +``UnifiedEmitter.auto_send_turn`` path used by the non-temporal async channel. +The only temporal-specific concern at the harness boundary is that the activity +stamps messages with a deterministic ``created_at`` (e.g. ``workflow.now()``) +for replay determinism. + +This suite exercises the auto_send path inside an activity-style call plus the +temporal-only contract: ``created_at`` is threaded through to every streaming +context. The native codex event shapes are copied verbatim from the codex turn +test / conformance fixtures. + +What is tested +-------------- +- The canonical message sequence (tool_request -> tool_response -> text) is + delivered via auto_send_turn, exactly as inside a Temporal activity. +- ``created_at`` passed to ``auto_send_turn`` is forwarded to every + ``streaming_task_message_context`` call (deterministic timestamping). +- Final text + usage from turn.completed are returned. 
+ +What is NOT covered without live infrastructure +----------------------------------------------- +- Temporal scheduling / durability / replay behaviour. +- Redis streaming (requires a running Redis instance). +- A real codex CLI subprocess / live model behaviour. + +See also: test_harness_codex_sync.py and test_harness_codex_async.py. +""" + +from __future__ import annotations + +from typing import Any, AsyncIterator +from datetime import datetime, timezone + +from agentex.types.task_message import TaskMessage +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._codex_turn import CodexTurn + + +def _tool_then_text_events() -> list[dict[str, Any]]: + return [ + {"type": "thread.started", "thread_id": "thread-abc"}, + { + "type": "item.started", + "item": {"id": "tool1", "type": "command_execution", "command": "cat weather.txt"}, + }, + { + "type": "item.completed", + "item": { + "id": "tool1", + "type": "command_execution", + "command": "cat weather.txt", + "aggregated_output": "sunny and 72F", + "exit_code": 0, + }, + }, + {"type": "item.started", "item": {"id": "msg1", "type": "agent_message", "text": ""}}, + { + "type": "item.completed", + "item": {"id": "msg1", "type": "agent_message", "text": "The weather is sunny and 72F."}, + }, + { + "type": "turn.completed", + "usage": {"input_tokens": 20, "output_tokens": 8, "total_tokens": 28}, + }, + ] + + +async def _aiter(events: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend that records created_at +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, ctype: str, initial_content: Any) -> None: + self.ctype = ctype + 
self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + pass + + async def stream_update(self, update: Any) -> Any: + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.messages_opened: list[Any] = [] + self.created_ats: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + self.created_ats.append(created_at) + return _FakeCtx(ctype, initial_content) + + +async def _run_activity(events: list[dict[str, Any]], created_at: datetime | None) -> tuple[Any, _FakeStreaming]: + fake_streaming = _FakeStreaming() + turn = CodexTurn(_aiter(events), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn, created_at=created_at) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTemporalActivityDelivery: + async def test_canonical_sequence_delivered(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=None) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + assert types[-1] == "text" + + async def test_tool_round_trip_keyed_correctly(self) -> None: + _, fake_streaming = await 
_run_activity(_tool_then_text_events(), created_at=None) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id + + async def test_final_text_and_usage(self) -> None: + result, _ = await _run_activity(_tool_then_text_events(), created_at=None) + assert result.final_text == "The weather is sunny and 72F." + assert result.usage.total_tokens == 28 + assert result.usage.num_tool_calls == 1 + + +class TestTemporalCreatedAtThreading: + async def test_created_at_threaded_to_all_contexts(self) -> None: + fixed = datetime(2026, 6, 22, 12, 0, 0, tzinfo=timezone.utc) + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert len(fake_streaming.created_ats) == len(fake_streaming.messages_opened) + assert all(ts == fixed for ts in fake_streaming.created_ats), ( + f"Expected every context stamped with {fixed}, got {fake_streaming.created_ats}" + ) + + async def test_default_created_at_is_none(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=None) + assert all(ts is None for ts in fake_streaming.created_ats) + + async def test_created_at_deterministic_across_runs(self) -> None: + fixed = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + _, first = await _run_activity(_tool_then_text_events(), created_at=fixed) + _, second = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert first.created_ats == second.created_ats diff --git a/tests/lib/core/harness/test_harness_langgraph_async.py b/tests/lib/core/harness/test_harness_langgraph_async.py index 39bf5bc66..09e92102b 100644 --- a/tests/lib/core/harness/test_harness_langgraph_async.py +++ b/tests/lib/core/harness/test_harness_langgraph_async.py @@ -13,10 +13,10 @@ -------------- - The async handler pushes the correct sequence of 
messages to the fake streaming backend: Full(ToolRequest) + Full(ToolResponse) + text Start/Delta/Done. -- final_text accumulates all text (not just last segment — AGX1-377 unified behavior). +- final_text accumulates all text (not just last segment — unified behavior). - Tool messages go through streaming_task_message_context (not messages.create). -- With a SpanTracer, no tool spans are produced (AGX1-377: Full events are not - handled by SpanDeriver today). +- With a SpanTracer, Full tool events produce tool spans (request opens, response + closes), aligning LangGraph tracing with the Start+Done harnesses. What is NOT covered without live infrastructure ----------------------------------------------- @@ -46,6 +46,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Remove conftest stubs so real langchain_core types are used # --------------------------------------------------------------------------- @@ -102,30 +104,6 @@ def streaming_task_message_context(self, task_id: str, initial_content: Any, **k return ctx -# --------------------------------------------------------------------------- -# Fake tracing backend -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def __init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, Any]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span(self, *, trace_id: str, name: str, **kw: Any) -> _FakeSpan: - self.started.append((name, kw.get("parent_id"))) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # 
--------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -142,9 +120,9 @@ async def _gen(): async def _run_auto_send_turn( stream_events: list[tuple[str, Any]], trace_id: str | None = None, -) -> tuple[TurnResult, _FakeStreaming, _FakeTracing | None]: +) -> tuple[TurnResult, _FakeStreaming, FakeTracing | None]: fake_streaming = _FakeStreaming() - fake_tracing = _FakeTracing() if trace_id else None + fake_tracing = FakeTracing() if trace_id else None tracer: SpanTracer | bool = False if trace_id and fake_tracing is not None: @@ -275,7 +253,7 @@ async def test_turn_usage_populated_after_events_consumed(self): assert usage.total_tokens == 15 async def test_tracer_produces_tool_spans_for_full_events(self): - """AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes). + """SpanDeriver handles Full tool events (request opens, response closes). Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it. This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents). 
diff --git a/tests/lib/core/harness/test_harness_langgraph_sync.py b/tests/lib/core/harness/test_harness_langgraph_sync.py index 9f67dd2b6..67d213b6a 100644 --- a/tests/lib/core/harness/test_harness_langgraph_sync.py +++ b/tests/lib/core/harness/test_harness_langgraph_sync.py @@ -46,6 +46,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Remove conftest stubs so real langchain_core types are used # --------------------------------------------------------------------------- @@ -62,32 +64,6 @@ def _real_langchain_core(): sys.modules.update(saved) -# --------------------------------------------------------------------------- -# Fake tracing backend -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def __init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, Any]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span( - self, *, trace_id: str, name: str, input: Any = None, parent_id: Any = None, **kw: Any - ) -> _FakeSpan: - self.started.append((name, parent_id)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -103,8 +79,8 @@ async def _gen(): async def _run_yield_turn( stream_events: list[tuple[str, Any]], trace_id: str | None = None -) -> tuple[list[Any], _FakeTracing | None]: - fake_tracing = _FakeTracing() if trace_id else None +) -> tuple[list[Any], FakeTracing | None]: + fake_tracing = FakeTracing() if trace_id else None 
tracer: SpanTracer | bool | None = None if trace_id and fake_tracing is not None: tracer = SpanTracer(trace_id=trace_id, parent_span_id=None, task_id="task1", tracing=fake_tracing) @@ -191,7 +167,7 @@ async def test_empty_stream_yields_nothing(self): assert out == [] async def test_tracer_produces_tool_spans_for_full_events(self): - """AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes). + """SpanDeriver handles Full tool events (request opens, response closes). Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it. This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents). diff --git a/tests/lib/core/harness/test_harness_langgraph_temporal.py b/tests/lib/core/harness/test_harness_langgraph_temporal.py index 1a094a33c..219e92229 100644 --- a/tests/lib/core/harness/test_harness_langgraph_temporal.py +++ b/tests/lib/core/harness/test_harness_langgraph_temporal.py @@ -1,7 +1,7 @@ """Integration test: Temporal channel with a LangGraph agent. -The Temporal LangGraph agent pattern uses ``emit_langgraph_messages`` (from -``_langgraph_messages.py``) inside a Temporal activity. That module is not +The Temporal LangGraph agent pattern uses ``emit_langgraph_messages`` (now in +``_langgraph_sync.py``) inside a Temporal activity. That helper is not yet unified onto the harness surface (it has its own Redis-streaming code). 
This test file verifies the LangGraph Temporal agent's streaming behavior using @@ -43,8 +43,7 @@ from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent -from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn -from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn, stream_langgraph_events # --------------------------------------------------------------------------- # Remove conftest stubs so real langchain_core types are used diff --git a/tests/lib/core/harness/test_harness_openai_async.py b/tests/lib/core/harness/test_harness_openai_async.py new file mode 100644 index 000000000..1329b94b9 --- /dev/null +++ b/tests/lib/core/harness/test_harness_openai_async.py @@ -0,0 +1,305 @@ +"""Integration test: async (Redis-streaming) channel with an OpenAI-agents turn. + +Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + OpenAITurn) +with hand-built canonical StreamTaskMessage* streams and a fake streaming +backend so the test runs fully offline (no API keys, no Redis, no Agentex +server). + +The canonical event shapes are copied from the OpenAI converter contract +(see tests/lib/core/harness/conformance/test_openai_conformance.py): tool calls +are Full(ToolRequestContent) + Full(ToolResponseContent); text is +Start+Delta+Done. + +What is tested +-------------- +- auto_send pushes the correct message contexts to the fake streaming backend: + tool_request + tool_response + text (in that order). +- TurnResult.final_text equals the accumulated text deltas. +- TurnResult carries a TurnUsage; via the OpenAITurn result/converter path the + aggregated token usage (input/output/total + num_llm_calls) is surfaced in + TurnResult.usage. +- With a SpanTracer + fake tracing, a tool span is derived on the async path. 
+ +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual Redis streaming. +- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle. +- A real Runner.run_streamed execution / live OpenAI model behaviour. + +See also: test_harness_openai_sync.py and test_harness_openai_temporal.py. +""" + +from __future__ import annotations + +from typing import Any + +import pytest +from agents.usage import Usage + +from agentex.types.text_delta import TextDelta +from agentex.types.task_message import TaskMessage +from agentex.types.text_content import TextContent +from agentex.lib.core.harness.types import TurnResult, StreamTaskMessage +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._openai_turn import OpenAITurn + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Canonical event fixtures (copied from the OpenAI converter contract) +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[StreamTaskMessage]: + return [ + StreamTaskMessageFull( + type="full", + index=0, + content=ToolRequestContent( + type="tool_request", + author="agent", + tool_call_id="call_1", + name="get_weather", + arguments={"city": "Paris"}, + ), + ), + StreamTaskMessageFull( + type="full", + index=1, + content=ToolResponseContent( + type="tool_response", + author="agent", + tool_call_id="call_1", + name="get_weather", + content="The weather in Paris is sunny and 72F", + ), + ), + StreamTaskMessageStart( + type="start", + 
index=2, + content=TextContent(type="text", author="agent", content=""), + ), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="Sunny ")), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="and 72F.")), + StreamTaskMessageDone(type="done", index=2), + ] + + +async def _canonical_stream(events: list[StreamTaskMessage]): + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend (replaces adk.streaming; no Redis required) +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None: + self.sink = sink + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + self.sink.append(("open", self.ctype, self.task_message.content)) + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + self.sink.append(("close", self.ctype)) + + async def stream_update(self, update: Any) -> Any: + self.sink.append(("delta", self.ctype, update)) + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.sink: list[Any] = [] + self.messages_opened: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + return _FakeCtx(self.sink, ctype, initial_content) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_auto_send_turn( + events: list[StreamTaskMessage], + 
trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> tuple[TurnResult, _FakeStreaming]: + fake_streaming = _FakeStreaming() + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests: message order and content +# --------------------------------------------------------------------------- + + +class TestAsyncAutoSendMessageOrder: + async def test_tool_request_pushed_before_tool_response(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in message_types + assert message_types.index("tool_request") < message_types.index("tool_response") + + async def test_text_pushed_last(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert message_types[-1] == "text", f"Expected last message type=text, got {message_types}" + + async def test_exactly_three_messages(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + assert len(fake_streaming.messages_opened) == 3, ( + f"Expected 3 messages, got {[getattr(m, 'type', None) for m in fake_streaming.messages_opened]}" + ) + + +class TestAsyncAutoSendContentVerification: + async def 
test_tool_request_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_reqs = [m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)] + assert len(tool_reqs) == 1 + assert tool_reqs[0].name == "get_weather" + + async def test_tool_response_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)] + assert len(tool_resps) == 1 + assert "72F" in str(tool_resps[0].content) + assert tool_resps[0].name == "get_weather" + + async def test_tool_call_ids_match(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id + + +class TestAsyncAutoSendFinalTextAndUsage: + async def test_final_text_matches_deltas(self) -> None: + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.final_text == "Sunny and 72F." 
+ + async def test_turn_result_has_usage(self) -> None: + """An injected canonical stream has no run to read usage from, so usage + carries only the model name (input_tokens stays None).""" + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.usage is not None + assert result.usage.model == "gpt-4o" + + async def test_context_lifecycle_open_then_close(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + opens = [e for e in fake_streaming.sink if e[0] == "open"] + closes = [e for e in fake_streaming.sink if e[0] == "close"] + assert len(opens) == len(closes) == 3 + + async def test_usage_populated_from_result_path(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Via the OpenAITurn result/converter path, aggregated token usage is + surfaced on TurnResult.usage after the stream is consumed. + + Mirrors the OpenAI turn test: a fake RunResultStreaming exposes + raw_responses with a Usage, and the converter is monkeypatched to a + passthrough so the canonical text stream is delivered while usage is read + from raw_responses. 
+ """ + import agentex.lib.adk._modules._openai_turn as turn_mod + + canonical: list[StreamTaskMessage] = [ + StreamTaskMessageStart( + type="start", index=0, content=TextContent(type="text", author="agent", content="") + ), + StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="hi")), + StreamTaskMessageDone(type="done", index=0), + ] + + class _FakeResult: + def __init__(self) -> None: + self.raw_responses = [ + type("R", (), {"usage": Usage(requests=2, input_tokens=8, output_tokens=4, total_tokens=12)})() + ] + + def stream_events(self): # type: ignore[no-untyped-def] + return _canonical_stream(canonical) + + async def _passthrough(stream): # type: ignore[no-untyped-def] + async for e in stream: + yield e + + monkeypatch.setattr(turn_mod, "convert_openai_to_agentex_events", _passthrough) + + turn = OpenAITurn(result=_FakeResult(), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=_FakeStreaming(), + ) + result = await emitter.auto_send_turn(turn) + + assert result.final_text == "hi" + assert result.usage.model == "gpt-4o" + assert result.usage.num_llm_calls == 2 + assert result.usage.input_tokens == 8 + assert result.usage.output_tokens == 4 + assert result.usage.total_tokens == 12 + + +class TestAsyncAutoSendSpanDerivation: + async def test_tool_span_derived_on_async_path(self) -> None: + fake_tracing = FakeTracing() + await _run_auto_send_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert fake_tracing.started[0][0] == "get_weather" + assert len(fake_tracing.ended) == 1 diff --git a/tests/lib/core/harness/test_harness_openai_sync.py b/tests/lib/core/harness/test_harness_openai_sync.py new file mode 100644 index 000000000..34a9b72c6 --- /dev/null +++ b/tests/lib/core/harness/test_harness_openai_sync.py @@ -0,0 +1,323 @@ +"""Integration 
test: sync (HTTP-yield) channel with an OpenAI-agents turn. + +Exercises the unified harness surface (UnifiedEmitter.yield_turn + OpenAITurn) +with hand-built canonical StreamTaskMessage* streams so the test runs fully +offline (no API keys, no live OpenAI Agents run, no Agentex server). + +Why an injected canonical stream +-------------------------------- +OpenAI's native ``RunResultStreaming`` events are heavy SDK objects; the +``OpenAITurn`` accepts a pre-built canonical ``stream=`` of StreamTaskMessage* +events that bypasses ``convert_openai_to_agentex_events``. The shapes used here +are copied verbatim from the OpenAI converter contract exercised by +``tests/lib/core/harness/conformance/test_openai_conformance.py`` (tool calls +are Full(ToolRequestContent) + Full(ToolResponseContent); reasoning is +Start(ReasoningContent) + Delta + Done). This keeps the canonical stream +faithful to what the live converter produces while staying offline. + +What is tested +-------------- +- The sync handler forwards StreamTaskMessage* events verbatim in canonical + order: tool_request (Full) -> tool_response (Full) -> text (Start+Delta+Done). +- Final accumulated text equals the seeded text deltas. +- With a trace_id + fake tracing, a tool span is opened (OpenSpan) on + Full(ToolRequestContent) and closed (CloseSpan) on the matching + Full(ToolResponseContent), and a reasoning span is opened/closed for a + reasoning segment — proving the SpanDeriver is wired on the yield path. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual HTTP streaming over the ACP sync endpoint. +- A real ``Runner.run_streamed`` execution / live OpenAI model behaviour. +- ``convert_openai_to_agentex_events`` over real SDK events (covered by the + OpenAI turn + conformance suites). + +See also: test_harness_openai_async.py and test_harness_openai_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, override + +from agentex.types.text_delta import TextDelta +from agentex.types.text_content import TextContent +from agentex.lib.core.harness.types import OpenSpan, CloseSpan, StreamTaskMessage +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.types.reasoning_content import ReasoningContent +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._openai_turn import OpenAITurn +from agentex.types.reasoning_content_delta import ReasoningContentDelta + +from ._fakes import FakeTracing + +# --------------------------------------------------------------------------- +# Canonical event fixtures (copied from the OpenAI converter contract) +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[StreamTaskMessage]: + """A tool round-trip followed by a final text reply. + + Mirrors the OpenAI converter's tool path: a Full(ToolRequestContent) for the + call and a Full(ToolResponseContent) for the result (matched by tool_call_id), + then a streamed text answer. 
+ """ + return [ + StreamTaskMessageFull( + type="full", + index=0, + content=ToolRequestContent( + type="tool_request", + author="agent", + tool_call_id="call_1", + name="get_weather", + arguments={"city": "Paris"}, + ), + ), + StreamTaskMessageFull( + type="full", + index=1, + content=ToolResponseContent( + type="tool_response", + author="agent", + tool_call_id="call_1", + name="get_weather", + content="The weather in Paris is sunny and 72F", + ), + ), + StreamTaskMessageStart( + type="start", + index=2, + content=TextContent(type="text", author="agent", content=""), + ), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="Sunny ")), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="and 72F.")), + StreamTaskMessageDone(type="done", index=2), + ] + + +def _reasoning_events() -> list[StreamTaskMessage]: + """A reasoning segment: Start(ReasoningContent) + Delta + Done.""" + return [ + StreamTaskMessageStart( + type="start", + index=0, + content=ReasoningContent(type="reasoning", author="agent", summary=["Thinking..."]), + ), + StreamTaskMessageDelta( + type="delta", + index=0, + delta=ReasoningContentDelta(type="reasoning_content", content_index=0, content_delta="step 1"), + ), + StreamTaskMessageDone(type="done", index=0), + ] + + +async def _canonical_stream(events: list[StreamTaskMessage]): + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_yield_turn( + events: list[StreamTaskMessage], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> list[Any]: + """Drive the sync (yield) path and collect all yielded events.""" + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + 
parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + ) + return [ev async for ev in emitter.yield_turn(turn)] + + +# --------------------------------------------------------------------------- +# Tests: event order and content +# --------------------------------------------------------------------------- + + +class TestSyncYieldEventOrder: + async def test_tool_request_precedes_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + content_types = [ + getattr(getattr(ev, "content", None), "type", None) + for ev in events + if isinstance(ev, (StreamTaskMessageStart, StreamTaskMessageFull)) + ] + assert "tool_request" in content_types + assert "tool_response" in content_types + assert content_types.index("tool_request") < content_types.index("tool_response") + + async def test_text_appears_after_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + tool_resp_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageFull) + and getattr(getattr(ev, "content", None), "type", None) == "tool_response" + ) + text_start_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageStart) and getattr(getattr(ev, "content", None), "type", None) == "text" + ) + assert tool_resp_pos < text_start_pos + + async def test_tool_response_carries_weather_result(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + full_responses = [ + ev + for ev in events + if isinstance(ev, StreamTaskMessageFull) and isinstance(getattr(ev, "content", None), ToolResponseContent) + ] + assert len(full_responses) == 1 + tool_response = full_responses[0].content + assert isinstance(tool_response, 
ToolResponseContent) + assert "72F" in str(tool_response.content) + assert tool_response.name == "get_weather" + + async def test_accumulated_text_matches_deltas(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + accumulated = "".join( + ev.delta.text_delta + for ev in events + if isinstance(ev, StreamTaskMessageDelta) and isinstance(ev.delta, TextDelta) and ev.delta.text_delta + ) + assert accumulated == "Sunny and 72F." + + async def test_every_start_has_matching_done(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + starts = {ev.index for ev in events if isinstance(ev, StreamTaskMessageStart)} + dones = {ev.index for ev in events if isinstance(ev, StreamTaskMessageDone)} + assert starts == dones, f"Unmatched Start/Done indices: starts={starts} dones={dones}" + + +# --------------------------------------------------------------------------- +# Tests: span derivation on the yield path +# --------------------------------------------------------------------------- + + +class TestSyncYieldSpanDerivation: + async def test_tool_span_opened_and_closed(self) -> None: + """Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + + assert len(fake_tracing.started) == 1, "Expected exactly one tool span opened" + assert len(fake_tracing.ended) == 1, "Expected exactly one tool span closed" + name, parent_id, _ = fake_tracing.started[0] + assert name == "get_weather" + assert parent_id == "parent-span" + + async def test_tool_span_output_is_tool_result(self) -> None: + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + name, output = fake_tracing.ended[0] + assert name == "get_weather" + assert "72F" in str(output) 
+ + async def test_reasoning_span_opened_and_closed(self) -> None: + """A reasoning segment opens and closes a reasoning span.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _reasoning_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + assert fake_tracing.started_names == ["reasoning"] + assert len(fake_tracing.ended) == 1 + + async def test_no_trace_id_means_no_spans(self) -> None: + fake_tracing = FakeTracing() + turn = OpenAITurn(stream=_canonical_stream(_tool_then_text_events()), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_tracer_false_suppresses_spans(self) -> None: + fake_tracing = FakeTracing() + turn = OpenAITurn(stream=_canonical_stream(_tool_then_text_events()), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent-span", + tracer=False, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_span_signal_types(self) -> None: + """The signals received by the tracer are OpenSpan then CloseSpan.""" + received_signals: list[Any] = [] + + class _RecordingTracer(SpanTracer): + @override + async def handle(self, signal: Any) -> None: + received_signals.append(signal) + await super().handle(signal) + + fake_tracing = FakeTracing() + tracer = _RecordingTracer( + trace_id="trace1", + parent_span_id="parent", + task_id="task1", + tracing=fake_tracing, + ) + turn = OpenAITurn(stream=_canonical_stream(_tool_then_text_events()), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent", + tracer=tracer, + ) + [_ async for _ in emitter.yield_turn(turn)] + + assert 
len(received_signals) == 2 + assert isinstance(received_signals[0], OpenSpan) + assert isinstance(received_signals[1], CloseSpan) + assert received_signals[0].name == "get_weather" diff --git a/tests/lib/core/harness/test_harness_openai_temporal.py b/tests/lib/core/harness/test_harness_openai_temporal.py new file mode 100644 index 000000000..61cda37ef --- /dev/null +++ b/tests/lib/core/harness/test_harness_openai_temporal.py @@ -0,0 +1,195 @@ +"""Integration test: Temporal channel with an OpenAI-agents turn, offline. + +In a Temporal OpenAI deployment (see +examples/tutorials/10_async/10_temporal/120_openai_agents), the OpenAI Agents +SDK run executes inside a Temporal activity. Each turn's canonical stream is +delivered to Redis via the SAME ``UnifiedEmitter.auto_send_turn`` path used by +the non-temporal async channel — the only temporal-specific concern at the +harness boundary is that the activity stamps messages with a deterministic +``created_at`` (e.g. ``workflow.now()``) so replay is deterministic. + +There is no dedicated ``stream_openai_events`` temporal helper (unlike +langgraph's ``stream_langgraph_events``); the temporal OpenAI agent builds an +``OpenAITurn`` and calls ``auto_send_turn`` directly inside the activity. This +suite therefore exercises the auto_send path plus the temporal-only contract: +``created_at`` is threaded through to every streaming context. + +What is tested +-------------- +- The canonical message sequence (tool_request -> tool_response -> text) is + delivered via auto_send_turn, exactly as inside a Temporal activity. +- ``created_at`` passed to ``auto_send_turn`` is forwarded to every + ``streaming_task_message_context`` call (deterministic timestamping). +- Final text is returned from the turn. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Temporal scheduling (workflow.signal -> activity dispatch). +- Temporal durability / replay behaviour. 
+- Redis streaming (requires a running Redis instance). +- A real Runner.run_streamed execution / live OpenAI model behaviour. + +See also: test_harness_openai_sync.py and test_harness_openai_async.py. +""" + +from __future__ import annotations + +from typing import Any +from datetime import datetime, timezone + +from agentex.types.text_delta import TextDelta +from agentex.types.task_message import TaskMessage +from agentex.types.text_content import TextContent +from agentex.lib.core.harness.types import StreamTaskMessage +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._openai_turn import OpenAITurn + + +def _tool_then_text_events() -> list[StreamTaskMessage]: + return [ + StreamTaskMessageFull( + type="full", + index=0, + content=ToolRequestContent( + type="tool_request", + author="agent", + tool_call_id="call_1", + name="get_weather", + arguments={"city": "Paris"}, + ), + ), + StreamTaskMessageFull( + type="full", + index=1, + content=ToolResponseContent( + type="tool_response", + author="agent", + tool_call_id="call_1", + name="get_weather", + content="The weather in Paris is sunny and 72F", + ), + ), + StreamTaskMessageStart( + type="start", + index=2, + content=TextContent(type="text", author="agent", content=""), + ), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="Sunny ")), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="and 72F.")), + StreamTaskMessageDone(type="done", index=2), + ] + + +async def _canonical_stream(events: list[StreamTaskMessage]): + for e in events: + yield e + + +# 
--------------------------------------------------------------------------- +# Fake streaming backend that records the created_at it receives +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, ctype: str, initial_content: Any) -> None: + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + pass + + async def stream_update(self, update: Any) -> Any: + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.messages_opened: list[Any] = [] + self.created_ats: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + self.created_ats.append(created_at) + return _FakeCtx(ctype, initial_content) + + +async def _run_activity(events: list[StreamTaskMessage], created_at: datetime | None) -> tuple[Any, _FakeStreaming]: + """Mirror the temporal activity body: build an OpenAITurn and auto_send it.""" + fake_streaming = _FakeStreaming() + turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn, created_at=created_at) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTemporalActivityMessageOrder: + async def test_canonical_sequence_delivered(self) -> None: + _, fake_streaming = 
await _run_activity(_tool_then_text_events(), created_at=None) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + assert types[-1] == "text" + + async def test_final_text_returned(self) -> None: + result, _ = await _run_activity(_tool_then_text_events(), created_at=None) + assert result.final_text == "Sunny and 72F." + + +class TestTemporalCreatedAtThreading: + """created_at is forwarded to every streaming context (deterministic replay).""" + + async def test_created_at_threaded_to_all_contexts(self) -> None: + fixed = datetime(2026, 6, 22, 12, 0, 0, tzinfo=timezone.utc) + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert len(fake_streaming.created_ats) == 3 + assert all(ts == fixed for ts in fake_streaming.created_ats), ( + f"Expected every context stamped with {fixed}, got {fake_streaming.created_ats}" + ) + + async def test_default_created_at_is_none(self) -> None: + """When the activity does not stamp a timestamp, contexts see None.""" + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=None) + assert all(ts is None for ts in fake_streaming.created_ats) + + async def test_created_at_is_deterministic_across_runs(self) -> None: + """Two runs with the same created_at stamp identical timestamps — the + determinism the Temporal channel relies on for replay.""" + fixed = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + _, first = await _run_activity(_tool_then_text_events(), created_at=fixed) + _, second = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert first.created_ats == second.created_ats + assert all(ts == fixed for ts in first.created_ats) diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_async.py b/tests/lib/core/harness/test_harness_pydantic_ai_async.py index 8bda7d020..4b6b86415 100644 --- 
a/tests/lib/core/harness/test_harness_pydantic_ai_async.py +++ b/tests/lib/core/harness/test_harness_pydantic_ai_async.py @@ -12,7 +12,7 @@ The async path uses the bare PydanticAITurn (no coalescing): the foundation auto_send delivers streamed tool-request Start+ToolRequestDelta+Done messages -natively (AGX1-377 fix), so no coalescing wrapper is needed. +natively, so no coalescing wrapper is needed. What is tested -------------- @@ -51,6 +51,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Minimal agent under test # --------------------------------------------------------------------------- @@ -120,39 +122,6 @@ def streaming_task_message_context( return _FakeCtx(self.sink, ctype, initial_content) -# --------------------------------------------------------------------------- -# Fake tracing backend -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def __init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, str | None]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span( - self, - *, - trace_id: str, - name: str, - input: Any = None, - parent_id: Any = None, - data: Any = None, - task_id: Any = None, - ) -> _FakeSpan: - self.started.append((name, parent_id)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -163,7 +132,7 @@ async def _run_auto_send_turn( user_msg: str = "What is the weather in Paris?", trace_id: str | 
None = None, parent_span_id: str | None = None, - fake_tracing: _FakeTracing | None = None, + fake_tracing: FakeTracing | None = None, ) -> tuple[TurnResult, _FakeStreaming]: """Drive the async (auto_send) path and return the TurnResult + fake streaming state.""" fake_streaming = _FakeStreaming() @@ -304,9 +273,9 @@ async def test_context_lifecycle_open_then_close(self) -> None: class TestAsyncAutoSendSpanDerivation: """Span derivation on the async path now works for streamed tool requests. - The foundation auto_send delivers Start+ToolRequestDelta+Done natively - (AGX1-377 fix). The SpanDeriver opens a tool span on Done(tool_request), - so the async path now derives spans just like the sync path. + The foundation auto_send delivers Start+ToolRequestDelta+Done natively. + The SpanDeriver opens a tool span on Done(tool_request), so the async path + derives spans just like the sync path. """ async def test_tool_span_derived_on_async_path(self) -> None: @@ -314,7 +283,7 @@ async def test_tool_span_derived_on_async_path(self) -> None: on the async/auto_send path when auto_send delivers the streamed Start+ToolRequestDelta+Done sequence.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = SpanTracer( trace_id="trace1", parent_span_id="parent", diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_sync.py b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py index 1557d0dd1..04beea81d 100644 --- a/tests/lib/core/harness/test_harness_pydantic_ai_sync.py +++ b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py @@ -49,6 +49,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Minimal agent under test # --------------------------------------------------------------------------- @@ -74,39 +76,6 @@ def 
get_weather(city: str) -> str: return agent -# --------------------------------------------------------------------------- -# Fake tracing backend (no network calls) -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def __init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, str | None]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span( - self, - *, - trace_id: str, - name: str, - input: Any = None, - parent_id: Any = None, - data: Any = None, - task_id: Any = None, - ) -> _FakeSpan: - self.started.append((name, parent_id)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -117,7 +86,7 @@ async def _run_yield_turn( user_msg: str = "What is the weather in Paris?", trace_id: str | None = None, parent_span_id: str | None = None, - fake_tracing: _FakeTracing | None = None, + fake_tracing: FakeTracing | None = None, ) -> list[Any]: """Drive the sync (yield) path and collect all yielded events.""" tracer: SpanTracer | bool | None = None @@ -245,7 +214,7 @@ class TestSyncYieldSpanDerivation: async def test_tool_span_opened_and_closed(self) -> None: """One tool span is opened and closed per tool call.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = SpanTracer( trace_id="trace1", parent_span_id="parent-span", @@ -266,14 +235,14 @@ async def test_tool_span_opened_and_closed(self) -> None: assert len(fake_tracing.started) == 1, "Expected exactly one tool span opened" assert len(fake_tracing.ended) == 1, "Expected exactly one tool span closed" - span_name, parent_id = 
fake_tracing.started[0] + span_name, parent_id, _ = fake_tracing.started[0] assert span_name == "get_weather" assert parent_id == "parent-span" async def test_tool_span_output_is_tool_result(self) -> None: """The closed tool span's output equals the tool's return value.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = SpanTracer( trace_id="trace1", parent_span_id="parent-span", @@ -299,7 +268,7 @@ async def test_tool_span_output_is_tool_result(self) -> None: async def test_no_trace_id_means_no_spans(self) -> None: """With trace_id=None, no spans are derived (emitter disables tracing).""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() async with agent.run_stream_events("What is the weather in Paris?") as stream: turn = PydanticAITurn(stream, model="test") @@ -317,7 +286,7 @@ async def test_no_trace_id_means_no_spans(self) -> None: async def test_tracer_false_suppresses_spans(self) -> None: """tracer=False disables span derivation regardless of trace_id.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() async with agent.run_stream_events("What is the weather in Paris?") as stream: turn = PydanticAITurn(stream, model="test") @@ -345,7 +314,7 @@ async def handle(self, signal: Any) -> None: received_signals.append(signal) await super().handle(signal) - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = _RecordingTracer( trace_id="trace1", parent_span_id="parent", diff --git a/tests/lib/core/harness/test_tracer.py b/tests/lib/core/harness/test_tracer.py index ed40cf595..b3d9002c4 100644 --- a/tests/lib/core/harness/test_tracer.py +++ b/tests/lib/core/harness/test_tracer.py @@ -5,32 +5,12 @@ from agentex.lib.core.harness.types import OpenSpan, CloseSpan from agentex.lib.core.harness.tracer import SpanTracer - -class _FakeSpan: - def __init__(self, name): - self.name = name - self.output = None - self.data = None - - -class 
_FakeTracing: - def __init__(self): - self.started = [] - self.ended = [] - self.ended_spans = [] - - async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None): - self.started.append((name, parent_id, input)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id, span): - self.ended.append((span.name, span.output)) - self.ended_spans.append(span) +from ._fakes import FakeTracing @pytest.mark.asyncio async def test_open_then_close_starts_and_ends_span(): - fake = _FakeTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake) await tracer.handle(OpenSpan(key="call_1", kind="tool", name="Bash", input={"cmd": "ls"})) await tracer.handle(CloseSpan(key="call_1", output="files", is_complete=True)) @@ -41,7 +21,7 @@ async def test_open_then_close_starts_and_ends_span(): @pytest.mark.asyncio async def test_close_records_is_error_on_span_data(): """A CloseSpan carrying is_error records the status on span.data (AGX1-371).""" - fake = _FakeTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake) await tracer.handle(OpenSpan(key="call_err", kind="tool", name="Bash", input={})) await tracer.handle(CloseSpan(key="call_err", output="boom", is_complete=True, is_error=True)) @@ -51,7 +31,7 @@ async def test_close_records_is_error_on_span_data(): @pytest.mark.asyncio async def test_close_without_status_leaves_span_data_untouched(): """is_error=None (no status reported) must not write to span.data.""" - fake = _FakeTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake) await tracer.handle(OpenSpan(key="call_1", kind="tool", name="Bash", input={})) await tracer.handle(CloseSpan(key="call_1", output="files", is_complete=True)) @@ -60,7 +40,7 @@ async def test_close_without_status_leaves_span_data_untouched(): @pytest.mark.asyncio async def test_no_trace_id_is_noop(): - fake = _FakeTracing() + fake 
= FakeTracing() tracer = SpanTracer(trace_id="", parent_span_id=None, tracing=fake) await tracer.handle(OpenSpan(key="k", kind="tool", name="X")) await tracer.handle(CloseSpan(key="k")) @@ -69,7 +49,7 @@ async def test_no_trace_id_is_noop(): @pytest.mark.asyncio async def test_tracing_failure_is_swallowed(): - class _Boom(_FakeTracing): + class _Boom(FakeTracing): @override async def start_span(self, **kw): raise RuntimeError("backend down") @@ -83,7 +63,7 @@ async def start_span(self, **kw): @pytest.mark.asyncio async def test_duplicate_open_replaces_silently(): - fake = _FakeTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake) await tracer.handle(OpenSpan(key="k", kind="tool", name="A")) await tracer.handle(OpenSpan(key="k", kind="tool", name="B")) diff --git a/tests/lib/core/harness/test_yield_delivery.py b/tests/lib/core/harness/test_yield_delivery.py index f3f491d84..ef3861a16 100644 --- a/tests/lib/core/harness/test_yield_delivery.py +++ b/tests/lib/core/harness/test_yield_delivery.py @@ -1,5 +1,3 @@ -import types as _types - import pytest from agentex.lib.core.harness.tracer import SpanTracer @@ -12,17 +10,7 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.core.harness.yield_delivery import yield_events - -class _RecordTracing: - def __init__(self): - self.started, self.ended = [], [] - - async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None): - self.started.append(name) - return _types.SimpleNamespace() # supports arbitrary attribute assignment (span.output = ...) 
- - async def end_span(self, *, trace_id, span): - self.ended.append(getattr(span, "output", None)) +from ._fakes import FakeTracing async def _gen(events): @@ -32,7 +20,7 @@ async def _gen(events): @pytest.mark.asyncio async def test_yield_passes_events_through_and_traces(): - fake = _RecordTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake) events = [ StreamTaskMessageStart( @@ -53,8 +41,8 @@ async def test_yield_passes_events_through_and_traces(): ] out = [e async for e in yield_events(_gen(events), tracer=tracer)] assert out == events # passthrough unchanged - assert fake.started == ["Bash"] # span derived + opened - assert fake.ended == ["ok"] # span closed with response + assert fake.started_names == ["Bash"] # span derived + opened + assert fake.ended_outputs == ["ok"] # span closed with response @pytest.mark.asyncio @@ -68,7 +56,7 @@ async def test_yield_without_tracer_is_pure_passthrough(): @pytest.mark.asyncio async def test_flush_runs_on_early_close(): - fake = _RecordTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake) events = [ StreamTaskMessageStart( @@ -85,5 +73,5 @@ async def test_flush_runs_on_early_close(): first = await gen.__anext__() # Start second = await gen.__anext__() # Done -> tool span opens here await gen.aclose() # triggers the finally -> flush() - assert fake.started == ["Bash"] - assert fake.ended == [None] # flush closed the unpaired span (incomplete, no output) + assert fake.started_names == ["Bash"] + assert fake.ended_outputs == [None] # flush closed the unpaired span (incomplete, no output)