Skip to content

Commit 84f99f4

Browse files
declan-scale and claude committed
test(openai-conformance): adapt to cross-channel runner on #414 foundation
Replace the old determinism-only test (derive_all) with the full cross-channel assertion pattern: register fixtures with per-module _OPENAI_FIXTURES, call run_cross_channel_conformance, and assert logical-delivery and span-signal equivalence across yield_events and auto_send — matching the pattern in test_conformance.py.

Swap ReasoningSummaryDelta for ReasoningContentDelta so the runner's payload accumulator recognises the delta type and the payload comparison exercises the reasoning seeding path.

Remove derive_all import.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 26b86ac commit 84f99f4

1 file changed

Lines changed: 62 additions & 25 deletions

File tree

tests/lib/core/harness/conformance/test_openai_conformance.py

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,26 @@
11
"""OpenAI conformance fixtures for the shared harness span-derivation engine.
22
3-
The cross-channel guarantee is that yield-delivery and auto-send observe the
4-
SAME canonical ``StreamTaskMessage*`` stream, so span derivation over that
5-
stream must be deterministic and idempotent regardless of channel. These
3+
The cross-channel guarantee is that yield-delivery and auto_send observe the
4+
SAME canonical StreamTaskMessage* stream, so span derivation and logical
5+
delivery over that stream must be equivalent regardless of channel. These
66
fixtures express the canonical sequences an OpenAI turn produces (text,
7-
tool-call, reasoning, and a combined multi-step turn) and assert that property.
7+
tool-call, reasoning, and a combined multi-step turn) and assert that property
8+
via run_cross_channel_conformance.
89
9-
Registry hazard (see conformance/runner.py): ``_REGISTRY`` is process-global and
10+
Registry hazard (see conformance/runner.py): _REGISTRY is process-global and
1011
collection order across modules is not guaranteed. To stay deterministic this
1112
module keeps its OWN fixture list and parametrizes over THAT list, rather than
12-
over ``all_fixtures()``. It still calls ``register()`` so the cross-module
13-
conformance suite can see these fixtures too.
13+
over all_fixtures(). It still calls register() so the cross-module conformance
14+
suite can see these fixtures too.
1415
"""
1516

1617
from __future__ import annotations
1718

1819
import pytest
1920

21+
from agentex.types.text_delta import TextDelta
2022
from agentex.types.text_content import TextContent
2123
from agentex.types.reasoning_content import ReasoningContent
22-
from agentex.types.task_message_delta import TextDelta, ReasoningSummaryDelta
2324
from agentex.types.task_message_update import (
2425
StreamTaskMessageDone,
2526
StreamTaskMessageFull,
@@ -28,8 +29,9 @@
2829
)
2930
from agentex.types.tool_request_content import ToolRequestContent
3031
from agentex.types.tool_response_content import ToolResponseContent
32+
from agentex.types.reasoning_content_delta import ReasoningContentDelta
3133

32-
from .runner import Fixture, register, derive_all
34+
from .runner import Fixture, register, run_cross_channel_conformance
3335

3436
_OPENAI_FIXTURES: list[Fixture] = []
3537

@@ -40,15 +42,17 @@ def _add(fixture: Fixture) -> None:
4042
register(fixture)
4143

4244

43-
# Text-only turn: start -> deltas -> done. No spans are derived from plain text.
45+
# Text-only turn: start -> deltas -> done.
46+
# Uses non-empty initial_content so payload comparison catches a channel that
47+
# drops StreamTaskMessageStart.content.
4448
_add(
4549
Fixture(
4650
name="openai-text-only",
4751
events=[
4852
StreamTaskMessageStart(
4953
type="start",
5054
index=0,
51-
content=TextContent(type="text", author="agent", content=""),
55+
content=TextContent(type="text", author="agent", content="Init"),
5256
),
5357
StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="Hel")),
5458
StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="lo")),
@@ -57,9 +61,8 @@ def _add(fixture: Fixture) -> None:
5761
)
5862
)
5963

60-
# Tool-call turn: the OpenAI converter emits a single Full(ToolRequestContent)
61-
# for the call and a Full(ToolResponseContent) for the result, matched by
62-
# tool_call_id. Mirrors convert_openai_to_agentex_events' tool path.
64+
# Tool-call turn: Full(ToolRequestContent) for the call + Full(ToolResponseContent)
65+
# for the result, matched by tool_call_id. Mirrors the OpenAI converter's tool path.
6366
_add(
6467
Fixture(
6568
name="openai-tool-call",
@@ -90,21 +93,30 @@ def _add(fixture: Fixture) -> None:
9093
)
9194
)
9295

93-
# Reasoning turn: start(ReasoningContent) -> summary deltas -> done. Span
94-
# derivation opens a reasoning span on Start and closes it on the index's Done.
96+
# Reasoning turn: start(ReasoningContent) -> content deltas -> done.
97+
# ReasoningContent.summary is seeded in the payload so a channel that drops the
98+
# summary fails the cross-channel comparison.
9599
_add(
96100
Fixture(
97101
name="openai-reasoning",
98102
events=[
99103
StreamTaskMessageStart(
100104
type="start",
101105
index=0,
102-
content=ReasoningContent(type="reasoning", author="agent", summary=[], content=[], style="active"),
106+
content=ReasoningContent(
107+
type="reasoning",
108+
author="agent",
109+
summary=["Thinking..."],
110+
),
103111
),
104112
StreamTaskMessageDelta(
105113
type="delta",
106114
index=0,
107-
delta=ReasoningSummaryDelta(type="reasoning_summary", summary_index=0, summary_delta="thinking"),
115+
delta=ReasoningContentDelta(
116+
type="reasoning_content",
117+
content_index=0,
118+
content_delta="step 1",
119+
),
108120
),
109121
StreamTaskMessageDone(type="done", index=0),
110122
],
@@ -119,12 +131,20 @@ def _add(fixture: Fixture) -> None:
119131
StreamTaskMessageStart(
120132
type="start",
121133
index=0,
122-
content=ReasoningContent(type="reasoning", author="agent", summary=[], content=[], style="active"),
134+
content=ReasoningContent(
135+
type="reasoning",
136+
author="agent",
137+
summary=["plan"],
138+
),
123139
),
124140
StreamTaskMessageDelta(
125141
type="delta",
126142
index=0,
127-
delta=ReasoningSummaryDelta(type="reasoning_summary", summary_index=0, summary_delta="plan"),
143+
delta=ReasoningContentDelta(
144+
type="reasoning_content",
145+
content_index=0,
146+
content_delta="elaboration",
147+
),
128148
),
129149
StreamTaskMessageDone(type="done", index=0),
130150
StreamTaskMessageFull(
@@ -162,8 +182,25 @@ def _add(fixture: Fixture) -> None:
162182

163183

164184
@pytest.mark.parametrize("fixture", _OPENAI_FIXTURES, ids=lambda f: f.name)
165-
def test_openai_span_derivation_is_deterministic(fixture):
166-
"""Deriving twice over the same canonical events yields identical signals,
167-
which is exactly what makes yield-delivery and auto-send equivalent (both
168-
observe the same stream)."""
169-
assert derive_all(fixture.events) == derive_all(fixture.events)
185+
@pytest.mark.asyncio
186+
async def test_openai_cross_channel_equivalence(fixture: Fixture) -> None:
187+
"""Assert that yield_events and auto_send produce equivalent logical
188+
deliveries and identical span signals for every OpenAI fixture.
189+
190+
This is the cross-channel guarantee: the two delivery adapters agree on
191+
WHAT was delivered (logical content) and HOW spans were derived, even
192+
though their streaming-envelope shapes differ (Full vs Start+Done for tool
193+
messages).
194+
195+
The span signals are the ones each channel's tracer ACTUALLY recorded while
196+
delivering, not a re-derivation, so a regression where one channel skips
197+
deriver.observe() for some event type is caught here.
198+
"""
199+
yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
200+
201+
assert yield_deliveries == auto_deliveries, (
202+
f"[{fixture.name}] logical deliveries differ:\n yield: {yield_deliveries}\n auto_send: {auto_deliveries}"
203+
)
204+
assert yield_spans == auto_spans, (
205+
f"[{fixture.name}] span signals differ:\n yield: {yield_spans}\n auto_send: {auto_spans}"
206+
)

0 commit comments

Comments
 (0)