11"""OpenAI conformance fixtures for the shared harness span-derivation engine.
22
3- The cross-channel guarantee is that yield-delivery and auto-send observe the
4- SAME canonical `` StreamTaskMessage*`` stream, so span derivation over that
5- stream must be deterministic and idempotent regardless of channel. These
3+ The cross-channel guarantee is that the yield_events and auto_send channels
4+ observe the SAME canonical StreamTaskMessage* stream, so span derivation and
5+ logical delivery over that stream must be equivalent on both channels. These
66fixtures express the canonical sequences an OpenAI turn produces (text,
7- tool-call, reasoning, and a combined multi-step turn) and assert that property.
7+ tool-call, reasoning, and a combined multi-step turn) and assert that property
8+ via run_cross_channel_conformance.
89
9- Registry hazard (see conformance/runner.py): `` _REGISTRY`` is process-global and
10+ Registry hazard (see conformance/runner.py): _REGISTRY is process-global and
1011collection order across modules is not guaranteed. To stay deterministic this
1112module keeps its OWN fixture list and parametrizes over THAT list, rather than
12- over `` all_fixtures()`` . It still calls `` register()`` so the cross-module
13- conformance suite can see these fixtures too.
13+ over all_fixtures(). It still calls register() so the cross-module conformance
14+ suite can see these fixtures too.
1415"""
1516
1617from __future__ import annotations
1718
1819import pytest
1920
21+ from agentex .types .text_delta import TextDelta
2022from agentex .types .text_content import TextContent
2123from agentex .types .reasoning_content import ReasoningContent
22- from agentex .types .task_message_delta import TextDelta , ReasoningSummaryDelta
2324from agentex .types .task_message_update import (
2425 StreamTaskMessageDone ,
2526 StreamTaskMessageFull ,
2829)
2930from agentex .types .tool_request_content import ToolRequestContent
3031from agentex .types .tool_response_content import ToolResponseContent
32+ from agentex .types .reasoning_content_delta import ReasoningContentDelta
3133
32- from .runner import Fixture , register , derive_all
34+ from .runner import Fixture , register , run_cross_channel_conformance
3335
3436_OPENAI_FIXTURES : list [Fixture ] = []
3537
@@ -40,15 +42,17 @@ def _add(fixture: Fixture) -> None:
4042 register (fixture )
4143
4244
43- # Text-only turn: start -> deltas -> done. No spans are derived from plain text.
45+ # Text-only turn: start -> deltas -> done.
46+ # Uses a non-empty Start content ("Init ") so that payload comparison catches a
47+ # channel that drops StreamTaskMessageStart.content.
4448_add (
4549 Fixture (
4650 name = "openai-text-only" ,
4751 events = [
4852 StreamTaskMessageStart (
4953 type = "start" ,
5054 index = 0 ,
51- content = TextContent (type = "text" , author = "agent" , content = "" ),
55+ content = TextContent (type = "text" , author = "agent" , content = "Init " ),
5256 ),
5357 StreamTaskMessageDelta (type = "delta" , index = 0 , delta = TextDelta (type = "text" , text_delta = "Hel" )),
5458 StreamTaskMessageDelta (type = "delta" , index = 0 , delta = TextDelta (type = "text" , text_delta = "lo" )),
@@ -57,9 +61,8 @@ def _add(fixture: Fixture) -> None:
5761 )
5862)
5963
60- # Tool-call turn: the OpenAI converter emits a single Full(ToolRequestContent)
61- # for the call and a Full(ToolResponseContent) for the result, matched by
62- # tool_call_id. Mirrors convert_openai_to_agentex_events' tool path.
64+ # Tool-call turn: Full(ToolRequestContent) for the call + Full(ToolResponseContent)
65+ # for the result, matched by tool_call_id. Mirrors the OpenAI converter's tool path.
6366_add (
6467 Fixture (
6568 name = "openai-tool-call" ,
@@ -90,21 +93,30 @@ def _add(fixture: Fixture) -> None:
9093 )
9194)
9295
93- # Reasoning turn: start(ReasoningContent) -> summary deltas -> done. Span
94- # derivation opens a reasoning span on Start and closes it on the index's Done.
96+ # Reasoning turn: start(ReasoningContent) -> content deltas -> done.
97+ # ReasoningContent.summary is seeded in the payload so a channel that drops the
98+ # summary fails the cross-channel comparison.
9599_add (
96100 Fixture (
97101 name = "openai-reasoning" ,
98102 events = [
99103 StreamTaskMessageStart (
100104 type = "start" ,
101105 index = 0 ,
102- content = ReasoningContent (type = "reasoning" , author = "agent" , summary = [], content = [], style = "active" ),
106+ content = ReasoningContent (
107+ type = "reasoning" ,
108+ author = "agent" ,
109+ summary = ["Thinking..." ],
110+ ),
103111 ),
104112 StreamTaskMessageDelta (
105113 type = "delta" ,
106114 index = 0 ,
107- delta = ReasoningSummaryDelta (type = "reasoning_summary" , summary_index = 0 , summary_delta = "thinking" ),
115+ delta = ReasoningContentDelta (
116+ type = "reasoning_content" ,
117+ content_index = 0 ,
118+ content_delta = "step 1" ,
119+ ),
108120 ),
109121 StreamTaskMessageDone (type = "done" , index = 0 ),
110122 ],
@@ -119,12 +131,20 @@ def _add(fixture: Fixture) -> None:
119131 StreamTaskMessageStart (
120132 type = "start" ,
121133 index = 0 ,
122- content = ReasoningContent (type = "reasoning" , author = "agent" , summary = [], content = [], style = "active" ),
134+ content = ReasoningContent (
135+ type = "reasoning" ,
136+ author = "agent" ,
137+ summary = ["plan" ],
138+ ),
123139 ),
124140 StreamTaskMessageDelta (
125141 type = "delta" ,
126142 index = 0 ,
127- delta = ReasoningSummaryDelta (type = "reasoning_summary" , summary_index = 0 , summary_delta = "plan" ),
143+ delta = ReasoningContentDelta (
144+ type = "reasoning_content" ,
145+ content_index = 0 ,
146+ content_delta = "elaboration" ,
147+ ),
128148 ),
129149 StreamTaskMessageDone (type = "done" , index = 0 ),
130150 StreamTaskMessageFull (
@@ -162,8 +182,25 @@ def _add(fixture: Fixture) -> None:
162182
163183
164184@pytest .mark .parametrize ("fixture" , _OPENAI_FIXTURES , ids = lambda f : f .name )
165- def test_openai_span_derivation_is_deterministic (fixture ):
166- """Deriving twice over the same canonical events yields identical signals,
167- which is exactly what makes yield-delivery and auto-send equivalent (both
168- observe the same stream)."""
169- assert derive_all (fixture .events ) == derive_all (fixture .events )
185+ @pytest .mark .asyncio
186+ async def test_openai_cross_channel_equivalence (fixture : Fixture ) -> None :
187+ """Assert that yield_events and auto_send produce equivalent logical
188+ deliveries and identical span signals for every OpenAI fixture.
189+
190+ This is the cross-channel guarantee: the two delivery adapters agree on
191+ WHAT was delivered (logical content) and HOW spans were derived, even
192+ though their streaming-envelope shapes differ (Full vs Start+Done for tool
193+ messages).
194+
195+ The span signals are the ones each channel's tracer ACTUALLY recorded while
196+ delivering, not a re-derivation, so a regression where one channel skips
197+ deriver.observe() for some event type is caught here.
198+ """
199+ yield_deliveries , auto_deliveries , yield_spans , auto_spans = await run_cross_channel_conformance (fixture )
200+
201+ assert yield_deliveries == auto_deliveries , (
202+ f"[{ fixture .name } ] logical deliveries differ:\n yield: { yield_deliveries } \n auto_send: { auto_deliveries } "
203+ )
204+ assert yield_spans == auto_spans , (
205+ f"[{ fixture .name } ] span signals differ:\n yield: { yield_spans } \n auto_send: { auto_spans } "
206+ )
0 commit comments