Skip to content

Commit 8a0cf6b

Browse files
committed
fix(test): update codex reasoning span expectation
1 parent 9a1d363 commit 8a0cf6b

1 file changed

Lines changed: 9 additions & 11 deletions

File tree

tests/lib/core/harness/test_harness_codex_sync.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,9 @@
1818
1919
Reasoning note
2020
--------------
21-
The codex converter emits reasoning as Start(ReasoningContent) + Full(ReasoningContent)
22-
with NO Done event. The SpanDeriver opens a reasoning span on Start but only
23-
closes it on a Done; with no Done, the reasoning span is closed by flush() at
24-
end of stream (is_complete=False). This is asserted explicitly below rather than
25-
glossed over — it is a real codex-specific quirk, not a missing channel.
21+
The codex converter emits reasoning as Start(ReasoningContent) + deltas + Done.
22+
The SpanDeriver opens a reasoning span on Start and closes it normally when the
23+
Done event is observed (is_complete=True).
2624
2725
What is tested
2826
--------------
@@ -31,7 +29,7 @@
3129
- The tool_response carries the command output, keyed by item id.
3230
- With a trace_id + fake tracing, a tool span is opened on Done(tool_request)
3331
and closed on the matching Full(tool_response), and a reasoning span is
34-
opened (closed-by-flush) for a reasoning item.
32+
opened on Start and closed on Done (is_complete=True) for a reasoning item.
3533
3634
What is NOT covered without live infrastructure
3735
-----------------------------------------------
@@ -227,9 +225,9 @@ async def test_tool_span_output_is_command_output(self) -> None:
227225
_name, output = fake_tracing.ended[0]
228226
assert "72F" in str(output)
229227

230-
async def test_reasoning_span_opened_then_flush_closed(self) -> None:
231-
"""A codex reasoning item emits Start+Full (no Done): the reasoning span
232-
opens and is closed by flush() at end of stream (is_complete=False)."""
228+
async def test_reasoning_span_opened_then_done_closed(self) -> None:
229+
"""A codex reasoning item emits Start+Delta+Done: the reasoning span
230+
opens and is closed normally when the Done event is observed."""
233231
received_signals: list[Any] = []
234232

235233
class _RecordingTracer(SpanTracer):
@@ -252,8 +250,8 @@ async def handle(self, signal: Any) -> None:
252250
opens = [s for s in received_signals if isinstance(s, OpenSpan) and s.kind == "reasoning"]
253251
closes = [s for s in received_signals if isinstance(s, CloseSpan) and str(s.key).startswith("reasoning:")]
254252
assert len(opens) == 1, "Reasoning Start must open exactly one reasoning span"
255-
assert len(closes) == 1, "Reasoning span must be closed (by flush) at end of stream"
256-
assert closes[0].is_complete is False, "No Done event, so the reasoning span is flush-closed as incomplete"
253+
assert len(closes) == 1, "Reasoning span must close exactly once"
254+
assert closes[0].is_complete is True, "Done event closes the reasoning span as complete"
257255

258256
async def test_no_trace_id_means_no_spans(self) -> None:
259257
fake_tracing = FakeTracing()

0 commit comments

Comments
 (0)