Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions src/agents/run_internal/session_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ async def prepare_input_with_session(
converted_history = [
strip_internal_input_item_metadata(ensure_input_item_format(item)) for item in history
]
if not is_openai_conversation_session:
converted_history = [
sanitized_item
for item in converted_history
if (sanitized_item := _strip_reasoning_item_ids_from_history_item(item)) is not None
]

new_input_list = [
ensure_input_item_format(item) for item in ItemHelpers.input_to_new_input_list(input)
Expand Down Expand Up @@ -187,6 +193,31 @@ async def prepare_input_with_session(
return deduplicated, normalize_input_items_for_api(appended_as_inputs)


def _strip_reasoning_item_ids_from_history_item(
item: TResponseInputItem,
) -> TResponseInputItem | None:
"""Remove reasoning item IDs from session history before sending to the Responses API.

Some reasoning models emit `reasoning` items with `rs_...` IDs that are not guaranteed to be
stable across turns. Replaying those IDs in a subsequent `responses.create` call can raise a
404 "Item with id 'rs_...' not found". Stripping the ID keeps the reasoning payload usable
without relying on server-side item retention.

Reasoning items without a summary cannot be replayed as model input, so they are dropped.
"""
if not isinstance(item, dict):
return item
if item.get("type") != "reasoning":
return item
if not item.get("summary"):
return None
if "id" not in item:
return item
sanitized = dict(item)
sanitized.pop("id", None)
return cast(TResponseInputItem, sanitized)


async def persist_session_items_for_guardrail_trip(
session: Session | None,
server_conversation_tracker: OpenAIServerConversationTracker | None,
Expand Down
49 changes: 49 additions & 0 deletions tests/test_agent_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1824,6 +1824,55 @@ async def test_prepare_input_with_session_prefers_latest_function_call_output():
assert cast(dict[str, Any], session_items[0])["output"] == "new-output"


@pytest.mark.asyncio
async def test_prepare_input_with_session_strips_reasoning_item_ids_from_history():
    """A stored reasoning item with a summary is replayed with its summary but without its ID."""
    stored_reasoning = cast(
        TResponseInputItem,
        {
            "type": "reasoning",
            "id": "rs_test",
            "summary": [{"type": "summary_text", "text": "thinking"}],
        },
    )
    session = SimpleListSession(history=[stored_reasoning])

    prepared_input, session_items = await prepare_input_with_session("hello", session, None)

    assert isinstance(prepared_input, list)

    # Exactly one item is returned for the session, and it is the user-role one.
    assert len(session_items) == 1
    only_session_item = cast(dict[str, Any], session_items[0])
    assert only_session_item.get("role") == "user"

    # Collect every reasoning entry that made it into the prepared model input.
    replayed_reasoning: list[dict[str, Any]] = []
    for entry in prepared_input:
        if isinstance(entry, dict) and entry.get("type") == "reasoning":
            replayed_reasoning.append(cast(dict[str, Any], entry))

    assert len(replayed_reasoning) == 1
    replayed = replayed_reasoning[0]
    assert replayed.get("summary") == [{"type": "summary_text", "text": "thinking"}]
    assert "id" not in replayed


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "reasoning_item",
    [
        {"type": "reasoning", "id": "rs_missing_summary"},
        {"type": "reasoning", "id": "rs_empty_summary", "summary": []},
    ],
)
async def test_prepare_input_with_session_drops_reasoning_items_without_summary(
    reasoning_item: dict[str, Any],
):
    """A stored reasoning item with a missing or empty summary is absent from prepared input."""
    stored_history = [cast(TResponseInputItem, reasoning_item)]
    session = SimpleListSession(history=stored_history)

    prepared_input, session_items = await prepare_input_with_session("hello", session, None)

    assert isinstance(prepared_input, list)
    assert len(session_items) == 1

    # No reasoning entry may survive into the prepared model input.
    leftover_reasoning = [
        entry
        for entry in prepared_input
        if isinstance(entry, dict) and entry.get("type") == "reasoning"
    ]
    assert not leftover_reasoning


@pytest.mark.asyncio
async def test_prepare_input_with_session_drops_orphan_function_calls():
orphan_call = cast(
Expand Down