diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 9f795d2489..c6df2f564b 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -2,11 +2,12 @@ import uuid import random import socket -from collections.abc import Mapping +from collections.abc import Mapping, Iterable from datetime import datetime, timezone from importlib import import_module from typing import TYPE_CHECKING, List, Dict, cast, overload import warnings +import json from sentry_sdk._compat import check_uwsgi_thread_support from sentry_sdk._metrics_batcher import MetricsBatcher @@ -30,6 +31,7 @@ ) from sentry_sdk.serializer import serialize from sentry_sdk.tracing import trace +from sentry_sdk.traces import SpanStatus from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.transport import ( HttpTransportCore, @@ -38,6 +40,7 @@ ) from sentry_sdk.consts import ( SPANDATA, + SPANSTATUS, DEFAULT_MAX_VALUE_LENGTH, DEFAULT_OPTIONS, INSTRUMENTER, @@ -56,6 +59,8 @@ ) from sentry_sdk.scrubber import EventScrubber from sentry_sdk.monitor import Monitor +from sentry_sdk.envelope import Item, PayloadRef +from sentry_sdk.utils import datetime_from_isoformat if TYPE_CHECKING: from typing import Any @@ -66,7 +71,15 @@ from typing import Union from typing import TypeVar - from sentry_sdk._types import Event, Hint, SDKInfo, Log, Metric, EventDataCategory + from sentry_sdk._types import ( + Event, + Hint, + SDKInfo, + Log, + Metric, + EventDataCategory, + SerializedAttributeValue, + ) from sentry_sdk.integrations import Integration from sentry_sdk.scope import Scope from sentry_sdk.session import Session @@ -89,6 +102,181 @@ } +def _serialized_v1_attribute_to_serialized_v2_attribute( + attribute_value: "Any", +) -> "Optional[SerializedAttributeValue]": + if isinstance(attribute_value, bool): + return { + "value": attribute_value, + "type": "boolean", + } + + if isinstance(attribute_value, int): + return { + "value": attribute_value, + "type": "integer", + } + + if 
isinstance(attribute_value, float): + return { + "value": attribute_value, + "type": "double", + } + + if isinstance(attribute_value, str): + return { + "value": attribute_value, + "type": "string", + } + + if isinstance(attribute_value, list): + if not attribute_value: + return {"value": [], "type": "array"} + + ty = type(attribute_value[0]) + if ty in (int, str, bool, float) and all( + type(v) is ty for v in attribute_value + ): + return { + "value": attribute_value, + "type": "array", + } + + # Types returned when the serializer for V1 span attributes recurses into some container types. + if isinstance(attribute_value, (dict, list)): + return { + "value": json.dumps(attribute_value), + "type": "string", + } + + return None + + +def _serialized_v1_span_to_serialized_v2_span( + span: "dict[str, Any]", event: "Event" +) -> "dict[str, Any]": + # See SpanBatcher._to_transport_format() for analogous population of all entries except "attributes". + res: "dict[str, Any]" = { + "status": SpanStatus.OK.value, + "is_segment": False, + } + + if "trace_id" in span: + res["trace_id"] = span["trace_id"] + + if "span_id" in span: + res["span_id"] = span["span_id"] + + if "description" in span: + res["name"] = span["description"] + + if "start_timestamp" in span: + start_timestamp = None + try: + start_timestamp = datetime_from_isoformat(span["start_timestamp"]) + except Exception: + pass + + if start_timestamp is not None: + res["start_timestamp"] = start_timestamp.timestamp() + + if "timestamp" in span: + end_timestamp = None + try: + end_timestamp = datetime_from_isoformat(span["timestamp"]) + except Exception: + pass + + if end_timestamp is not None: + res["end_timestamp"] = end_timestamp.timestamp() + + if "parent_span_id" in span: + res["parent_span_id"] = span["parent_span_id"] + + if "status" in span and span["status"] != SPANSTATUS.OK: + res["status"] = "error" + + attributes: "Dict[str, Any]" = {} + + if "op" in span: + attributes["sentry.op"] = span["op"] + if 
"origin" in span: + attributes["sentry.origin"] = span["origin"] + + span_data = span.get("data") + if isinstance(span_data, dict): + attributes.update(span_data) + + span_tags = span.get("tags") + if isinstance(span_tags, dict): + attributes.update(span_tags) + + # See Scope._apply_user_attributes_to_telemetry() for user attributes. + user = event.get("user") + if isinstance(user, dict): + if "id" in user: + attributes["user.id"] = user["id"] + if "username" in user: + attributes["user.name"] = user["username"] + if "email" in user: + attributes["user.email"] = user["email"] + + # See Scope.set_global_attributes() for release, environment, and SDK metadata. + if "release" in event: + attributes["sentry.release"] = event["release"] + if "environment" in event: + attributes["sentry.environment"] = event["environment"] + if "transaction" in event: + attributes["sentry.segment.name"] = event["transaction"] + + trace_context = event.get("contexts", {}).get("trace", {}) + if "span_id" in trace_context: + attributes["sentry.segment.id"] = trace_context["span_id"] + + sdk_info = event.get("sdk") + if isinstance(sdk_info, dict): + if "name" in sdk_info: + attributes["sentry.sdk.name"] = sdk_info["name"] + if "version" in sdk_info: + attributes["sentry.sdk.version"] = sdk_info["version"] + + if not attributes: + return res + + res["attributes"] = {} + for key, value in attributes.items(): + res["attributes"][key] = _serialized_v1_attribute_to_serialized_v2_attribute( + value + ) + + return res + + +def _split_gen_ai_spans( + event_opt: "Event", +) -> "Optional[tuple[List[Dict[str, object]], List[Dict[str, object]]]]": + if "spans" not in event_opt: + return None + + spans: "Any" = event_opt["spans"] + if isinstance(spans, AnnotatedValue): + spans = spans.value + + if not isinstance(spans, Iterable): + return None + + non_gen_ai_spans = [] + gen_ai_spans = [] + for span in spans: + span_op = span.get("op") + if isinstance(span_op, str) and span_op.startswith("gen_ai."): + 
gen_ai_spans.append(span) + else: + non_gen_ai_spans.append(span) + + return non_gen_ai_spans, gen_ai_spans + + def _get_options(*args: "Optional[str]", **kwargs: "Any") -> "Dict[str, Any]": if args and (isinstance(args[0], (bytes, str)) or args[0] is None): dsn: "Optional[str]" = args[0] @@ -912,7 +1100,37 @@ def capture_event( if is_transaction: if isinstance(profile, Profile): envelope.add_profile(profile.to_json(event_opt, self.options)) - envelope.add_transaction(event_opt) + + split_spans = _split_gen_ai_spans(event_opt) + if split_spans is None or not split_spans[1]: + envelope.add_transaction(event_opt) + else: + non_gen_ai_spans, gen_ai_spans = split_spans + + event_opt["spans"] = non_gen_ai_spans + envelope.add_transaction(event_opt) + + envelope.add_item( + Item( + type=SpanBatcher.TYPE, + content_type=SpanBatcher.CONTENT_TYPE, + headers={ + "item_count": len(gen_ai_spans), + }, + payload=PayloadRef( + json={ + "items": [ + _serialized_v1_span_to_serialized_v2_span( + span, event + ) + for span in gen_ai_spans + if isinstance(span, dict) + ] + }, + ), + ) + ) + elif is_checkin: envelope.add_checkin(event_opt) else: diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index e86f7e1fa9..c7fc280b6c 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -91,14 +91,14 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_create_message( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -120,37 +120,38 @@ def 
test_nonstreaming_create_message( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -164,14 +165,14 @@ def test_nonstreaming_create_message( ], ) async def test_nonstreaming_create_message_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[AnthropicIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -193,36 +194,37 @@ async def test_nonstreaming_create_message_async( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for 
item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.parametrize( @@ -236,7 +238,7 @@ async def test_nonstreaming_create_message_async( ) def test_streaming_create_message( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -286,7 +288,7 @@ def test_streaming_create_message( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -308,42 +310,45 @@ def test_streaming_create_message( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT 
- assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] def test_streaming_create_message_close( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -391,7 +396,7 @@ def test_streaming_create_message_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -415,31 +420,34 @@ def test_streaming_create_message_close( messages.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -448,7 +456,7 @@ def test_streaming_create_message_close( ) def test_streaming_create_message_api_error( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -491,7 +499,7 @@ def test_streaming_create_message_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -513,34 +521,36 @@ def test_streaming_create_message_api_error( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert 
span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -555,7 +565,7 @@ def test_streaming_create_message_api_error( ) def test_stream_messages( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -605,7 +615,7 @@ def test_stream_messages( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -628,42 +638,45 @@ def test_stream_messages( for event in stream: pass - assert 
len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] def test_stream_messages_close( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -711,7 +724,7 @@ def test_stream_messages_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -740,31 +753,34 @@ def test_stream_messages_close( stream.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - 
assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -773,7 +789,7 @@ def test_stream_messages_close( ) def test_stream_messages_api_error( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -816,7 +832,7 @@ def test_stream_messages_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ 
{ @@ -839,34 +855,36 @@ def test_stream_messages_api_error( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -882,7 +900,7 @@ def test_stream_messages_api_error( ) async def test_streaming_create_message_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -936,7 +954,7 @@ async def test_streaming_create_message_async( default_integrations=False, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -958,44 +976,45 @@ async def test_streaming_create_message_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert 
span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["max_tokens"] @pytest.mark.asyncio async def 
test_streaming_create_message_async_close( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1046,7 +1065,7 @@ async def test_streaming_create_message_async_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1069,31 +1088,34 @@ async def test_streaming_create_message_async_close( await messages.__anext__() await messages.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1103,7 +1125,7 @@ async def test_streaming_create_message_async_close( @pytest.mark.asyncio async def test_streaming_create_message_async_api_error( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1149,7 +1171,7 @@ async def test_streaming_create_message_async_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1171,34 +1193,36 @@ async def test_streaming_create_message_async_api_error( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - 
assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @@ -1214,7 +1238,7 @@ async def test_streaming_create_message_async_api_error( ) async def test_stream_message_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1267,7 +1291,7 @@ async def test_stream_message_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", 
"span") messages = [ { @@ -1290,37 +1314,38 @@ async def test_stream_message_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1330,7 +1355,7 @@ async def test_stream_message_async( @pytest.mark.asyncio async def test_stream_messages_async_api_error( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -1376,7 +1401,7 @@ async def test_stream_messages_async_api_error( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1399,41 +1424,43 @@ async def test_stream_messages_async_api_error( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == 
OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) - assert span["status"] == "internal_error" - assert span["tags"]["status"] == "internal_error" + assert span["status"] == "error" assert event["contexts"]["trace"]["status"] == "internal_error" @pytest.mark.asyncio async def test_stream_messages_async_close( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ 
-1484,7 +1511,7 @@ async def test_stream_messages_async_close( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1515,31 +1542,34 @@ async def test_stream_messages_async_close( await stream.close() - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - span = next(span for span in event["spans"] if span["op"] == OP.GEN_AI_CHAT) + spans = [item.payload for item in items if item.type == "span"] + span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "Hello, Claude"}]' ) - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi!" 
- assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - assert span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "msg_01XFDUDYJgAACzvnptvVoYEL" + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert ( + span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] + == "msg_01XFDUDYJgAACzvnptvVoYEL" + ) @pytest.mark.skipif( @@ -1557,7 +1587,7 @@ async def test_stream_messages_async_close( ) def test_streaming_create_message_with_input_json_delta( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1637,7 +1667,7 @@ def test_streaming_create_message_with_input_json_delta( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1659,38 +1689,36 @@ def test_streaming_create_message_with_input_json_delta( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert 
span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.skipif( @@ -1708,7 +1736,7 @@ def test_streaming_create_message_with_input_json_delta( ) def test_stream_messages_with_input_json_delta( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1788,7 +1816,7 @@ def test_stream_messages_with_input_json_delta( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1811,38 +1839,36 @@ def 
test_stream_messages_with_input_json_delta( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 
+ assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -1861,7 +1887,7 @@ def test_stream_messages_with_input_json_delta( ) async def test_streaming_create_message_with_input_json_delta_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1947,7 +1973,7 @@ async def test_streaming_create_message_with_input_json_delta_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -1969,39 +1995,37 @@ async def test_streaming_create_message_with_input_json_delta_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the 
weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -2020,7 +2044,7 @@ async def test_streaming_create_message_with_input_json_delta_async( ) async def test_stream_message_with_input_json_delta_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2106,7 +2130,7 @@ async def test_stream_message_with_input_json_delta_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -2129,44 +2153,42 @@ async def test_stream_message_with_input_json_delta_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert 
span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: assert ( - span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '[{"role": "user", "content": "What is the weather like in San Francisco?"}]' ) assert ( - span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == '{"location": "San Francisco, CA"}' ) else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 366 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True -def test_exception_message_create(sentry_init, capture_events): +def test_exception_message_create(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = 
capture_items("event", "transaction") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -2179,14 +2201,16 @@ def test_exception_message_create(sentry_init, capture_events): max_tokens=1024, ) - (event, transaction) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" -def test_span_status_error(sentry_init, capture_events): +def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "span") with start_transaction(name="anthropic"): client = Anthropic(api_key="z") @@ -2200,18 +2224,19 @@ def test_span_status_error(sentry_init, capture_events): max_tokens=1024, ) - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def test_span_status_error_async(sentry_init, capture_events): +async def test_span_status_error_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "span") with start_transaction(name="anthropic"): client = 
AsyncAnthropic(api_key="z") @@ -2225,18 +2250,19 @@ async def test_span_status_error_async(sentry_init, capture_events): max_tokens=1024, ) - (error, transaction) = events + (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert transaction["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def test_exception_message_create_async(sentry_init, capture_events): +async def test_exception_message_create_async(sentry_init, capture_items): sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "transaction") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock( @@ -2249,17 +2275,19 @@ async def test_exception_message_create_async(sentry_init, capture_events): max_tokens=1024, ) - (event, transaction) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" -def test_span_origin(sentry_init, capture_events): +def test_span_origin(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2274,21 
+2302,22 @@ def test_span_origin(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.asyncio -async def test_span_origin_async(sentry_init, capture_events): +async def test_span_origin_async(sentry_init, capture_items): sentry_init( integrations=[AnthropicIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2303,12 +2332,13 @@ async def test_span_origin_async(sentry_init, capture_events): with start_transaction(name="anthropic"): await client.messages.create(max_tokens=1024, messages=messages, model="model") - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert event["spans"][0]["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.anthropic" + assert 
spans[0]["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert spans[0]["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" @pytest.mark.skipif( @@ -2392,7 +2422,7 @@ def test_set_output_data_with_input_json_delta(sentry_init): ], ) def test_anthropic_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_items, test_message, expected_role ): """Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( @@ -2400,7 +2430,7 @@ def test_anthropic_message_role_mapping( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") @@ -2425,29 +2455,28 @@ def mock_messages_create(*args, **kwargs): model="claude-3-opus", max_tokens=10, messages=test_messages ) - (event,) = events - span = event["spans"][0] + span = next(item.payload for item in items if item.type == "span") # Verify that the span was created correctly - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] # Parse the stored messages - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert stored_messages[0]["role"] == expected_role -def test_anthropic_message_truncation(sentry_init, capture_events): +def test_anthropic_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( 
integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2466,21 +2495,18 @@ def test_anthropic_message_truncation(sentry_init, capture_events): with start_transaction(): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2488,18 +2514,19 @@ def test_anthropic_message_truncation(sentry_init, capture_events): assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @pytest.mark.asyncio -async def test_anthropic_message_truncation_async(sentry_init, capture_events): +async def test_anthropic_message_truncation_async(sentry_init, 
capture_items): """Test that large messages are truncated properly in Anthropic integration.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2518,21 +2545,18 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_events): with start_transaction(): await client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) > 0 - tx = events[0] - assert tx["type"] == "transaction" - + spans = [item.payload for item in items if item.type == "span"] chat_spans = [ - span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT + span for span in spans if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ] assert len(chat_spans) > 0 chat_span = chat_spans[0] - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"] + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["attributes"] - messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -2540,6 +2564,7 @@ async def test_anthropic_message_truncation_async(sentry_init, capture_events): assert len(parsed_messages) == 1 assert "small message 5" in str(parsed_messages[0]) + tx = next(item.payload for item in items if item.type == "transaction") assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 @@ -2553,7 +2578,7 @@ async 
def test_anthropic_message_truncation_async(sentry_init, capture_events): ], ) def test_nonstreaming_create_message_with_system_prompt( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES.""" sentry_init( @@ -2561,7 +2586,7 @@ def test_nonstreaming_create_message_with_system_prompt( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -2586,46 +2611,46 @@ def test_nonstreaming_create_message_with_system_prompt( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - 
span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.asyncio @@ -2639,7 +2664,7 @@ def test_nonstreaming_create_message_with_system_prompt( ], ) async 
def test_nonstreaming_create_message_with_system_prompt_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): """Test that system prompts are properly captured in GEN_AI_REQUEST_MESSAGES (async).""" sentry_init( @@ -2647,7 +2672,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncAnthropic(api_key="z") client.messages._post = AsyncMock(return_value=EXAMPLE_MESSAGE) @@ -2672,46 +2697,46 @@ async def test_nonstreaming_create_message_with_system_prompt_async( assert usage.input_tokens == 10 assert usage.output_tokens == 20 - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + 
span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"] @pytest.mark.parametrize( @@ -2725,7 +2750,7 @@ async def test_nonstreaming_create_message_with_system_prompt_async( ) def 
test_streaming_create_message_with_system_prompt( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2776,7 +2801,7 @@ def test_streaming_create_message_with_system_prompt( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -2802,46 +2827,46 @@ def test_streaming_create_message_with_system_prompt( for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert 
SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.parametrize( @@ -2855,7 +2880,7 @@ def test_streaming_create_message_with_system_prompt( ) def test_stream_messages_with_system_prompt( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -2906,7 +2931,7 @@ def test_stream_messages_with_system_prompt( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -2930,46 +2955,46 @@ def test_stream_messages_with_system_prompt( for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert 
event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -2984,7 +3009,7 @@ def test_stream_messages_with_system_prompt( ) async def test_stream_message_with_system_prompt_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3038,7 +3063,7 @@ async def test_stream_message_with_system_prompt_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -3062,46 +3087,46 @@ async def test_stream_message_with_system_prompt_async( async for event in stream: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert span["description"] == "chat model" - assert 
span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True @pytest.mark.asyncio @@ -3116,7 +3141,7 @@ async def test_stream_message_with_system_prompt_async( ) async def test_streaming_create_message_with_system_prompt_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3170,7 +3195,7 @@ async def test_streaming_create_message_with_system_prompt_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") messages = [ { @@ -3196,56 +3221,56 @@ async def test_streaming_create_message_with_system_prompt_async( async for _ in message: pass - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "anthropic" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["op"] == OP.GEN_AI_CHAT - assert 
span["description"] == "chat model" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" + assert span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert span["name"] == "chat model" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] system_instructions = json.loads( - span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ {"type": "text", "content": "You are a helpful assistant."} ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads( + span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + ) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" assert stored_messages[0]["content"] == "Hello, Claude" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True -def test_system_prompt_with_complex_structure(sentry_init, capture_events): +def test_system_prompt_with_complex_structure(sentry_init, capture_items): """Test that complex system prompt structures (list of text blocks) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3268,17 +3293,18 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): ) assert response == EXAMPLE_MESSAGE - assert len(events) == 1 - (event,) = events - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (span,) = spans - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" - assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert 
span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "anthropic" + assert span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] - system_instructions = json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["attributes"] + system_instructions = json.loads( + span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) # System content should be a list of text blocks assert isinstance(system_instructions, list) @@ -3287,8 +3313,8 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): {"type": "text", "content": "Be concise and clear."}, ] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3490,14 +3516,14 @@ def test_transform_message_content_list_anthropic(): # Integration tests for binary data in messages -def test_message_with_base64_image(sentry_init, capture_events): +def test_message_with_base64_image(sentry_init, capture_items): """Test that messages with base64 images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3521,12 +3547,11 @@ def test_message_with_base64_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type 
== "span"] + (span,) = spans - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == "user" @@ -3541,14 +3566,14 @@ def test_message_with_base64_image(sentry_init, capture_events): } -def test_message_with_url_image(sentry_init, capture_events): +def test_message_with_url_image(sentry_init, capture_items): """Test that messages with URL-referenced images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3571,11 +3596,10 @@ def test_message_with_url_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3585,14 +3609,14 @@ def test_message_with_url_image(sentry_init, capture_events): } -def test_message_with_file_image(sentry_init, capture_events): +def test_message_with_file_image(sentry_init, capture_items): """Test that messages with file_id-referenced images are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events 
= capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3616,11 +3640,10 @@ def test_message_with_file_image(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3630,14 +3653,14 @@ def test_message_with_file_image(sentry_init, capture_events): } -def test_message_with_base64_pdf(sentry_init, capture_events): +def test_message_with_base64_pdf(sentry_init, capture_items): """Test that messages with base64-encoded PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3651,7 +3674,7 @@ def test_message_with_base64_pdf(sentry_init, capture_events): "source": { "type": "base64", "media_type": "application/pdf", - "data": "JVBERi0xLjQKJeLj...base64pdfdata", + "data": "JVBERi0xLjQKJeLj...base64pdfdata", }, }, ], @@ -3661,11 +3684,10 @@ def test_message_with_base64_pdf(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages =
json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "blob", @@ -3675,14 +3697,14 @@ def test_message_with_base64_pdf(sentry_init, capture_events): } -def test_message_with_url_pdf(sentry_init, capture_events): +def test_message_with_url_pdf(sentry_init, capture_items): """Test that messages with URL-referenced PDF documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3705,11 +3727,10 @@ def test_message_with_url_pdf(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "uri", @@ -3719,14 +3740,14 @@ def test_message_with_url_pdf(sentry_init, capture_events): } -def test_message_with_file_document(sentry_init, capture_events): +def test_message_with_file_document(sentry_init, capture_items): """Test that messages with file_id-referenced documents are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) 
@@ -3750,11 +3771,10 @@ def test_message_with_file_document(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert content[1] == { "type": "file", @@ -3764,14 +3784,14 @@ def test_message_with_file_document(sentry_init, capture_events): } -def test_message_with_mixed_content(sentry_init, capture_events): +def test_message_with_mixed_content(sentry_init, capture_items): """Test that messages with mixed content (text, images, documents) are properly captured.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3785,7 +3805,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "source": { "type": "base64", "media_type": "image/png", - "data": "iVBORw0KGgo...base64imagedata", + "attributes": "iVBORw0KGgo...base64imagedata", }, }, { @@ -3800,7 +3820,7 @@ def test_message_with_mixed_content(sentry_init, capture_events): "source": { "type": "base64", "media_type": "application/pdf", - "data": "JVBERi0xLjQK...base64pdfdata", + "attributes": "JVBERi0xLjQK...base64pdfdata", }, }, {"type": "text", "text": "Please provide a detailed analysis."}, @@ -3811,11 +3831,10 @@ def test_message_with_mixed_content(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) 
== 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 5 @@ -3847,14 +3866,14 @@ def test_message_with_mixed_content(sentry_init, capture_events): } -def test_message_with_multiple_images_different_formats(sentry_init, capture_events): +def test_message_with_multiple_images_different_formats(sentry_init, capture_items): """Test that messages with multiple images of different source types are handled.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3867,7 +3886,7 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64data1...", + "attributes": "base64data1...", }, }, { @@ -3893,11 +3912,10 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) content = stored_messages[0]["content"] assert len(content) == 4 @@ -3922,14 +3940,14 @@ def test_message_with_multiple_images_different_formats(sentry_init, capture_eve assert content[3] == {"type": "text", "text": 
"Compare these three images."} -def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events): +def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items): """Test that binary content is not stored when send_default_pii is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3943,7 +3961,7 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64encodeddatahere...", + "attributes": "base64encodeddatahere...", }, }, ], @@ -3953,22 +3971,21 @@ def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_events with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] -def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_events): +def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_items): """Test that binary content is not stored when include_prompts is False.""" sentry_init( integrations=[AnthropicIntegration(include_prompts=False)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) @@ -3982,7 +3999,7 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, 
capture_ev "source": { "type": "base64", "media_type": "image/jpeg", - "data": "base64encodeddatahere...", + "attributes": "base64encodeddatahere...", }, }, ], @@ -3992,18 +4009,17 @@ def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_ev with start_transaction(name="anthropic"): client.messages.create(max_tokens=1024, messages=messages, model="model") - assert len(events) == 1 - (event,) = events - (span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (span,) = spans # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] -def test_cache_tokens_nonstreaming(sentry_init, capture_events): +def test_cache_tokens_nonstreaming(sentry_init, capture_items): """Test cache read/write tokens are tracked for non-streaming responses.""" sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4029,16 +4045,16 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events): model="claude-3-5-sonnet-20241022", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 + assert 
span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 -def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_events): +def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_items): """ Test that gen_ai.usage.input_tokens includes cache_write tokens (non-streaming). @@ -4051,7 +4067,7 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_even cache_creation_input_tokens=2846, cache_read_input_tokens=0) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4077,16 +4093,16 @@ def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_even model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 -def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_events): +def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_items): """ Test that gen_ai.usage.input_tokens includes cache_read tokens (non-streaming). 
@@ -4099,7 +4115,7 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_event cache_creation_input_tokens=0, cache_read_input_tokens=2846) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4125,18 +4141,18 @@ def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_event model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 def test_input_tokens_include_cache_read_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4176,7 +4192,7 @@ def test_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4192,18 +4208,18 @@ def test_input_tokens_include_cache_read_streaming( ): pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 + 2846 = 
test_stream_messages_input_tokens_include_cache_read_streaming - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 def test_stream_messages_input_tokens_include_cache_read_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4242,7 +4258,7 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4258,16 +4274,16 @@ def test_stream_messages_input_tokens_include_cache_read_streaming( for event in stream: pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens should be total: 19 + 2846 = 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 -def 
test_input_tokens_unchanged_without_caching(sentry_init, capture_events): +def test_input_tokens_unchanged_without_caching(sentry_init, capture_items): """ Test that input_tokens is unchanged when there are no cached tokens. @@ -4275,7 +4291,7 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): Usage(input_tokens=20, output_tokens=12) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") client = Anthropic(api_key="z") client.messages._post = mock.Mock( @@ -4299,15 +4315,15 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): model="claude-sonnet-4-20250514", ) - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 def test_cache_tokens_streaming( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -4343,7 +4359,7 @@ def test_cache_tokens_streaming( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4359,17 +4375,17 @@ def test_cache_tokens_streaming( ): pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - 
assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 def test_stream_messages_cache_tokens( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """Test cache tokens are tracked for streaming responses.""" client = Anthropic(api_key="z") @@ -4403,7 +4419,7 @@ def test_stream_messages_cache_tokens( ) sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("transaction", "span") with mock.patch.object( client._client, @@ -4419,10 +4435,10 @@ def test_stream_messages_cache_tokens( for event in stream: pass - (span,) = events[0]["spans"] + (span,) = [item.payload for item in items if item.type == "span"] # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 diff --git 
a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 6e91ba6634..e074b79c8c 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -124,14 +124,14 @@ def create_test_config( ], ) def test_nonstreaming_generate_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the HTTP response at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -146,38 +146,37 @@ def test_nonstreaming_generate_content( mock_genai_client.models.generate_content( model="gemini-1.5-flash", contents="Tell me a joke", config=config ) - assert len(events) == 1 - (event,) = events - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "google_genai" - assert len(event["spans"]) == 1 - chat_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") # Check chat span - assert chat_span["op"] == OP.GEN_AI_CHAT - assert chat_span["description"] == "chat gemini-1.5-flash" - assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" - assert chat_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + assert chat_span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + assert chat_span["name"] == "chat gemini-1.5-flash" + assert 
chat_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + assert chat_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" if send_default_pii and include_prompts: # Response text is stored as a JSON array - response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] # Parse the JSON array response_texts = json.loads(response_text) assert response_texts == ["Hello! How can I help you today?"] else: - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["attributes"] # Check token usage - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 # Output tokens now include reasoning tokens: candidates_token_count (20) + thoughts_token_count (3) = 23 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 23 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 @pytest.mark.parametrize("generate_content_config", (False, True)) @@ -210,7 +209,7 @@ def test_nonstreaming_generate_content( ) def test_generate_content_with_system_instruction( sentry_init, - capture_events, + capture_items, mock_genai_client, generate_content_config, system_instructions, @@ -221,7 +220,7 @@ def test_generate_content_with_system_instruction( traces_sample_rate=1.0, 
send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -243,16 +242,15 @@ def test_generate_content_with_system_instruction( config=config, ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") if expected_texts is None: - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"] + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["attributes"] return # (PII is enabled and include_prompts is True in this test) system_instructions = json.loads( - invoke_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + invoke_span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] ) assert system_instructions == [ @@ -260,12 +258,12 @@ def test_generate_content_with_system_instruction( ] -def test_generate_content_with_tools(sentry_init, capture_events, mock_genai_client): +def test_generate_content_with_tools(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -319,18 +317,17 @@ def get_weather(location: str) -> str: model="gemini-1.5-flash", contents="What's the weather?", config=config ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") # Check that tools are recorded (data is serialized as a string) - tools_data_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] + tools_data_str = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS] # Parse the JSON string to verify content tools_data = json.loads(tools_data_str) assert len(tools_data) == 2 # The order of tools may not be guaranteed, so sort by name and description for comparison sorted_tools = sorted( - 
tools_data, key=lambda t: (t.get("name", ""), t.get("description", "")) + tools_data, key=lambda t: (t.get("name", ""), t.get("name", "")) ) # The function tool @@ -342,13 +339,13 @@ def get_weather(location: str) -> str: assert sorted_tools[1]["description"] == "Get weather information (tool object)" -def test_tool_execution(sentry_init, capture_events): +def test_tool_execution(sentry_init, capture_items): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Create a mock tool function def get_weather(location: str) -> str: @@ -366,25 +363,25 @@ def get_weather(location: str) -> str: assert result == "The weather in San Francisco is sunny" - (event,) = events - assert len(event["spans"]) == 1 - tool_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + tool_span = next(item.payload for item in items if item.type == "span") - assert tool_span["op"] == OP.GEN_AI_EXECUTE_TOOL - assert tool_span["description"] == "execute_tool get_weather" - assert tool_span["data"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" + assert tool_span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + assert tool_span["name"] == "execute_tool get_weather" + assert tool_span["attributes"][SPANDATA.GEN_AI_TOOL_NAME] == "get_weather" assert ( - tool_span["data"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] + tool_span["attributes"][SPANDATA.GEN_AI_TOOL_DESCRIPTION] == "Get the weather for a location" ) -def test_error_handling(sentry_init, capture_events, mock_genai_client): +def test_error_handling(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("event", "transaction") # Mock an error at the HTTP level with mock.patch.object( @@ -399,8 +396,8 @@ def 
test_error_handling(sentry_init, capture_events, mock_genai_client): ) # Should have both transaction and error events - assert len(events) == 2 - error_event, transaction_event = events + assert len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -408,14 +405,14 @@ def test_error_handling(sentry_init, capture_events, mock_genai_client): assert error_event["exception"]["values"][0]["mechanism"]["type"] == "google_genai" -def test_streaming_generate_content(sentry_init, capture_events, mock_genai_client): +def test_streaming_generate_content(sentry_init, capture_items, mock_genai_client): """Test streaming with generate_content_stream, verifying chunk accumulation.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Create streaming chunks - simulating a multi-chunk response # Chunk 1: First part of text with partial usage metadata @@ -497,40 +494,41 @@ def test_streaming_generate_content(sentry_init, capture_events, mock_genai_clie assert collected_chunks[1].candidates[0].content.parts[0].text == "How can I " assert collected_chunks[2].candidates[0].content.parts[0].text == "help you today?" - (event,) = events - - assert len(event["spans"]) == 1 - chat_span = event["spans"][0] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + chat_span = next(item.payload for item in items if item.type == "span") # Check that streaming flag is set on both spans - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True # Verify accumulated response text (all chunks combined) expected_full_text = "Hello! 
How can I help you today?" # Response text is stored as a JSON string - chat_response_text = json.loads(chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]) + chat_response_text = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] + ) assert chat_response_text == [expected_full_text] # Verify finish reasons (only the final chunk has a finish reason) # When there's a single finish reason, it's stored as a plain string (not JSON) - assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["data"] - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 - assert chat_span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 + assert SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS in chat_span["attributes"] + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == "STOP" + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 25 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 5 + assert chat_span["attributes"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING] == 3 # Verify model name - assert chat_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" + assert chat_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gemini-1.5-flash" -def test_span_origin(sentry_init, capture_events, mock_genai_client): +def test_span_origin(sentry_init, capture_items, mock_genai_client): sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span", "transaction") 
mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -543,22 +541,21 @@ def test_span_origin(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents="Test origin", config=config ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" -def test_response_without_usage_metadata( - sentry_init, capture_events, mock_genai_client -): + +def test_response_without_usage_metadata(sentry_init, capture_items, mock_genai_client): """Test handling of responses without usage metadata""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response without usage metadata response_json = { @@ -584,23 +581,22 @@ def test_response_without_usage_metadata( model="gemini-1.5-flash", contents="Test", config=config ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") # Usage data should not be present - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["data"] - assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["data"] - assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in chat_span["attributes"] + assert SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS not in chat_span["attributes"] + assert SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS not in chat_span["attributes"] -def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): +def test_multiple_candidates(sentry_init, capture_items, mock_genai_client): """Test handling of multiple response candidates""" sentry_init( 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Response with multiple candidates multi_candidate_json = { @@ -638,12 +634,11 @@ def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents="Generate multiple", config=config ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") # Should capture all responses # Response text is stored as a JSON string when there are multiple responses - response_text = chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + response_text = chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] if isinstance(response_text, str) and response_text.startswith("["): # It's a JSON array response_list = json.loads(response_text) @@ -654,18 +649,18 @@ def test_multiple_candidates(sentry_init, capture_events, mock_genai_client): # Finish reasons are serialized as JSON finish_reasons = json.loads( - chat_span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] ) assert finish_reasons == ["STOP", "MAX_TOKENS"] -def test_all_configuration_parameters(sentry_init, capture_events, mock_genai_client): +def test_all_configuration_parameters(sentry_init, capture_items, mock_genai_client): """Test that all configuration parameters are properly recorded""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -686,26 +681,25 @@ def test_all_configuration_parameters(sentry_init, capture_events, mock_genai_cl model="gemini-1.5-flash", contents="Test all params", config=config ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if 
item.type == "span") # Check all parameters are recorded - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.8 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.95 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_K] == 40 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 2048 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_SEED] == 12345 -def test_empty_response(sentry_init, capture_events, mock_genai_client): +def test_empty_response(sentry_init, capture_items, mock_genai_client): """Test handling of minimal response with no content""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Minimal response with empty candidates array minimal_response_json = {"candidates": []} @@ -723,20 +717,20 @@ def test_empty_response(sentry_init, capture_events, mock_genai_client): assert response is not None assert len(response.candidates) == 0 - (event,) = events # Should still create spans even with empty candidates - assert len(event["spans"]) == 1 + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 def test_response_with_different_id_fields( - sentry_init, capture_events, mock_genai_client + 
sentry_init, capture_items, mock_genai_client ): """Test handling of different response ID field names""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response with response_id and model_version response_json = { @@ -763,20 +757,21 @@ def test_response_with_different_id_fields( model="gemini-1.5-flash", contents="Test", config=create_test_config() ) - (event,) = events - chat_span = event["spans"][0] + chat_span = next(item.payload for item in items if item.type == "span") - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" - assert chat_span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gemini-1.5-flash-001" + assert chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_ID] == "resp-456" + assert ( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] + == "gemini-1.5-flash-001" + ) -def test_tool_with_async_function(sentry_init, capture_events): +def test_tool_with_async_function(sentry_init): """Test that async tool functions are properly wrapped""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - capture_events() # Create an async tool function async def async_tool(param: str) -> str: @@ -792,14 +787,14 @@ async def async_tool(param: str) -> str: assert hasattr(wrapped_async_tool, "__wrapped__") # Should preserve original -def test_contents_as_none(sentry_init, capture_events, mock_genai_client): +def test_contents_as_none(sentry_init, capture_items, mock_genai_client): """Test handling when contents parameter is None""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -811,22 +806,21 @@ def test_contents_as_none(sentry_init, capture_events, mock_genai_client): model="gemini-1.5-flash", contents=None, 
config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") # Should handle None contents gracefully - messages = invoke_span["data"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) + messages = invoke_span["attributes"].get(SPANDATA.GEN_AI_REQUEST_MESSAGES, []) # Should only have system message if any, not user message assert all(msg["role"] != "user" or msg["content"] is not None for msg in messages) -def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): +def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): """Test extraction of tool/function calls from response""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response with function calls function_call_response_json = { @@ -875,14 +869,17 @@ def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): config=create_test_config(), ) - (event,) = events - chat_span = event["spans"][0] # The chat span + chat_span = next( + item.payload for item in items if item.type == "span" + ) # The chat span # Check that tool calls are extracted and stored - assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS in chat_span["attributes"] # Parse the JSON string to verify content - tool_calls = json.loads(chat_span["data"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS]) + tool_calls = json.loads( + chat_span["attributes"][SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS] + ) assert len(tool_calls) == 2 @@ -902,16 +899,14 @@ def test_tool_calls_extraction(sentry_init, capture_events, mock_genai_client): assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -def test_google_genai_message_truncation( - sentry_init, capture_events, mock_genai_client -): +def test_google_genai_message_truncation(sentry_init, capture_items, 
mock_genai_client): """Test that large messages are truncated properly in Google GenAI integration.""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") large_content = ( "This is a very long message that will exceed our size limits. " * 1000 @@ -930,11 +925,10 @@ def test_google_genai_message_truncation( config=create_test_config(), ) - (event,) = events - invoke_span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["data"] + invoke_span = next(item.payload for item in items if item.type == "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - messages_data = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) @@ -980,14 +974,14 @@ def test_google_genai_message_truncation( ], ) def test_embed_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the HTTP response at the _api_client.request() level mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1006,47 +1000,49 @@ def test_embed_content( ], ) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "google_genai_embeddings" # Should have 1 span for embeddings - assert len(event["spans"]) == 1 - (embed_span,) = event["spans"] + spans = [item.payload for item 
in items if item.type == "span"] + assert len(spans) == 1 + (embed_span,) = spans # Check embeddings span - assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["description"] == "embeddings text-embedding-004" - assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + ) # Check input texts if PII is allowed if send_default_pii and include_prompts: - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 -def test_embed_content_string_input(sentry_init, capture_events, mock_genai_client): +def test_embed_content_string_input(sentry_init, capture_items, mock_genai_client): """Test embed_content with a single string instead of list.""" sentry_init( 
integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ -1074,25 +1070,25 @@ def test_embed_content_string_input(sentry_init, capture_events, mock_genai_clie contents="Single text input", ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # Check that single string is handled correctly - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 -def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_client): +def test_embed_content_error_handling(sentry_init, capture_items, mock_genai_client): """Test error handling in embed_content.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "event") # Mock an error at the HTTP level with mock.patch.object( @@ -1108,8 +1104,8 @@ def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_cl ) # Should have both transaction and error events - assert len(events) == 2 - error_event, _ = events + assert len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert 
error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1118,14 +1114,14 @@ def test_embed_content_error_handling(sentry_init, capture_events, mock_genai_cl def test_embed_content_without_statistics( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the statistics field @@ -1150,21 +1146,21 @@ def test_embed_content_without_statistics( contents=["Test without statistics", "Another test"], ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # No usage tokens since there are no statistics in older versions # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] -def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_client): +def test_embed_content_span_origin(sentry_init, capture_items, mock_genai_client): """Test that embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1177,11 +1173,12 @@ def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_clien contents=["Test origin"], ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - for span in 
event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" @pytest.mark.asyncio @@ -1195,7 +1192,7 @@ def test_embed_content_span_origin(sentry_init, capture_events, mock_genai_clien ], ) async def test_async_embed_content( - sentry_init, capture_events, send_default_pii, include_prompts, mock_genai_client + sentry_init, capture_items, send_default_pii, include_prompts, mock_genai_client ): """Test async embed_content method.""" sentry_init( @@ -1203,7 +1200,7 @@ async def test_async_embed_content( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("transaction", "span") # Mock the async HTTP response mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1222,42 +1219,44 @@ async def test_async_embed_content( ], ) - assert len(events) == 1 - (event,) = events - - assert event["type"] == "transaction" + (event,) = (item.payload for item in items if item.type == "transaction") assert event["transaction"] == "google_genai_embeddings_async" # Should have 1 span for embeddings - assert len(event["spans"]) == 1 - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + assert len(spans) == 1 + (embed_span,) = spans # Check embeddings span - assert embed_span["op"] == OP.GEN_AI_EMBEDDINGS - assert embed_span["description"] == "embeddings text-embedding-004" - assert embed_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" - assert embed_span["data"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" - assert embed_span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + assert embed_span["attributes"]["sentry.op"] == OP.GEN_AI_EMBEDDINGS + assert embed_span["name"] == "embeddings text-embedding-004" + assert embed_span["attributes"][SPANDATA.GEN_AI_OPERATION_NAME] == 
"embeddings" + assert embed_span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "gcp.gemini" + assert ( + embed_span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-004" + ) # Check input texts if PII is allowed if send_default_pii and include_prompts: - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = json.loads( + embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + ) assert input_texts == [ "What is your name?", "What is your favorite color?", ] else: - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in embed_span["attributes"] # Check usage data (sum of token counts from statistics: 10 + 15 = 25) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 25 @pytest.mark.asyncio async def test_async_embed_content_string_input( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test async embed_content with a single string instead of list.""" sentry_init( @@ -1265,7 +1264,7 @@ async def test_async_embed_content_string_input( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") # Mock response with single embedding single_embed_response = { @@ -1293,28 +1292,28 @@ async def test_async_embed_content_string_input( contents="Single text input", ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # Check that single string is handled correctly - input_texts = json.loads(embed_span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) + input_texts = 
json.loads(embed_span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) assert input_texts == ["Single text input"] # Should use token_count from statistics (5), not billable_character_count (10) # Note: Only available in newer versions with ContentEmbeddingStatistics - if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["data"]: - assert embed_span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + if SPANDATA.GEN_AI_USAGE_INPUT_TOKENS in embed_span["attributes"]: + assert embed_span["attributes"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 @pytest.mark.asyncio async def test_async_embed_content_error_handling( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test error handling in async embed_content.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "event") # Mock an error at the HTTP level with mock.patch.object( @@ -1330,8 +1329,8 @@ async def test_async_embed_content_error_handling( ) # Should have both transaction and error events - assert len(events) == 2 - error_event, _ = events + assert len([item for item in items if item.type == "transaction"]) == 1 + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "Exception" @@ -1341,14 +1340,14 @@ async def test_async_embed_content_error_handling( @pytest.mark.asyncio async def test_async_embed_content_without_statistics( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test async embed_content response without statistics (older package versions).""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") # Response without statistics (typical for older google-genai versions) # Embeddings exist but don't have the 
statistics field @@ -1373,24 +1372,24 @@ async def test_async_embed_content_without_statistics( contents=["Test without statistics", "Another test"], ) - (event,) = events - (embed_span,) = event["spans"] + spans = [item.payload for item in items if item.type == "span"] + (embed_span,) = spans # No usage tokens since there are no statistics in older versions # This is expected and the integration should handle it gracefully - assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["data"] + assert SPANDATA.GEN_AI_USAGE_INPUT_TOKENS not in embed_span["attributes"] @pytest.mark.asyncio async def test_async_embed_content_span_origin( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test that async embed_content spans have correct origin.""" sentry_init( integrations=[GoogleGenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") mock_http_response = create_mock_http_response(EXAMPLE_EMBED_RESPONSE_JSON) @@ -1403,16 +1402,17 @@ async def test_async_embed_content_span_origin( contents=["Test origin"], ) - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - for span in event["spans"]: - assert span["origin"] == "auto.ai.google_genai" + + spans = [item.payload for item in items if item.type == "span"] + for span in spans: + assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" # Integration tests for generate_content with different input message formats def test_generate_content_with_content_object( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with Content object input.""" sentry_init( @@ -1420,7 +1420,7 @@ def test_generate_content_with_content_object( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = 
capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1437,10 +1437,9 @@ def test_generate_content_with_content_object( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1449,7 +1448,7 @@ def test_generate_content_with_content_object( def test_generate_content_with_dict_format( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with dict format input (ContentDict).""" sentry_init( @@ -1457,7 +1456,7 @@ def test_generate_content_with_dict_format( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1472,10 +1471,9 @@ def test_generate_content_with_dict_format( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [ @@ -1483,16 +1481,14 @@ def test_generate_content_with_dict_format( ] -def test_generate_content_with_file_data( - sentry_init, capture_events, mock_genai_client -): +def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_client): """Test generate_content with 
file_data (external file reference).""" sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1516,10 +1512,9 @@ def test_generate_content_with_file_data( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1534,7 +1529,7 @@ def test_generate_content_with_file_data( def test_generate_content_with_inline_data( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with inline_data (binary data).""" sentry_init( @@ -1542,7 +1537,7 @@ def test_generate_content_with_inline_data( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1565,10 +1560,9 @@ def test_generate_content_with_inline_data( model="gemini-1.5-flash", contents=content, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1580,7 +1574,7 @@ def test_generate_content_with_inline_data( def 
test_generate_content_with_function_response( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with function_response (tool result).""" sentry_init( @@ -1588,7 +1582,7 @@ def test_generate_content_with_function_response( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1622,10 +1616,9 @@ def test_generate_content_with_function_response( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # First message is user message assert messages[0]["role"] == "tool" @@ -1635,7 +1628,7 @@ def test_generate_content_with_function_response( def test_generate_content_with_mixed_string_and_content( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with mixed string and Content objects in list.""" sentry_init( @@ -1643,7 +1636,7 @@ def test_generate_content_with_mixed_string_and_content( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1668,10 +1661,9 @@ def test_generate_content_with_mixed_string_and_content( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = 
json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 # User message assert messages[0]["role"] == "user" @@ -1679,7 +1671,7 @@ def test_generate_content_with_mixed_string_and_content( def test_generate_content_with_part_object_directly( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with Part object directly (not wrapped in Content).""" sentry_init( @@ -1687,7 +1679,7 @@ def test_generate_content_with_part_object_directly( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1702,17 +1694,16 @@ def test_generate_content_with_part_object_directly( model="gemini-1.5-flash", contents=part, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] def test_generate_content_with_list_of_dicts( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """ Test generate_content with list of dict format inputs. 
@@ -1726,7 +1717,7 @@ def test_generate_content_with_list_of_dicts( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1745,17 +1736,16 @@ def test_generate_content_with_list_of_dicts( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] def test_generate_content_with_dict_inline_data( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): """Test generate_content with dict format containing inline_data.""" sentry_init( @@ -1763,7 +1753,7 @@ def test_generate_content_with_dict_inline_data( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1784,10 +1774,9 @@ def test_generate_content_with_dict_inline_data( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" assert len(messages[0]["content"]) == 2 @@ -1801,14 +1790,14 @@ def test_generate_content_with_dict_inline_data( def test_generate_content_without_parts_property_inline_data( 
- sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1825,10 +1814,9 @@ def test_generate_content_without_parts_property_inline_data( model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 @@ -1845,14 +1833,14 @@ def test_generate_content_without_parts_property_inline_data( def test_generate_content_without_parts_property_inline_data_and_binary_data_within_string( - sentry_init, capture_events, mock_genai_client + sentry_init, capture_items, mock_genai_client ): sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) @@ -1874,10 +1862,9 @@ def test_generate_content_without_parts_property_inline_data_and_binary_data_wit model="gemini-1.5-flash", contents=contents, config=create_test_config() ) - (event,) = events - invoke_span = event["spans"][0] + invoke_span = next(item.payload for item in items if item.type == "span") - messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(messages) == 1 assert messages[0]["role"] == "user" @@ -2162,7 +2149,9 @@ def test_extract_contents_messages_dict_inline_data(): """Test 
extract_contents_messages with dict containing inline_data""" content_dict = { "role": "user", - "parts": [{"inline_data": {"data": b"binary_data", "mime_type": "image/gif"}}], + "parts": [ + {"inline_data": {"data": b"binary_data", "mime_type": "image/gif"}} + ], } result = extract_contents_messages(content_dict) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index ada2e633de..e53f8e4f55 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -132,14 +132,14 @@ async def __call__(self, *args, **kwargs): ], ) def test_nonstreaming_chat_completion_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -163,27 +163,26 @@ def test_nonstreaming_chat_completion_no_prompts( ) assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + 
assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -229,13 +228,13 @@ def test_nonstreaming_chat_completion_no_prompts( ), ], ) -def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, request): +def test_nonstreaming_chat_completion(sentry_init, capture_items, messages, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -256,30 +255,29 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req 
) assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -290,12 +288,12 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, 
messages, req }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -308,14 +306,14 @@ def test_nonstreaming_chat_completion(sentry_init, capture_events, messages, req ], ) async def test_nonstreaming_chat_completion_async_no_prompts( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = mock.AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -336,27 +334,26 @@ async def test_nonstreaming_chat_completion_async_no_prompts( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert 
span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] + + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -404,14 +401,14 @@ async def test_nonstreaming_chat_completion_async_no_prompts( ], ) async def test_nonstreaming_chat_completion_async( - sentry_init, capture_events, messages, request + sentry_init, capture_items, messages, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, 
send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -429,30 +426,29 @@ async def test_nonstreaming_chat_completion_async( response = response.choices[0].message.content assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] 
else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -463,12 +459,12 @@ async def test_nonstreaming_chat_completion_async( }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 def tiktoken_encoding_if_installed(): @@ -491,7 +487,7 @@ def tiktoken_encoding_if_installed(): ) def test_streaming_chat_completion_no_prompts( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -507,7 +503,7 @@ def test_streaming_chat_completion_no_prompts( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -581,32 +577,31 @@ def test_streaming_chat_completion_no_prompts( ) assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert 
span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 except ImportError: pass # if tiktoken is not installed, we 
can't guarantee token usage will be calculated properly @@ -617,7 +612,7 @@ def test_streaming_chat_completion_no_prompts( ) def test_streaming_chat_completion_with_usage_in_stream( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -627,7 +622,7 @@ def test_streaming_chat_completion_with_usage_in_stream( traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -684,13 +679,11 @@ def test_streaming_chat_completion_with_usage_in_stream( for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.skipif( @@ -699,7 +692,7 @@ def test_streaming_chat_completion_with_usage_in_stream( ) def test_streaming_chat_completion_empty_content_preserves_token_usage( sentry_init, - capture_events, + capture_items, get_model_response, server_side_event_chunks, ): @@ -709,7 +702,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -747,13 +740,11 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert 
span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["data"] - assert span["data"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 @pytest.mark.skipif( @@ -763,7 +754,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( @pytest.mark.asyncio async def test_streaming_chat_completion_empty_content_preserves_token_usage_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -774,7 +765,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -814,13 +805,11 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy async for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert "gen_ai.usage.output_tokens" not in span["data"] - assert span["data"]["gen_ai.usage.total_tokens"] == 20 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert "gen_ai.usage.output_tokens" not in span["attributes"] + assert span["attributes"]["gen_ai.usage.total_tokens"] == 20 @pytest.mark.skipif( @@ -830,7 +819,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy 
@pytest.mark.asyncio async def test_streaming_chat_completion_async_with_usage_in_stream( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -841,7 +830,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( traces_sample_rate=1.0, send_default_pii=False, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -900,13 +889,11 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( async for _ in response_stream: pass - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 # noinspection PyTypeChecker @@ -955,7 +942,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( ) def test_streaming_chat_completion( sentry_init, - capture_events, + capture_items, messages, request, get_model_response, @@ -971,7 +958,7 @@ def test_streaming_chat_completion( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -1041,30 +1028,29 @@ def test_streaming_chat_completion( map(lambda x: x.choices[0].delta.content, response_stream) ) assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] 
== "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -1075,22 +1061,22 @@ def test_streaming_chat_completion( }, ] - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in 
span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import if "blocks" in param_id: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 else: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1107,7 +1093,7 @@ def test_streaming_chat_completion( ) async def test_streaming_chat_completion_async_no_prompts( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -1124,7 +1110,7 @@ async def test_streaming_chat_completion_async_no_prompts( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -1201,32 +1187,31 @@ async def test_streaming_chat_completion_async_no_prompts( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert 
span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" - - assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["attributes"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert 
span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly @@ -1279,7 +1264,7 @@ async def test_streaming_chat_completion_async_no_prompts( ) async def test_streaming_chat_completion_async( sentry_init, - capture_events, + capture_items, messages, request, get_model_response, @@ -1296,7 +1281,7 @@ async def test_streaming_chat_completion_async( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1371,32 +1356,31 @@ async def test_streaming_chat_completion_async( response_string += x.choices[0].delta.content assert response_string == "hello world" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True - - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 - assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" + + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "some-model" + assert 
span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.1 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.2 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "model-id" param_id = request.node.callspec.id if "blocks" in param_id: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", } ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) == [ { "type": "text", "content": "You are a helpful assistant.", @@ -1407,28 +1391,28 @@ async def test_streaming_chat_completion_async( }, ] - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "hello world" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "hello world" in span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] try: import tiktoken # type: ignore # noqa # pylint: disable=unused-import if "blocks" in param_id: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 7 - assert span["data"]["gen_ai.usage.total_tokens"] == 9 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 7 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 9 else: - assert span["data"]["gen_ai.usage.output_tokens"] == 2 - assert span["data"]["gen_ai.usage.input_tokens"] == 12 - assert span["data"]["gen_ai.usage.total_tokens"] == 14 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 2 
+ assert span["attributes"]["gen_ai.usage.input_tokens"] == 12 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 14 except ImportError: pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly -def test_bad_chat_completion(sentry_init, capture_events): +def test_bad_chat_completion(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock( @@ -1440,13 +1424,13 @@ def test_bad_chat_completion(sentry_init, capture_events): messages=[{"role": "system", "content": "hello"}], ) - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" -def test_span_status_error(sentry_init, capture_events): +def test_span_status_error(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event", "transaction", "span") with start_transaction(name="test"): client = OpenAI(api_key="z") @@ -1458,17 +1442,20 @@ def test_span_status_error(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (error, transaction) = events - assert error["level"] == "error" - assert transaction["spans"][0]["status"] == "internal_error" - assert transaction["spans"][0]["tags"]["status"] == "internal_error" + (event,) = (item.payload for item in items if item.type == "event") + assert event["level"] == "error" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + (transaction,) = (item.payload for item in items if item.type == "transaction") assert transaction["contexts"]["trace"]["status"] == "internal_error" @pytest.mark.asyncio -async def test_bad_chat_completion_async(sentry_init, capture_events): +async def 
test_bad_chat_completion_async(sentry_init, capture_items): sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0) - events = capture_events() + items = capture_items("event") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( @@ -1479,7 +1466,7 @@ async def test_bad_chat_completion_async(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" @@ -1492,14 +1479,14 @@ async def test_bad_chat_completion_async(sentry_init, capture_events): ], ) def test_embeddings_create_no_pii( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") @@ -1521,17 +1508,15 @@ def test_embeddings_create_no_pii( assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert 
span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -1577,13 +1562,13 @@ def test_embeddings_create_no_pii( ), ], ) -def test_embeddings_create(sentry_init, capture_events, input, request): +def test_embeddings_create(sentry_init, capture_items, input, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") @@ -1603,24 +1588,24 @@ def test_embeddings_create(sentry_init, capture_events, input, request): assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" param_id = request.node.callspec.id if param_id == "string": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == ["hello"] + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ "First text", "Second text", "Third text", ] elif param_id == "tokens" or param_id == "token_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ 5, 8, 13, @@ -1628,13 +1613,13 @@ def 
test_embeddings_create(sentry_init, capture_events, input, request): 34, ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -1647,14 +1632,14 @@ def test_embeddings_create(sentry_init, capture_events, input, request): ], ) async def test_embeddings_create_async_no_pii( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1676,17 +1661,15 @@ async def test_embeddings_create_async_no_pii( assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" - assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert 
span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -1733,13 +1716,13 @@ async def test_embeddings_create_async_no_pii( ), ], ) -async def test_embeddings_create_async(sentry_init, capture_events, input, request): +async def test_embeddings_create_async(sentry_init, capture_items, input, request): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") @@ -1761,24 +1744,24 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque assert len(response.data[0].embedding) == 3 - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.embeddings" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-3-large" param_id = request.node.callspec.id if param_id == "string": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == ["hello"] + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + "hello" + ] elif param_id == "string_sequence" or param_id == "string_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ "First text", "Second text", "Third text", ] elif param_id == "tokens" or param_id == "token_iterable": - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) 
== [ 5, 8, 13, @@ -1786,13 +1769,13 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque 34, ] else: - assert json.loads(span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ + assert json.loads(span["attributes"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]) == [ [5, 8, 13, 21, 34], [8, 13, 21, 34, 55], ] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.parametrize( @@ -1800,14 +1783,14 @@ async def test_embeddings_create_async(sentry_init, capture_events, input, reque [(True, True), (True, False), (False, True), (False, False)], ) def test_embeddings_create_raises_error( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("event") client = OpenAI(api_key="z") @@ -1818,7 +1801,7 @@ def test_embeddings_create_raises_error( with pytest.raises(OpenAIError): client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" @@ -1828,14 +1811,14 @@ def test_embeddings_create_raises_error( [(True, True), (True, False), (False, True), (False, False)], ) async def test_embeddings_create_raises_error_async( - sentry_init, capture_events, send_default_pii, include_prompts + sentry_init, capture_items, send_default_pii, include_prompts ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("event") client = 
AsyncOpenAI(api_key="z") @@ -1846,16 +1829,16 @@ async def test_embeddings_create_raises_error_async( with pytest.raises(OpenAIError): await client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events + (event,) = (item.payload for item in items if item.type == "event") assert event["level"] == "error" -def test_span_origin_nonstreaming_chat(sentry_init, capture_events): +def test_span_origin_nonstreaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -1865,19 +1848,20 @@ def test_span_origin_nonstreaming_chat(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio -async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): +async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -1887,18 +1871,19 @@ async def test_span_origin_nonstreaming_chat_async(sentry_init, capture_events): model="some-model", messages=[{"role": "system", "content": "hello"}] ) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert 
event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" -def test_span_origin_streaming_chat(sentry_init, capture_events): +def test_span_origin_streaming_chat(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") returned_stream = Stream(cast_to=None, response=None, client=client) @@ -1946,21 +1931,22 @@ def test_span_origin_streaming_chat(sentry_init, capture_events): "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = events + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["contexts"]["trace"]["origin"] == "manual" - assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio async def test_span_origin_streaming_chat_async( - sentry_init, capture_events, async_iterator + sentry_init, capture_items, async_iterator ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") returned_stream = AsyncStream(cast_to=None, response=None, client=client) @@ -2014,18 +2000,19 @@ async def test_span_origin_streaming_chat_async( # "".join(map(lambda x: x.choices[0].delta.content, response_stream)) - (event,) = events - + (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + spans = [item.payload 
for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" -def test_span_origin_embeddings(sentry_init, capture_events): + +def test_span_origin_embeddings(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") @@ -2043,19 +2030,20 @@ def test_span_origin_embeddings(sentry_init, capture_events): with start_transaction(name="openai tx"): client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" @pytest.mark.asyncio -async def test_span_origin_embeddings_async(sentry_init, capture_events): +async def test_span_origin_embeddings_async(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("transaction", "span") client = AsyncOpenAI(api_key="z") @@ -2073,10 +2061,11 @@ async def test_span_origin_embeddings_async(sentry_init, capture_events): with start_transaction(name="openai tx"): await client.embeddings.create(input="hello", model="text-embedding-3-large") - (event,) = events - + (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.openai" + + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" def test_completions_token_usage_from_response(): @@ -2442,12 +2431,12 @@ def count_tokens(msg): 
@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): +def test_ai_client_span_responses_api_no_pii(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) @@ -2462,13 +2451,10 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert spans[0]["origin"] == "auto.ai.openai" - assert spans[0]["data"] == { + assert spans[0]["attributes"] == { "gen_ai.operation.name": "responses", "gen_ai.request.max_tokens": 100, "gen_ai.request.temperature": 0.7, @@ -2482,13 +2468,21 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } - assert "gen_ai.system_instructions" not in spans[0]["data"] - assert "gen_ai.request.messages" not in spans[0]["data"] - assert "gen_ai.response.text" not in spans[0]["data"] + assert "gen_ai.system_instructions" not in spans[0]["attributes"] + assert "gen_ai.request.messages" not in spans[0]["attributes"] + assert "gen_ai.response.text" not in spans[0]["attributes"] @pytest.mark.parametrize( @@ -2557,14 +2551,14 @@ def 
test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): ) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_ai_client_span_responses_api( - sentry_init, capture_events, instructions, input, request + sentry_init, capture_items, instructions, input, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) @@ -2579,12 +2573,9 @@ def test_ai_client_span_responses_api( top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -2601,6 +2592,14 @@ def test_ai_client_span_responses_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -2759,17 +2758,17 @@ def test_ai_client_span_responses_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_error_in_responses_api(sentry_init, capture_events): +def test_error_in_responses_api(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = 
capture_items("event", "transaction", "span") client = OpenAI(api_key="z") client.responses._post = mock.Mock( @@ -2784,15 +2783,17 @@ def test_error_in_responses_api(sentry_init, capture_events): input="How do I check if a Python object is an instance of a class?", ) - (error_event, transaction_event) = events - - assert transaction_event["type"] == "transaction" # make sure the span where the error occurred is captured - assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) assert ( error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] @@ -2866,14 +2867,14 @@ def test_error_in_responses_api(sentry_init, capture_events): ], ) async def test_ai_client_span_responses_async_api( - sentry_init, capture_events, instructions, input, request + sentry_init, capture_items, instructions, input, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) @@ -2888,12 +2889,9 @@ async def test_ai_client_span_responses_async_api( top_p=0.9, ) - (transaction,) = events - spans = transaction["spans"] + spans = [item.payload for item in items if item.type == "span"] assert len(spans) == 1 - assert spans[0]["op"] == "gen_ai.responses" - assert spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -2911,6 +2909,14 @@ async def 
test_ai_client_span_responses_async_api( "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -3069,7 +3075,7 @@ async def test_ai_client_span_responses_async_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.asyncio @@ -3140,7 +3146,7 @@ async def test_ai_client_span_responses_async_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_ai_client_span_streaming_responses_async_api( sentry_init, - capture_events, + capture_items, instructions, input, request, @@ -3153,7 +3159,7 @@ async def test_ai_client_span_streaming_responses_async_api( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3178,11 +3184,12 @@ async def test_ai_client_span_streaming_responses_async_api( async for _ in result: pass - (transaction,) = events - spans = [span for span in transaction["spans"] if span["op"] == OP.GEN_AI_RESPONSES] + spans = [item.payload for item in items if item.type == "span"] + spans = [ + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_RESPONSES + ] assert len(spans) == 1 - assert spans[0]["origin"] == "auto.ai.openai" expected_data = { "gen_ai.operation.name": "responses", @@ -3200,6 +3207,14 @@ async def test_ai_client_span_streaming_responses_async_api( "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "hello world", + "sentry.environment": "production", + 
"sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -3358,18 +3373,18 @@ async def test_ai_client_span_streaming_responses_async_api( } ) - assert spans[0]["data"] == expected_data + assert spans[0]["attributes"] == expected_data @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -async def test_error_in_responses_async_api(sentry_init, capture_events): +async def test_error_in_responses_async_api(sentry_init, capture_items): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("event", "transaction", "span") client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock( @@ -3384,15 +3399,17 @@ async def test_error_in_responses_async_api(sentry_init, capture_events): input="How do I check if a Python object is an instance of a class?", ) - (error_event, transaction_event) = events - - assert transaction_event["type"] == "transaction" # make sure the span where the error occurred is captured - assert transaction_event["spans"][0]["op"] == "gen_ai.responses" + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + (error_event,) = (item.payload for item in items if item.type == "event") assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + (transaction_event,) = ( + item.payload for item in items if item.type == "transaction" + ) assert ( error_event["contexts"]["trace"]["trace_id"] == transaction_event["contexts"]["trace"]["trace_id"] @@ -3479,7 +3496,7 @@ async def test_error_in_responses_async_api(sentry_init, 
capture_events): @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3494,7 +3511,7 @@ def test_streaming_responses_api( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3525,26 +3542,25 @@ def test_streaming_responses_api( assert response_string == "hello world" - (transaction,) = events - (span,) = transaction["spans"] - assert span["op"] == "gen_ai.responses" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" if send_default_pii and include_prompts: - assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not 
in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.asyncio @@ -3555,7 +3571,7 @@ def test_streaming_responses_api( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_async( sentry_init, - capture_events, + capture_items, send_default_pii, include_prompts, get_model_response, @@ -3571,7 +3587,7 @@ async def test_streaming_responses_api_async( traces_sample_rate=1.0, send_default_pii=send_default_pii, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3600,26 +3616,25 @@ async def test_streaming_responses_api_async( assert response_string == "hello world" - (transaction,) = events - (span,) = transaction["spans"] - assert span["op"] == "gen_ai.responses" - assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" - assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 - assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + (span,) = (item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" + assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 - assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" + assert 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "response-model-id" if send_default_pii and include_prompts: - assert span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' - assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" + assert span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] == '["hello"]' + assert span["attributes"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "hello world" else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["attributes"] - assert span["data"]["gen_ai.usage.input_tokens"] == 20 - assert span["data"]["gen_ai.usage.output_tokens"] == 10 - assert span["data"]["gen_ai.usage.total_tokens"] == 30 + assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 + assert span["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert span["attributes"]["gen_ai.usage.total_tokens"] == 30 @pytest.mark.skipif( @@ -3630,12 +3645,12 @@ async def test_streaming_responses_api_async( "tools", [[], None, NOT_GIVEN, omit], ) -def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): +def test_empty_tools_in_chat_completion(sentry_init, capture_items, tools): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -3647,10 +3662,9 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): tools=tools, ) - (event,) = events - span = event["spans"][0] + span = next(item.payload for item in items if item.type == "span") - assert "gen_ai.request.available_tools" not in span["data"] + assert "gen_ai.request.available_tools" not in span["attributes"] # Test messages with mixed roles including "ai" that should be mapped to 
"assistant" @@ -3669,7 +3683,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): ], ) def test_openai_message_role_mapping( - sentry_init, capture_events, test_message, expected_role + sentry_init, capture_items, test_message, expected_role ): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" @@ -3678,7 +3692,7 @@ def test_openai_message_role_mapping( traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ -3688,28 +3702,27 @@ def test_openai_message_role_mapping( with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) # Verify that the span was created correctly - (event,) = events - span = event["spans"][0] - assert span["op"] == "gen_ai.chat" - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] # Parse the stored messages import json - stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) assert len(stored_messages) == 1 assert stored_messages[0]["role"] == expected_role -def test_openai_message_truncation(sentry_init, capture_events): +def test_openai_message_truncation(sentry_init, capture_items): """Test that large messages are truncated properly in OpenAI integration.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, send_default_pii=True, ) - events = capture_events() + items = capture_items("transaction", "span") client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) @@ 
-3730,17 +3743,17 @@ def test_openai_message_truncation(sentry_init, capture_events): messages=large_messages, ) - (event,) = events - span = event["spans"][0] - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + span = next(item.payload for item in items if item.type == "span") + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + messages_data = span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] assert isinstance(messages_data, str) parsed_messages = json.loads(messages_data) assert isinstance(parsed_messages, list) assert len(parsed_messages) <= len(large_messages) + (event,) = (item.payload for item in items if item.type == "transaction") meta_path = event["_meta"] span_meta = meta_path["spans"]["0"]["data"] messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] @@ -3749,7 +3762,7 @@ def test_openai_message_truncation(sentry_init, capture_events): # noinspection PyTypeChecker def test_streaming_chat_completion_ttft( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """ Test that streaming chat completions capture time-to-first-token (TTFT). 
@@ -3758,7 +3771,7 @@ def test_streaming_chat_completion_ttft( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3810,13 +3823,12 @@ def test_streaming_chat_completion_ttft( for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3825,7 +3837,7 @@ def test_streaming_chat_completion_ttft( @pytest.mark.asyncio async def test_streaming_chat_completion_ttft_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -3837,7 +3849,7 @@ async def test_streaming_chat_completion_ttft_async( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3891,13 +3903,12 @@ async def test_streaming_chat_completion_ttft_async( async for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.chat" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.chat" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = 
span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3905,7 +3916,7 @@ async def test_streaming_chat_completion_ttft_async( # noinspection PyTypeChecker @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api_ttft( - sentry_init, capture_events, get_model_response, server_side_event_chunks + sentry_init, capture_items, get_model_response, server_side_event_chunks ): """ Test that streaming responses API captures time-to-first-token (TTFT). @@ -3914,7 +3925,7 @@ def test_streaming_responses_api_ttft( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = OpenAI(api_key="z") returned_stream = get_model_response( @@ -3936,13 +3947,12 @@ def test_streaming_responses_api_ttft( for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.responses" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0 @@ -3952,7 +3962,7 @@ def test_streaming_responses_api_ttft( @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_streaming_responses_api_ttft_async( sentry_init, - capture_events, + capture_items, get_model_response, async_iterator, server_side_event_chunks, @@ -3964,7 +3974,7 @@ async def test_streaming_responses_api_ttft_async( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, ) - events = capture_events() + items = capture_items("span") client = 
AsyncOpenAI(api_key="z") returned_stream = get_model_response( @@ -3986,12 +3996,11 @@ async def test_streaming_responses_api_ttft_async( async for _ in response_stream: pass - (tx,) = events - span = tx["spans"][0] - assert span["op"] == "gen_ai.responses" + span = next(item.payload for item in items if item.type == "span") + assert span["attributes"]["sentry.op"] == "gen_ai.responses" # Verify TTFT is captured - assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"] - ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] + assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["attributes"] + ttft = span["attributes"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN] assert isinstance(ttft, float) assert ttft > 0