Skip to content
Draft
224 changes: 221 additions & 3 deletions sentry_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
import uuid
import random
import socket
from collections.abc import Mapping
from collections.abc import Mapping, Iterable
from datetime import datetime, timezone
from importlib import import_module
from typing import TYPE_CHECKING, List, Dict, cast, overload
import warnings
import json

from sentry_sdk._compat import check_uwsgi_thread_support
from sentry_sdk._metrics_batcher import MetricsBatcher
Expand All @@ -30,6 +31,7 @@
)
from sentry_sdk.serializer import serialize
from sentry_sdk.tracing import trace
from sentry_sdk.traces import SpanStatus
from sentry_sdk.tracing_utils import has_span_streaming_enabled
from sentry_sdk.transport import (
HttpTransportCore,
Expand All @@ -38,6 +40,7 @@
)
from sentry_sdk.consts import (
SPANDATA,
SPANSTATUS,
DEFAULT_MAX_VALUE_LENGTH,
DEFAULT_OPTIONS,
INSTRUMENTER,
Expand All @@ -56,6 +59,8 @@
)
from sentry_sdk.scrubber import EventScrubber
from sentry_sdk.monitor import Monitor
from sentry_sdk.envelope import Item, PayloadRef
from sentry_sdk.utils import datetime_from_isoformat

if TYPE_CHECKING:
from typing import Any
Expand All @@ -66,7 +71,15 @@
from typing import Union
from typing import TypeVar

from sentry_sdk._types import Event, Hint, SDKInfo, Log, Metric, EventDataCategory
from sentry_sdk._types import (
Event,
Hint,
SDKInfo,
Log,
Metric,
EventDataCategory,
SerializedAttributeValue,
)
from sentry_sdk.integrations import Integration
from sentry_sdk.scope import Scope
from sentry_sdk.session import Session
Expand All @@ -89,6 +102,181 @@
}


def _serialized_v1_attribute_to_serialized_v2_attribute(
attribute_value: "Any",
) -> "Optional[SerializedAttributeValue]":
if isinstance(attribute_value, bool):
return {
"value": attribute_value,
"type": "boolean",
}

if isinstance(attribute_value, int):
return {
"value": attribute_value,
"type": "integer",
}

if isinstance(attribute_value, float):
return {
"value": attribute_value,
"type": "double",
}

if isinstance(attribute_value, str):
return {
"value": attribute_value,
"type": "string",
}

if isinstance(attribute_value, list):
if not attribute_value:
return {"value": [], "type": "array"}

ty = type(attribute_value[0])
if ty in (int, str, bool, float) and all(
type(v) is ty for v in attribute_value
):
return {
"value": attribute_value,
"type": "array",
}

# Types returned when the serializer for V1 span attributes recurses into some container types.
if isinstance(attribute_value, (dict, list)):
return {
"value": json.dumps(attribute_value),
"type": "string",
}

return None


def _serialized_v1_span_to_serialized_v2_span(
    span: "dict[str, Any]", event: "Event"
) -> "dict[str, Any]":
    """Build a V2 span payload from a serialized V1 span and its event.

    Top-level V2 fields are copied from the V1 span when present:
    ``trace_id``, ``span_id``, ``parent_span_id``, ``description`` (as
    ``name``), and ``start_timestamp``/``timestamp`` (parsed from ISO format
    into epoch seconds as ``start_timestamp``/``end_timestamp``). ``status``
    is ``"error"`` for any V1 status other than ``SPANSTATUS.OK`` and the OK
    status otherwise; ``is_segment`` is always ``False``.

    Attributes are collected from the span's ``op``, ``origin``, ``data`` and
    ``tags`` and from the event's ``user``, ``release``, ``environment``,
    ``transaction``, trace context and ``sdk`` entries. Each value is
    converted with ``_serialized_v1_attribute_to_serialized_v2_attribute``.

    NOTE(review): the event-derived attributes are only as complete as the
    ``event`` passed in; callers presumably want to pass the prepared event
    (after scope data has been applied) rather than the raw one -- confirm at
    the call site.

    :param span: The serialized V1 span.
    :param event: The event the span belongs to.
    :returns: The V2 span dict. ``"attributes"`` is only present when at
        least one attribute could be converted.
    """
    # See SpanBatcher._to_transport_format() for analogous population of all entries except "attributes".
    res: "dict[str, Any]" = {
        "status": SpanStatus.OK.value,
        "is_segment": False,
    }

    if "trace_id" in span:
        res["trace_id"] = span["trace_id"]

    if "span_id" in span:
        res["span_id"] = span["span_id"]

    if "description" in span:
        res["name"] = span["description"]

    if "start_timestamp" in span:
        start_timestamp = None
        try:
            start_timestamp = datetime_from_isoformat(span["start_timestamp"])
        except Exception:
            # Best effort: an unparsable timestamp is omitted from the result.
            pass

        if start_timestamp is not None:
            res["start_timestamp"] = start_timestamp.timestamp()

    if "timestamp" in span:
        end_timestamp = None
        try:
            end_timestamp = datetime_from_isoformat(span["timestamp"])
        except Exception:
            # Best effort: an unparsable timestamp is omitted from the result.
            pass

        if end_timestamp is not None:
            res["end_timestamp"] = end_timestamp.timestamp()

    if "parent_span_id" in span:
        res["parent_span_id"] = span["parent_span_id"]

    if "status" in span and span["status"] != SPANSTATUS.OK:
        res["status"] = "error"

    attributes: "Dict[str, Any]" = {}

    if "op" in span:
        attributes["sentry.op"] = span["op"]
    if "origin" in span:
        attributes["sentry.origin"] = span["origin"]

    span_data = span.get("data")
    if isinstance(span_data, dict):
        attributes.update(span_data)

    # Tags are applied after data, so a tag wins over a data entry with the
    # same key.
    span_tags = span.get("tags")
    if isinstance(span_tags, dict):
        attributes.update(span_tags)

    # See Scope._apply_user_attributes_to_telemetry() for user attributes.
    user = event.get("user")
    if isinstance(user, dict):
        if "id" in user:
            attributes["user.id"] = user["id"]
        if "username" in user:
            attributes["user.name"] = user["username"]
        if "email" in user:
            attributes["user.email"] = user["email"]

    # See Scope.set_global_attributes() for release, environment, and SDK metadata.
    if "release" in event:
        attributes["sentry.release"] = event["release"]
    if "environment" in event:
        attributes["sentry.environment"] = event["environment"]
    if "transaction" in event:
        attributes["sentry.segment.name"] = event["transaction"]

    # "contexts" or "trace" may be present but not a dict (e.g. None); guard
    # each level instead of chaining .get() calls that would raise.
    contexts = event.get("contexts")
    trace_context = contexts.get("trace") if isinstance(contexts, dict) else None
    if isinstance(trace_context, dict) and "span_id" in trace_context:
        attributes["sentry.segment.id"] = trace_context["span_id"]

    sdk_info = event.get("sdk")
    if isinstance(sdk_info, dict):
        if "name" in sdk_info:
            attributes["sentry.sdk.name"] = sdk_info["name"]
        if "version" in sdk_info:
            attributes["sentry.sdk.version"] = sdk_info["version"]

    # The converter returns None for values it cannot represent; drop those
    # entries rather than emitting None attribute values.
    converted_attributes = {}
    for key, value in attributes.items():
        converted = _serialized_v1_attribute_to_serialized_v2_attribute(value)
        if converted is not None:
            converted_attributes[key] = converted

    if converted_attributes:
        res["attributes"] = converted_attributes

    return res


def _split_gen_ai_spans(
event_opt: "Event",
) -> "Optional[tuple[List[Dict[str, object]], List[Dict[str, object]]]]":
if "spans" not in event_opt:
return None

spans: "Any" = event_opt["spans"]
if isinstance(spans, AnnotatedValue):
spans = spans.value

if not isinstance(spans, Iterable):
return None

non_gen_ai_spans = []
gen_ai_spans = []
for span in spans:
span_op = span.get("op")
Comment thread
sentry-warden[bot] marked this conversation as resolved.
if isinstance(span_op, str) and span_op.startswith("gen_ai."):
gen_ai_spans.append(span)
else:
non_gen_ai_spans.append(span)

return non_gen_ai_spans, gen_ai_spans


def _get_options(*args: "Optional[str]", **kwargs: "Any") -> "Dict[str, Any]":
if args and (isinstance(args[0], (bytes, str)) or args[0] is None):
dsn: "Optional[str]" = args[0]
Expand Down Expand Up @@ -912,7 +1100,37 @@
if is_transaction:
if isinstance(profile, Profile):
envelope.add_profile(profile.to_json(event_opt, self.options))
envelope.add_transaction(event_opt)

split_spans = _split_gen_ai_spans(event_opt)
if split_spans is None or not split_spans[1]:
envelope.add_transaction(event_opt)
else:
non_gen_ai_spans, gen_ai_spans = split_spans

event_opt["spans"] = non_gen_ai_spans
envelope.add_transaction(event_opt)

envelope.add_item(
Item(
type=SpanBatcher.TYPE,
content_type=SpanBatcher.CONTENT_TYPE,
headers={
"item_count": len(gen_ai_spans),
},
payload=PayloadRef(
json={
"items": [
_serialized_v1_span_to_serialized_v2_span(
span, event

Check failure on line 1124 in sentry_sdk/client.py

View check run for this annotation

@sentry/warden / warden: find-bugs

[DGJ-UTQ] GenAI span serialization uses raw event instead of prepared event, causing missing user/release/environment data (additional location)

On line 1124, `_serialized_v1_span_to_serialized_v2_span(span, event)` passes the raw `event` parameter instead of `event_opt` (the prepared event). The function expects the enriched event which contains user info, release, environment, SDK metadata, and trace context that are populated by `_prepare_event()` → `scope.apply_to_event()`. This causes all V2 span attributes like `user.id`, `user.name`, `user.email`, `sentry.release`, `sentry.environment`, `sentry.segment.name`, `sentry.segment.id`, `sentry.sdk.name`, and `sentry.sdk.version` to be missing or incorrect in the serialized GenAI spans.
)

Check warning on line 1125 in sentry_sdk/client.py

View check run for this annotation

@sentry/warden / warden: code-review

GenAI span conversion uses unprocessed event instead of event_opt

The `_serialized_v1_span_to_serialized_v2_span` function is called with `event` (the original unprocessed event) instead of `event_opt` (the event after `_prepare_event` processing). The function extracts user attributes, release, environment, SDK info, and trace context from the event. Since scope processing happens in `_prepare_event` and updates these fields, using the original `event` may result in missing or incomplete attributes in the serialized V2 spans.
for span in gen_ai_spans
if isinstance(span, dict)
]
Comment thread
sentry-warden[bot] marked this conversation as resolved.
},
),
)
)

elif is_checkin:
envelope.add_checkin(event_opt)
else:
Expand Down
Loading
Loading