From 8904588eef93a46d915495a780ebd363411c1ece Mon Sep 17 00:00:00 2001 From: Pavan Raj Date: Thu, 11 Jun 2026 10:48:58 +0530 Subject: [PATCH 1/3] test: Add token span attribute integration tests --- tests/test_token_usage_attributes.py | 784 +++++++++++++++++++++++++++ 1 file changed, 784 insertions(+) create mode 100644 tests/test_token_usage_attributes.py diff --git a/tests/test_token_usage_attributes.py b/tests/test_token_usage_attributes.py new file mode 100644 index 00000000..2d0711ad --- /dev/null +++ b/tests/test_token_usage_attributes.py @@ -0,0 +1,784 @@ +""" +Integration tests for token-to-span attribute assignment. + +Verifies that each LLM provider's usage extraction function correctly maps +response fields to standardised OpenTelemetry span attribute keys, and that +SpanIOProcessor aliasing rewrites input_tokens/output_tokens to the canonical +prompt_tokens/completion_tokens keys before export. + +Flow under test: + Provider utils → SpanIOProcessor-patched span.set_attribute → final attributes +""" + +from unittest.mock import MagicMock, Mock, patch + +import pytest +from opentelemetry.semconv_ai import SpanAttributes + +from netra.instrumentation.cerebras.utils import set_response_attributes as cerebras_set_usage +from netra.instrumentation.dspy.utils import extract_usage_info as dspy_extract_usage +from netra.instrumentation.google_genai.utils import set_response_attributes as google_genai_set_usage +from netra.instrumentation.groq.utils import _set_usage_attributes as groq_set_usage +from netra.instrumentation.litellm.utils import _set_usage_attributes as litellm_set_usage +from netra.instrumentation.openai.utils import _set_usage_attributes as openai_set_usage +from netra.instrumentation.pydantic_ai.utils import set_pydantic_response_attributes as pydantic_ai_set_usage +from netra.processors.span_io_processor import SpanIOProcessor + + +@pytest.fixture +def patched_span(): + """ + A MagicMock span pre-patched by SpanIOProcessor.on_start(). + + Attributes written via set_attribute are stored in span.attributes so + tests can assert on the final resolved keys. + """ + span = MagicMock() + span.attributes = {} + span._is_recording = True + + def set_attr(key, value): + span.attributes[key] = value + + span.set_attribute.side_effect = set_attr + span.is_recording.return_value = True + + mock_context = Mock() + mock_context.is_valid = True + span.get_span_context.return_value = mock_context + + processor = SpanIOProcessor() + processor.on_start(span) + return span + + +class TestTokenUsageAttributes: + """Integration tests for token-to-span attribute assignment.""" + + # -- SpanIOProcessor aliasing -- + + def test_span_io_processor_aliases_input_tokens_to_prompt_tokens(self, patched_span): + """Test that SpanIOProcessor rewrites input_tokens to prompt_tokens.""" + # Act + patched_span.set_attribute("gen_ai.usage.input_tokens", 100) + patched_span.set_attribute("gen_ai.usage.output_tokens", 50) + + # Assert — canonical keys present, raw alias keys absent + assert patched_span.attributes["gen_ai.usage.prompt_tokens"] == 100 + assert patched_span.attributes["gen_ai.usage.completion_tokens"] == 50 + assert "gen_ai.usage.input_tokens" not in patched_span.attributes + assert "gen_ai.usage.output_tokens" not in patched_span.attributes + + # -- OpenAI -- + + def test_openai_token_usage(self, patched_span): + """Test OpenAI token usage extraction and mapping.""" + # Arrange + usage = { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30, + "prompt_tokens_details": {"cached_tokens": 5}, + "completion_tokens_details": {"reasoning_tokens": 7}, + } + + # Act + openai_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 10 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 20 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 30 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 5 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_REASONING_TOKENS}"] == 7 + + def test_openai_token_usage_alternative_keys(self, patched_span): + """Test OpenAI token usage with alternative keys (input/output).""" + # Arrange + usage = { + "input_tokens": 15, + "output_tokens": 25, + "input_tokens_details": {"cached_tokens": 3}, + "output_tokens_details": {"reasoning_tokens": 4}, + } + + # Act + openai_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 15 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 25 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 3 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_REASONING_TOKENS}"] == 4 + + def test_openai_missing_usage_fields_writes_nothing(self, patched_span): + """Test OpenAI with empty usage dict does not write any token attributes.""" + # Arrange + usage = {} + + # Act + openai_set_usage(patched_span, usage) + + # Assert + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}" not in patched_span.attributes + + def test_openai_zero_token_values_not_written(self, patched_span): + """Test OpenAI skips zero token values due to falsy `or` guard in implementation.""" + # Arrange — prompt_tokens=0 is falsy so the `or` falls through to input_tokens + # which is also absent, resulting in None; attribute is not written + usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} + + # Act + openai_set_usage(patched_span, usage) + + # Assert + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + def test_openai_partial_usage_only_present_fields_written(self, patched_span): + """Test OpenAI with only completion_tokens present writes only that attribute.""" + # Arrange + usage = {"completion_tokens": 42} + + # Act + openai_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 42 + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + + # -- Groq -- + + def test_groq_token_usage(self, patched_span): + """Test Groq token usage extraction.""" + # Arrange + usage = { + "prompt_tokens": 12, + "completion_tokens": 22, + "total_tokens": 34, + "prompt_tokens_details": {"cached_tokens": 6}, + } + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 12 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 22 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 34 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 6 + + def test_groq_zero_token_values_not_written(self, patched_span): + """Test Groq skips zero token values despite `is not None` guard. + + Although the guard is `is not None`, token extraction uses + `usage.get("prompt_tokens") or usage.get("input_tokens")` — the `or` + treats 0 as falsy, so the extracted value is None before the guard runs. + """ + # Arrange + usage = {"prompt_tokens": 0, "completion_tokens": 0} + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + def test_groq_missing_usage_fields_writes_nothing(self, patched_span): + """Test Groq with empty usage dict does not write any token attributes.""" + # Arrange + usage = {} + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + # -- Cerebras -- + + def test_cerebras_token_usage_dict(self, patched_span): + """Test Cerebras token usage with dictionary-based prompt_tokens_details.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = { + "usage": { + "prompt_tokens": 40, + "completion_tokens": 60, + "total_tokens": 100, + "prompt_tokens_details": {"cached_tokens": 10}, + } + } + cerebras_set_usage(patched_span, response) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 40 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 60 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 100 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 10 + + def test_cerebras_token_usage_object(self, patched_span): + """Test Cerebras token usage with object-based prompt_tokens_details.""" + # Arrange + response = Mock() + details = Mock() + details.cached_tokens = 15 + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = { + "usage": { + "prompt_tokens": 40, + "completion_tokens": 60, + "total_tokens": 100, + "prompt_tokens_details": details, + } + } + cerebras_set_usage(patched_span, response) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 15 + + def test_cerebras_missing_usage_key_writes_nothing(self, patched_span): + """Test Cerebras with no usage key in response dict writes nothing.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = {} + cerebras_set_usage(patched_span, response) + + # Assert + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + # -- LiteLLM -- + + def test_litellm_token_usage(self, patched_span): + """Test LiteLLM token usage extraction.""" + # Arrange + usage = { + "prompt_tokens": 50, + "completion_tokens": 70, + "total_tokens": 120, + "prompt_tokens_details": {"cached_tokens": 20}, + "completion_tokens_details": {"reasoning_tokens": 10}, + } + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 50 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 70 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 120 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 20 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_REASONING_TOKENS}"] == 10 + + def test_litellm_partial_usage_only_present_fields_written(self, patched_span): + """Test LiteLLM with only prompt_tokens present writes only that attribute.""" + # Arrange + usage = {"prompt_tokens": 55} + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 55 + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + # -- DSPy -- + + def test_dspy_token_usage(self, patched_span): + """Test DSPy token usage generator yields correct key-value pairs.""" + # Arrange + response = {"usage": {"prompt_tokens": 80, "completion_tokens": 90, "total_tokens": 170}} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + for key, value in dspy_extract_usage(response): + patched_span.set_attribute(key, value) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 80 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 90 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 170 + + def test_dspy_missing_usage_yields_nothing(self, patched_span): + """Test DSPy generator yields nothing when usage key is absent.""" + # Arrange + response = {} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + result = dict(dspy_extract_usage(response)) + + # Assert + assert result == {} + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + + def test_dspy_zero_token_values_not_yielded(self, patched_span): + """Test DSPy skips zero token values despite `is not None` guard. + + Although the guard is `is not None`, token extraction uses + `usage.get("prompt_tokens") or usage.get("input_tokens")` — the `or` + treats 0 as falsy, so the extracted value is None before the guard runs. + """ + # Arrange + response = {"usage": {"prompt_tokens": 0, "completion_tokens": 0}} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + for key, value in dspy_extract_usage(response): + patched_span.set_attribute(key, value) + + # Assert + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + # -- Pydantic AI -- + + def test_pydantic_ai_token_usage(self, patched_span): + """Test Pydantic AI maps request_tokens/response_tokens to standard keys.""" + # Arrange + response = Mock() + usage = Mock() + usage.request_tokens = 200 + usage.response_tokens = 150 + usage.total_tokens = 350 + usage.requests = 1 + usage.details = {"some_detail": 5} + response.usage.return_value = usage + + # Act + with patch("netra.instrumentation.pydantic_ai.utils._safe_get_attribute") as mock_get: + + def side_effect(obj, attr): + if obj == usage: + return getattr(usage, attr) if hasattr(usage, attr) else usage.details.get(attr) + return getattr(obj, attr, None) + + mock_get.side_effect = side_effect + pydantic_ai_set_usage(patched_span, response) + + # Assert — _safe_set_attribute stringifies values in Pydantic AI utils + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == "200" + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == "150" + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == "350" + + # -- Google GenAI -- + + def test_google_genai_token_usage(self, patched_span): + """Test Google GenAI sums candidates and thoughts for completion tokens.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = 500 + usage.candidates_token_count = 100 + usage.thoughts_token_count = 50 + usage.cached_content_token_count = 200 + usage.prompt_token_count = 350 + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — completion = candidates (100) + thoughts (50) = 150 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 500 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 150 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 200 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 350 + + def test_google_genai_only_candidates_no_thoughts(self, patched_span): + """Test Google GenAI completion tokens equals candidates alone when thoughts absent.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = 200 + usage.candidates_token_count = 80 + usage.thoughts_token_count = None + usage.cached_content_token_count = None + usage.prompt_token_count = 120 + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — completion = 80 + 0 = 80 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 80 + + def test_google_genai_no_candidates_no_thoughts_skips_completion(self, patched_span): + """Test Google GenAI skips completion tokens when both candidates and thoughts are absent.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = None + usage.candidates_token_count = None + usage.thoughts_token_count = None + usage.cached_content_token_count = None + usage.prompt_token_count = None + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — output sum is 0, so completion key not written + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + # -- OpenAI streaming -- + + def test_openai_streaming_accumulation(self, patched_span): + """Test OpenAI streaming usage is accumulated across chunks and written on finalise.""" + # Arrange + from netra.instrumentation.openai.wrappers import StreamingWrapper + + class DummyStream: + def __iter__(self): + return self + + def __next__(self): + raise StopIteration + + wrapper = StreamingWrapper(span=patched_span, response=DummyStream(), request_kwargs={}) + chunk1 = {"choices": [{"delta": {"content": "Hello"}}]} + chunk2 = { + "choices": [{"delta": {"content": " world"}}], + "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, + } + + # Act — process chunks then finalise + with ( + patch("netra.instrumentation.openai.wrappers.time.time", return_value=123.456), + patch("netra.instrumentation.openai.wrappers.record_span_timing"), + patch("netra.instrumentation.openai.wrappers.model_as_dict", side_effect=lambda x: x), + ): + wrapper._process_chunk(chunk1) + wrapper._process_chunk(chunk2) + + # Assert — usage captured into _complete_response after chunk processing + assert wrapper._complete_response["usage"]["prompt_tokens"] == 10 + + with patch("netra.instrumentation.openai.wrappers.set_response_attributes") as mock_set_attr: + wrapper._finalize_span() + mock_set_attr.assert_called_once() + call_args = mock_set_attr.call_args[0] + assert call_args[1]["usage"]["prompt_tokens"] == 10 + + # Assert — final span attributes written correctly after set_response_attributes + from netra.instrumentation.openai.utils import set_response_attributes + + set_response_attributes(patched_span, call_args[1]) + + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 10 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 5 + + def test_groq_token_usage_alternative_keys(self, patched_span): + """Test Groq token usage falls back to input_tokens/output_tokens when primary keys absent.""" + # Arrange + usage = { + "input_tokens": 18, + "output_tokens": 9, + "input_tokens_details": {"cached_tokens": 4}, + } + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 18 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 9 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 4 + + def test_groq_partial_usage_only_present_fields_written(self, patched_span): + """Test Groq with only completion_tokens present writes only that attribute.""" + # Arrange + usage = {"completion_tokens": 33} + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 33 + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + + def test_cerebras_zero_token_values_not_written(self, patched_span): + """Test Cerebras skips zero token values due to falsy if guard in implementation.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = {"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}} + cerebras_set_usage(patched_span, response) + + # Assert + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + def test_cerebras_partial_usage_only_present_fields_written(self, patched_span): + """Test Cerebras with only prompt_tokens present writes only that attribute.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = {"usage": {"prompt_tokens": 25}} + cerebras_set_usage(patched_span, response) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 25 + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + def test_cerebras_token_usage_object_no_cached_tokens_attr(self, patched_span): + """Test Cerebras silently skips cached tokens when details object lacks cached_tokens attr. + + Cerebras details handling branches on hasattr then isinstance — if neither + matches (e.g. an object without cached_tokens), the cache key is never written. + """ + # Arrange + response = Mock() + details = Mock(spec=[]) # spec=[] means no attributes defined, hasattr returns False + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = { + "usage": { + "prompt_tokens": 40, + "completion_tokens": 60, + "prompt_tokens_details": details, + } + } + cerebras_set_usage(patched_span, response) + + # Assert — main tokens written, but cache key silently absent + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 40 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 60 + assert f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}" not in patched_span.attributes + + def test_litellm_token_usage_alternative_keys(self, patched_span): + """Test LiteLLM token usage falls back to input_tokens/output_tokens when primary keys absent.""" + # Arrange + usage = { + "input_tokens": 90, + "output_tokens": 45, + "input_tokens_details": {"cached_tokens": 15}, + "output_tokens_details": {"reasoning_tokens": 8}, + } + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 90 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 45 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 15 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_REASONING_TOKENS}"] == 8 + + def test_litellm_missing_usage_fields_writes_nothing(self, patched_span): + """Test LiteLLM with empty usage dict does not write any token attributes.""" + # Arrange + usage = {} + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}" not in patched_span.attributes + + def test_litellm_zero_token_values_not_written(self, patched_span): + """Test LiteLLM skips zero token values due to falsy `or` guard in implementation.""" + # Arrange + usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + def test_dspy_token_usage_alternative_keys(self, patched_span): + """Test DSPy generator falls back to input_tokens/output_tokens when primary keys absent.""" + # Arrange + response = {"usage": {"input_tokens": 55, "output_tokens": 22, "total_tokens": 77}} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + for key, value in dspy_extract_usage(response): + patched_span.set_attribute(key, value) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 55 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 22 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 77 + + def test_dspy_partial_usage_only_present_fields_written(self, patched_span): + """Test DSPy with only total_tokens present yields only that key.""" + # Arrange + response = {"usage": {"total_tokens": 60}} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + for key, value in dspy_extract_usage(response): + patched_span.set_attribute(key, value) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 60 + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + def test_pydantic_ai_none_tokens_not_written(self, patched_span): + """Test Pydantic AI writes nothing when all token fields are None.""" + # Arrange + response = Mock() + usage = Mock() + usage.request_tokens = None + usage.response_tokens = None + usage.total_tokens = None + usage.requests = None + usage.details = None + response.usage.return_value = usage + + # Act + with patch("netra.instrumentation.pydantic_ai.utils._safe_get_attribute") as mock_get: + + def side_effect(obj, attr): + return getattr(obj, attr, None) + + mock_get.side_effect = side_effect + pydantic_ai_set_usage(patched_span, response) + + # Assert + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}" not in patched_span.attributes + + def test_pydantic_ai_partial_usage_only_present_fields_written(self, patched_span): + """Test Pydantic AI with only request_tokens present writes only prompt attribute.""" + # Arrange + response = Mock() + usage = Mock() + usage.request_tokens = 100 + usage.response_tokens = None + usage.total_tokens = None + usage.requests = None + usage.details = None + response.usage.return_value = usage + + # Act + with patch("netra.instrumentation.pydantic_ai.utils._safe_get_attribute") as mock_get: + + def side_effect(obj, attr): + return getattr(obj, attr, None) + + mock_get.side_effect = side_effect + pydantic_ai_set_usage(patched_span, response) + + # Assert + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == "100" + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + def test_pydantic_ai_dual_attribute_write(self, patched_span): + """Test Pydantic AI CallToolsNode writes both gen_ai.usage.* and pydantic_ai.usage.* keys. + + The dual-write lives in _set_call_tools_node_attributes (lines 295-320 of utils.py), + not in set_pydantic_response_attributes. This test calls that path directly via a + mock node whose model_response carries a usage object. + """ + # Arrange + from netra.instrumentation.pydantic_ai.utils import _set_call_tools_node_attributes + + usage = Mock() + usage.request_tokens = 75 + usage.response_tokens = 50 + usage.total_tokens = 125 + usage.requests = 1 + usage.details = None + + model_response = Mock() + model_response.usage = usage + model_response.parts = [] + model_response.model_name = None + model_response.timestamp = None + + node = Mock() + node.model_response = model_response + node.tool_results = None + + # Act + _set_call_tools_node_attributes(patched_span, node) + + # Assert — standard OTel keys + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == "75" + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == "50" + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == "125" + # Assert — pydantic_ai-specific dual-write keys written alongside OTel keys + assert patched_span.attributes["pydantic_ai.usage.request_tokens"] == "75" + assert patched_span.attributes["pydantic_ai.usage.response_tokens"] == "50" + assert patched_span.attributes["pydantic_ai.usage.total_tokens"] == "125" + + def test_google_genai_missing_usage_metadata_writes_nothing(self, patched_span): + """Test Google GenAI writes nothing when _extract_usage_metadata returns None.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=None): + google_genai_set_usage(patched_span, response) + + # Assert + assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}" not in patched_span.attributes + + def test_google_genai_zero_completion_tokens_not_written(self, patched_span): + """Test Google GenAI skips completion tokens when candidates and thoughts both equal zero. + + Although 0 is a valid int and passes the isinstance guard, the final + `if output > 0` check prevents writing a zero sum to the span. + """ + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = 0 + usage.candidates_token_count = 0 + usage.thoughts_token_count = 0 + usage.cached_content_token_count = None + usage.prompt_token_count = 0 + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — output sum is 0 + 0 = 0, `if output > 0` guard skips write + assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + + def test_google_genai_cached_tokens_not_written_when_absent(self, patched_span): + """Test Google GenAI skips cache key when cached_content_token_count is None.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = 100 + usage.candidates_token_count = 60 + usage.thoughts_token_count = None + usage.cached_content_token_count = None + usage.prompt_token_count = 40 + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — main tokens written, cache key absent + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 40 + assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 60 + assert f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}" not in patched_span.attributes From d1939a63f19d05875508d5940128bf8beef61a71 Mon Sep 17 00:00:00 2001 From: Pavan Raj Date: Fri, 12 Jun 2026 09:53:37 +0530 Subject: [PATCH 2/3] refactor: remove unnecessary f-strings, reorder provider blocks and split streaming test --- tests/test_token_usage_attributes.py | 661 ++++++++++++++------------- 1 file changed, 336 insertions(+), 325 deletions(-) diff --git a/tests/test_token_usage_attributes.py b/tests/test_token_usage_attributes.py index 2d0711ad..b8bdbb13 100644 --- a/tests/test_token_usage_attributes.py +++ b/tests/test_token_usage_attributes.py @@ -86,11 +86,11 @@ def test_openai_token_usage(self, patched_span): openai_set_usage(patched_span, usage) # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 10 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 20 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 30 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 5 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_REASONING_TOKENS}"] == 7 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 10 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 20 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 30 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 5 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 7 def test_openai_token_usage_alternative_keys(self, patched_span): """Test OpenAI token usage with alternative keys (input/output).""" @@ -106,10 +106,10 @@ def test_openai_token_usage_alternative_keys(self, patched_span): openai_set_usage(patched_span, usage) # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 15 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 25 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 3 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_REASONING_TOKENS}"] == 4 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 15 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 25 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 3 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 4 def test_openai_missing_usage_fields_writes_nothing(self, patched_span): """Test OpenAI with empty usage dict does not write any token attributes.""" @@ -120,9 +120,9 @@ def test_openai_missing_usage_fields_writes_nothing(self, patched_span): openai_set_usage(patched_span, usage) # Assert - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}" not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes def test_openai_zero_token_values_not_written(self, patched_span): """Test OpenAI skips zero token values due to falsy `or` guard in implementation.""" @@ -134,8 +134,8 @@ def test_openai_zero_token_values_not_written(self, patched_span): openai_set_usage(patched_span, usage) # Assert - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes def test_openai_partial_usage_only_present_fields_written(self, patched_span): """Test OpenAI with only completion_tokens present writes only that attribute.""" @@ -146,8 +146,66 @@ def test_openai_partial_usage_only_present_fields_written(self, patched_span): openai_set_usage(patched_span, usage) # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 42 - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 42 + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + + # -- OpenAI streaming -- + + def _make_streaming_wrapper(self, patched_span): + """Set up a StreamingWrapper with two chunks and all timing patches applied.""" + from netra.instrumentation.openai.wrappers import StreamingWrapper + + class DummyStream: + def __iter__(self): + return self + + def __next__(self): + raise StopIteration + + wrapper = StreamingWrapper(span=patched_span, response=DummyStream(), request_kwargs={}) + chunk1 = {"choices": [{"delta": {"content": "Hello"}}]} + chunk2 = { + "choices": [{"delta": {"content": " world"}}], + "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, + } + + with ( + patch("netra.instrumentation.openai.wrappers.time.time", return_value=123.456), + patch("netra.instrumentation.openai.wrappers.record_span_timing"), + patch("netra.instrumentation.openai.wrappers.model_as_dict", side_effect=lambda x: x), + ): + wrapper._process_chunk(chunk1) + wrapper._process_chunk(chunk2) + + return wrapper + + def test_openai_streaming_chunk_accumulation(self, patched_span): + """Test OpenAI streaming usage is accumulated from chunks into _complete_response.""" + # Arrange + Act + wrapper = self._make_streaming_wrapper(patched_span) + + # Assert — usage captured from chunk2 into _complete_response + assert wrapper._complete_response["usage"]["prompt_tokens"] == 10 + assert wrapper._complete_response["usage"]["completion_tokens"] == 5 + assert wrapper._complete_response["usage"]["total_tokens"] == 15 + + def test_openai_streaming_finalize_writes_span_attributes(self, patched_span): + """Test OpenAI streaming finalise passes accumulated usage to set_response_attributes.""" + # Arrange + from netra.instrumentation.openai.utils import set_response_attributes + + wrapper = self._make_streaming_wrapper(patched_span) + + # Act + with patch("netra.instrumentation.openai.wrappers.set_response_attributes") as mock_set_attr: + wrapper._finalize_span() + mock_set_attr.assert_called_once() + call_args = mock_set_attr.call_args[0] + set_response_attributes(patched_span, call_args[1]) + + # Assert — final span attributes written correctly + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 10 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 5 # -- Groq -- @@ -165,10 +223,27 @@ def test_groq_token_usage(self, patched_span): groq_set_usage(patched_span, usage) # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 12 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 22 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 34 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 6 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 12 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 22 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 34 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 6 + + def test_groq_token_usage_alternative_keys(self, patched_span): + """Test Groq token usage falls back to input_tokens/output_tokens when primary keys absent.""" + # Arrange + usage = { + "input_tokens": 18, + "output_tokens": 9, + "input_tokens_details": {"cached_tokens": 4}, + } + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 18 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 9 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 4 def test_groq_zero_token_values_not_written(self, patched_span): """Test Groq skips zero token values despite `is not None` guard. @@ -184,8 +259,8 @@ def test_groq_zero_token_values_not_written(self, patched_span): groq_set_usage(patched_span, usage) # Assert - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes def test_groq_missing_usage_fields_writes_nothing(self, patched_span): """Test Groq with empty usage dict does not write any token attributes.""" @@ -196,8 +271,20 @@ def test_groq_missing_usage_fields_writes_nothing(self, patched_span): groq_set_usage(patched_span, usage) # Assert - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_groq_partial_usage_only_present_fields_written(self, patched_span): + """Test Groq with only completion_tokens present writes only that attribute.""" + # Arrange + usage = {"completion_tokens": 33} + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 33 + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes # -- Cerebras -- @@ -219,10 +306,10 @@ def test_cerebras_token_usage_dict(self, patched_span): cerebras_set_usage(patched_span, response) # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 40 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 60 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 100 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 10 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 40 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 60 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 100 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 10 def test_cerebras_token_usage_object(self, patched_span): """Test Cerebras token usage with object-based prompt_tokens_details.""" @@ -244,7 +331,33 @@ def test_cerebras_token_usage_object(self, patched_span): cerebras_set_usage(patched_span, response) # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 15 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 15 + + def test_cerebras_token_usage_object_no_cached_tokens_attr(self, patched_span): + """Test Cerebras silently skips cached tokens when details object lacks cached_tokens attr. + + Cerebras details handling branches on hasattr then isinstance — if neither + matches (e.g. an object without cached_tokens), the cache key is never written. + """ + # Arrange + response = Mock() + details = Mock(spec=[]) # spec=[] means no attributes defined, hasattr returns False + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = { + "usage": { + "prompt_tokens": 40, + "completion_tokens": 60, + "prompt_tokens_details": details, + } + } + cerebras_set_usage(patched_span, response) + + # Assert — main tokens written, but cache key silently absent + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 40 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 60 + assert SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS not in patched_span.attributes def test_cerebras_missing_usage_key_writes_nothing(self, patched_span): """Test Cerebras with no usage key in response dict writes nothing.""" @@ -257,8 +370,36 @@ def test_cerebras_missing_usage_key_writes_nothing(self, patched_span): cerebras_set_usage(patched_span, response) # Assert - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_cerebras_zero_token_values_not_written(self, patched_span): + """Test Cerebras skips zero token values due to falsy if guard in implementation.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = {"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}} + cerebras_set_usage(patched_span, response) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_cerebras_partial_usage_only_present_fields_written(self, patched_span): + """Test Cerebras with only prompt_tokens present writes only that attribute.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = {"usage": {"prompt_tokens": 25}} + cerebras_set_usage(patched_span, response) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 25 + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes # -- LiteLLM -- @@ -277,11 +418,55 @@ def test_litellm_token_usage(self, patched_span): litellm_set_usage(patched_span, usage) # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 50 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 70 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 120 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 20 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_REASONING_TOKENS}"] == 10 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 50 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 70 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 120 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 20 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 10 + + def test_litellm_token_usage_alternative_keys(self, patched_span): + """Test LiteLLM token usage falls back to input_tokens/output_tokens when primary keys absent.""" + # Arrange + usage = { + "input_tokens": 90, + "output_tokens": 45, + "input_tokens_details": {"cached_tokens": 15}, + "output_tokens_details": {"reasoning_tokens": 8}, + } + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 90 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 45 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 15 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 8 + + def test_litellm_missing_usage_fields_writes_nothing(self, patched_span): + """Test LiteLLM with empty usage dict does not write any token attributes.""" + # Arrange + usage = {} + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes + + def test_litellm_zero_token_values_not_written(self, patched_span): + """Test LiteLLM skips zero token values due to falsy `or` guard in implementation.""" + # Arrange + usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes def test_litellm_partial_usage_only_present_fields_written(self, patched_span): """Test LiteLLM with only prompt_tokens present writes only that attribute.""" @@ -292,8 +477,8 @@ def test_litellm_partial_usage_only_present_fields_written(self, patched_span): litellm_set_usage(patched_span, usage) # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 55 - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 55 + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes # -- DSPy -- @@ -308,9 +493,24 @@ def test_dspy_token_usage(self, patched_span): patched_span.set_attribute(key, value) # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 80 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 90 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 170 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 80 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 90 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 170 + + def test_dspy_token_usage_alternative_keys(self, patched_span): + """Test DSPy generator falls back to input_tokens/output_tokens when primary keys absent.""" + # Arrange + response = {"usage": {"input_tokens": 55, "output_tokens": 22, "total_tokens": 77}} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + for key, value in dspy_extract_usage(response): + patched_span.set_attribute(key, value) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 55 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 22 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 77 def test_dspy_missing_usage_yields_nothing(self, patched_span): """Test DSPy generator yields nothing when usage key is absent.""" @@ -323,7 +523,7 @@ def test_dspy_missing_usage_yields_nothing(self, patched_span): # Assert assert result == {} - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes def test_dspy_zero_token_values_not_yielded(self, patched_span): """Test DSPy skips zero token values despite `is not None` guard. @@ -341,8 +541,23 @@ def test_dspy_zero_token_values_not_yielded(self, patched_span): patched_span.set_attribute(key, value) # Assert - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_dspy_partial_usage_only_present_fields_written(self, patched_span): + """Test DSPy with only total_tokens present yields only that key.""" + # Arrange + response = {"usage": {"total_tokens": 60}} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + for key, value in dspy_extract_usage(response): + patched_span.set_attribute(key, value) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 60 + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes # -- Pydantic AI -- @@ -370,272 +585,9 @@ def side_effect(obj, attr): pydantic_ai_set_usage(patched_span, response) # Assert — _safe_set_attribute stringifies values in Pydantic AI utils - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == "200" - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == "150" - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == "350" - - # -- Google GenAI -- - - def test_google_genai_token_usage(self, patched_span): - """Test Google GenAI sums candidates and thoughts for completion tokens.""" - # Arrange - response = Mock() - usage = Mock() - usage.total_token_count = 500 - usage.candidates_token_count = 100 - usage.thoughts_token_count = 50 - usage.cached_content_token_count = 200 - usage.prompt_token_count = 350 - - # Act - with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): - google_genai_set_usage(patched_span, response) - - # Assert — completion = candidates (100) + thoughts (50) = 150 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 500 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 150 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 200 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 350 - - def test_google_genai_only_candidates_no_thoughts(self, patched_span): - """Test Google GenAI completion tokens equals candidates alone when thoughts absent.""" - # Arrange - response = Mock() - usage = Mock() - usage.total_token_count = 200 - usage.candidates_token_count = 80 - usage.thoughts_token_count = None - usage.cached_content_token_count = None - usage.prompt_token_count = 120 - - # Act - with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): - google_genai_set_usage(patched_span, response) - - # Assert — completion = 80 + 0 = 80 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 80 - - def test_google_genai_no_candidates_no_thoughts_skips_completion(self, patched_span): - """Test Google GenAI skips completion tokens when both candidates and thoughts are absent.""" - # Arrange - response = Mock() - usage = Mock() - usage.total_token_count = None - usage.candidates_token_count = None - usage.thoughts_token_count = None - usage.cached_content_token_count = None - usage.prompt_token_count = None - - # Act - with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): - google_genai_set_usage(patched_span, response) - - # Assert — output sum is 0, so completion key not written - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes - - # -- OpenAI streaming -- - - def test_openai_streaming_accumulation(self, patched_span): - """Test OpenAI streaming usage is accumulated across chunks and written on finalise.""" - # Arrange - from netra.instrumentation.openai.wrappers import StreamingWrapper - - class DummyStream: - def __iter__(self): - return self - - def __next__(self): - raise StopIteration - - wrapper = StreamingWrapper(span=patched_span, response=DummyStream(), request_kwargs={}) - chunk1 = {"choices": [{"delta": {"content": "Hello"}}]} - chunk2 = { - "choices": [{"delta": {"content": " world"}}], - "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, - } - - # Act — process chunks then finalise - with ( - patch("netra.instrumentation.openai.wrappers.time.time", return_value=123.456), - patch("netra.instrumentation.openai.wrappers.record_span_timing"), - patch("netra.instrumentation.openai.wrappers.model_as_dict", side_effect=lambda x: x), - ): - wrapper._process_chunk(chunk1) - wrapper._process_chunk(chunk2) - - # Assert — usage captured into _complete_response after chunk processing - assert wrapper._complete_response["usage"]["prompt_tokens"] == 10 - - with patch("netra.instrumentation.openai.wrappers.set_response_attributes") as mock_set_attr: - wrapper._finalize_span() - mock_set_attr.assert_called_once() - call_args = mock_set_attr.call_args[0] - assert call_args[1]["usage"]["prompt_tokens"] == 10 - - # Assert — final span attributes written correctly after set_response_attributes - from netra.instrumentation.openai.utils import set_response_attributes - - set_response_attributes(patched_span, call_args[1]) - - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 10 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 5 - - def test_groq_token_usage_alternative_keys(self, patched_span): - """Test Groq token usage falls back to input_tokens/output_tokens when primary keys absent.""" - # Arrange - usage = { - "input_tokens": 18, - "output_tokens": 9, - "input_tokens_details": {"cached_tokens": 4}, - } - - # Act - groq_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 18 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 9 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 4 - - def test_groq_partial_usage_only_present_fields_written(self, patched_span): - """Test Groq with only completion_tokens present writes only that attribute.""" - # Arrange - usage = {"completion_tokens": 33} - - # Act - groq_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 33 - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - - def test_cerebras_zero_token_values_not_written(self, patched_span): - """Test Cerebras skips zero token values due to falsy if guard in implementation.""" - # Arrange - response = Mock() - - # Act - with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: - mock_model_as_dict.return_value = {"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}} - cerebras_set_usage(patched_span, response) - - # Assert - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes - - def test_cerebras_partial_usage_only_present_fields_written(self, patched_span): - """Test Cerebras with only prompt_tokens present writes only that attribute.""" - # Arrange - response = Mock() - - # Act - with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: - mock_model_as_dict.return_value = {"usage": {"prompt_tokens": 25}} - cerebras_set_usage(patched_span, response) - - # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 25 - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes - - def test_cerebras_token_usage_object_no_cached_tokens_attr(self, patched_span): - """Test Cerebras silently skips cached tokens when details object lacks cached_tokens attr. - - Cerebras details handling branches on hasattr then isinstance — if neither - matches (e.g. an object without cached_tokens), the cache key is never written. - """ - # Arrange - response = Mock() - details = Mock(spec=[]) # spec=[] means no attributes defined, hasattr returns False - - # Act - with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: - mock_model_as_dict.return_value = { - "usage": { - "prompt_tokens": 40, - "completion_tokens": 60, - "prompt_tokens_details": details, - } - } - cerebras_set_usage(patched_span, response) - - # Assert — main tokens written, but cache key silently absent - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 40 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 60 - assert f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}" not in patched_span.attributes - - def test_litellm_token_usage_alternative_keys(self, patched_span): - """Test LiteLLM token usage falls back to input_tokens/output_tokens when primary keys absent.""" - # Arrange - usage = { - "input_tokens": 90, - "output_tokens": 45, - "input_tokens_details": {"cached_tokens": 15}, - "output_tokens_details": {"reasoning_tokens": 8}, - } - - # Act - litellm_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 90 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 45 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}"] == 15 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_REASONING_TOKENS}"] == 8 - - def test_litellm_missing_usage_fields_writes_nothing(self, patched_span): - """Test LiteLLM with empty usage dict does not write any token attributes.""" - # Arrange - usage = {} - - # Act - litellm_set_usage(patched_span, usage) - - # Assert - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}" not in patched_span.attributes - - def test_litellm_zero_token_values_not_written(self, patched_span): - """Test LiteLLM skips zero token values due to falsy `or` guard in implementation.""" - # Arrange - usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} - - # Act - litellm_set_usage(patched_span, usage) - - # Assert - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes - - def test_dspy_token_usage_alternative_keys(self, patched_span): - """Test DSPy generator falls back to input_tokens/output_tokens when primary keys absent.""" - # Arrange - response = {"usage": {"input_tokens": 55, "output_tokens": 22, "total_tokens": 77}} - - # Act - with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): - for key, value in dspy_extract_usage(response): - patched_span.set_attribute(key, value) - - # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 55 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 22 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 77 - - def test_dspy_partial_usage_only_present_fields_written(self, patched_span): - """Test DSPy with only total_tokens present yields only that key.""" - # Arrange - response = {"usage": {"total_tokens": 60}} - - # Act - with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): - for key, value in dspy_extract_usage(response): - patched_span.set_attribute(key, value) - - # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == 60 - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == "200" + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == "150" + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == "350" def test_pydantic_ai_none_tokens_not_written(self, patched_span): """Test Pydantic AI writes nothing when all token fields are None.""" @@ -659,9 +611,9 @@ def side_effect(obj, attr): pydantic_ai_set_usage(patched_span, response) # Assert - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}" not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes def test_pydantic_ai_partial_usage_only_present_fields_written(self, patched_span): """Test Pydantic AI with only request_tokens present writes only prompt attribute.""" @@ -685,8 +637,8 @@ def side_effect(obj, attr): pydantic_ai_set_usage(patched_span, response) # Assert - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == "100" - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == "100" + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes def test_pydantic_ai_dual_attribute_write(self, patched_span): """Test Pydantic AI CallToolsNode writes both gen_ai.usage.* and pydantic_ai.usage.* keys. @@ -719,14 +671,73 @@ def test_pydantic_ai_dual_attribute_write(self, patched_span): _set_call_tools_node_attributes(patched_span, node) # Assert — standard OTel keys - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == "75" - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == "50" - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}"] == "125" + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == "75" + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == "50" + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == "125" # Assert — pydantic_ai-specific dual-write keys written alongside OTel keys assert patched_span.attributes["pydantic_ai.usage.request_tokens"] == "75" assert patched_span.attributes["pydantic_ai.usage.response_tokens"] == "50" assert patched_span.attributes["pydantic_ai.usage.total_tokens"] == "125" + # -- Google GenAI -- + + def test_google_genai_token_usage(self, patched_span): + """Test Google GenAI sums candidates and thoughts for completion tokens.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = 500 + usage.candidates_token_count = 100 + usage.thoughts_token_count = 50 + usage.cached_content_token_count = 200 + usage.prompt_token_count = 350 + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — completion = candidates (100) + thoughts (50) = 150 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 500 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 150 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 200 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 350 + + def test_google_genai_only_candidates_no_thoughts(self, patched_span): + """Test Google GenAI completion tokens equals candidates alone when thoughts absent.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = 200 + usage.candidates_token_count = 80 + usage.thoughts_token_count = None + usage.cached_content_token_count = None + usage.prompt_token_count = 120 + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — completion = 80 + 0 = 80 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 80 + + def test_google_genai_no_candidates_no_thoughts_skips_completion(self, patched_span): + """Test Google GenAI skips completion tokens when both candidates and thoughts are absent.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = None + usage.candidates_token_count = None + usage.thoughts_token_count = None + usage.cached_content_token_count = None + usage.prompt_token_count = None + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — output sum is 0, so completion key not written + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + def test_google_genai_missing_usage_metadata_writes_nothing(self, patched_span): """Test Google GenAI writes nothing when _extract_usage_metadata returns None.""" # Arrange @@ -737,9 +748,9 @@ def test_google_genai_missing_usage_metadata_writes_nothing(self, patched_span): google_genai_set_usage(patched_span, response) # Assert - assert f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes - assert f"{SpanAttributes.LLM_USAGE_TOTAL_TOKENS}" not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes def test_google_genai_zero_completion_tokens_not_written(self, patched_span): """Test Google GenAI skips completion tokens when candidates and thoughts both equal zero. @@ -761,7 +772,7 @@ def test_google_genai_zero_completion_tokens_not_written(self, patched_span): google_genai_set_usage(patched_span, response) # Assert — output sum is 0 + 0 = 0, `if output > 0` guard skips write - assert f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}" not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes def test_google_genai_cached_tokens_not_written_when_absent(self, patched_span): """Test Google GenAI skips cache key when cached_content_token_count is None.""" @@ -779,6 +790,6 @@ def test_google_genai_cached_tokens_not_written_when_absent(self, patched_span): google_genai_set_usage(patched_span, response) # Assert — main tokens written, cache key absent - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_PROMPT_TOKENS}"] == 40 - assert patched_span.attributes[f"{SpanAttributes.LLM_USAGE_COMPLETION_TOKENS}"] == 60 - assert f"{SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS}" not in patched_span.attributes + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 40 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 60 + assert SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS not in patched_span.attributes From 338d0ea83f91b81898362f64634411c2457456e2 Mon Sep 17 00:00:00 2001 From: Pavan Raj Date: Fri, 12 Jun 2026 10:18:11 +0530 Subject: [PATCH 3/3] refactor: Group tests by provider using nested classes --- tests/test_token_usage_attributes.py | 1454 +++++++++++++------------- 1 file changed, 721 insertions(+), 733 deletions(-) diff --git a/tests/test_token_usage_attributes.py b/tests/test_token_usage_attributes.py index b8bdbb13..053cd070 100644 --- a/tests/test_token_usage_attributes.py +++ b/tests/test_token_usage_attributes.py @@ -21,6 +21,7 @@ from netra.instrumentation.groq.utils import _set_usage_attributes as groq_set_usage from netra.instrumentation.litellm.utils import _set_usage_attributes as litellm_set_usage from netra.instrumentation.openai.utils import _set_usage_attributes as openai_set_usage +from netra.instrumentation.pydantic_ai.utils import _set_call_tools_node_attributes from netra.instrumentation.pydantic_ai.utils import set_pydantic_response_attributes as pydantic_ai_set_usage from netra.processors.span_io_processor import SpanIOProcessor @@ -55,741 +56,728 @@ def set_attr(key, value): class TestTokenUsageAttributes: """Integration tests for token-to-span attribute assignment.""" - # -- SpanIOProcessor aliasing -- - - def test_span_io_processor_aliases_input_tokens_to_prompt_tokens(self, patched_span): - """Test that SpanIOProcessor rewrites input_tokens to prompt_tokens.""" - # Act - patched_span.set_attribute("gen_ai.usage.input_tokens", 100) - patched_span.set_attribute("gen_ai.usage.output_tokens", 50) - - # Assert — canonical keys present, raw alias keys absent - assert patched_span.attributes["gen_ai.usage.prompt_tokens"] == 100 - assert patched_span.attributes["gen_ai.usage.completion_tokens"] == 50 - assert "gen_ai.usage.input_tokens" not in patched_span.attributes - assert "gen_ai.usage.output_tokens" not in patched_span.attributes - - # -- OpenAI -- - - def test_openai_token_usage(self, patched_span): - """Test OpenAI token usage extraction and mapping.""" - # Arrange - usage = { - "prompt_tokens": 10, - "completion_tokens": 20, - "total_tokens": 30, - "prompt_tokens_details": {"cached_tokens": 5}, - "completion_tokens_details": {"reasoning_tokens": 7}, - } - - # Act - openai_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 10 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 20 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 30 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 5 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 7 - - def test_openai_token_usage_alternative_keys(self, patched_span): - """Test OpenAI token usage with alternative keys (input/output).""" - # Arrange - usage = { - "input_tokens": 15, - "output_tokens": 25, - "input_tokens_details": {"cached_tokens": 3}, - "output_tokens_details": {"reasoning_tokens": 4}, - } - - # Act - openai_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 15 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 25 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 3 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 4 - - def test_openai_missing_usage_fields_writes_nothing(self, patched_span): - """Test OpenAI with empty usage dict does not write any token attributes.""" - # Arrange - usage = {} - - # Act - openai_set_usage(patched_span, usage) - - # Assert - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes - - def test_openai_zero_token_values_not_written(self, patched_span): - """Test OpenAI skips zero token values due to falsy `or` guard in implementation.""" - # Arrange — prompt_tokens=0 is falsy so the `or` falls through to input_tokens - # which is also absent, resulting in None; attribute is not written - usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} - - # Act - openai_set_usage(patched_span, usage) - - # Assert - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - def test_openai_partial_usage_only_present_fields_written(self, patched_span): - """Test OpenAI with only completion_tokens present writes only that attribute.""" - # Arrange - usage = {"completion_tokens": 42} - - # Act - openai_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 42 - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - - # -- OpenAI streaming -- - - def _make_streaming_wrapper(self, patched_span): - """Set up a StreamingWrapper with two chunks and all timing patches applied.""" - from netra.instrumentation.openai.wrappers import StreamingWrapper - - class DummyStream: - def __iter__(self): - return self - - def __next__(self): - raise StopIteration - - wrapper = StreamingWrapper(span=patched_span, response=DummyStream(), request_kwargs={}) - chunk1 = {"choices": [{"delta": {"content": "Hello"}}]} - chunk2 = { - "choices": [{"delta": {"content": " world"}}], - "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, - } - - with ( - patch("netra.instrumentation.openai.wrappers.time.time", return_value=123.456), - patch("netra.instrumentation.openai.wrappers.record_span_timing"), - patch("netra.instrumentation.openai.wrappers.model_as_dict", side_effect=lambda x: x), - ): - wrapper._process_chunk(chunk1) - wrapper._process_chunk(chunk2) - - return wrapper - - def test_openai_streaming_chunk_accumulation(self, patched_span): - """Test OpenAI streaming usage is accumulated from chunks into _complete_response.""" - # Arrange + Act - wrapper = self._make_streaming_wrapper(patched_span) - - # Assert — usage captured from chunk2 into _complete_response - assert wrapper._complete_response["usage"]["prompt_tokens"] == 10 - assert wrapper._complete_response["usage"]["completion_tokens"] == 5 - assert wrapper._complete_response["usage"]["total_tokens"] == 15 - - def test_openai_streaming_finalize_writes_span_attributes(self, patched_span): - """Test OpenAI streaming finalise passes accumulated usage to set_response_attributes.""" - # Arrange - from netra.instrumentation.openai.utils import set_response_attributes - - wrapper = self._make_streaming_wrapper(patched_span) - - # Act - with patch("netra.instrumentation.openai.wrappers.set_response_attributes") as mock_set_attr: - wrapper._finalize_span() - mock_set_attr.assert_called_once() - call_args = mock_set_attr.call_args[0] - set_response_attributes(patched_span, call_args[1]) - - # Assert — final span attributes written correctly - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 10 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 5 - - # -- Groq -- - - def test_groq_token_usage(self, patched_span): - """Test Groq token usage extraction.""" - # Arrange - usage = { - "prompt_tokens": 12, - "completion_tokens": 22, - "total_tokens": 34, - "prompt_tokens_details": {"cached_tokens": 6}, - } - - # Act - groq_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 12 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 22 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 34 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 6 - - def test_groq_token_usage_alternative_keys(self, patched_span): - """Test Groq token usage falls back to input_tokens/output_tokens when primary keys absent.""" - # Arrange - usage = { - "input_tokens": 18, - "output_tokens": 9, - "input_tokens_details": {"cached_tokens": 4}, - } - - # Act - groq_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 18 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 9 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 4 - - def test_groq_zero_token_values_not_written(self, patched_span): - """Test Groq skips zero token values despite `is not None` guard. - - Although the guard is `is not None`, token extraction uses - `usage.get("prompt_tokens") or usage.get("input_tokens")` — the `or` - treats 0 as falsy, so the extracted value is None before the guard runs. - """ - # Arrange - usage = {"prompt_tokens": 0, "completion_tokens": 0} - - # Act - groq_set_usage(patched_span, usage) - - # Assert - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - def test_groq_missing_usage_fields_writes_nothing(self, patched_span): - """Test Groq with empty usage dict does not write any token attributes.""" - # Arrange - usage = {} - - # Act - groq_set_usage(patched_span, usage) - - # Assert - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - def test_groq_partial_usage_only_present_fields_written(self, patched_span): - """Test Groq with only completion_tokens present writes only that attribute.""" - # Arrange - usage = {"completion_tokens": 33} - - # Act - groq_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 33 - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - - # -- Cerebras -- - - def test_cerebras_token_usage_dict(self, patched_span): - """Test Cerebras token usage with dictionary-based prompt_tokens_details.""" - # Arrange - response = Mock() - - # Act - with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: - mock_model_as_dict.return_value = { - "usage": { - "prompt_tokens": 40, - "completion_tokens": 60, - "total_tokens": 100, - "prompt_tokens_details": {"cached_tokens": 10}, - } + class TestSpanIO: + """Tests for SpanIOProcessor aliasing.""" + + def test_aliases_input_tokens_to_prompt_tokens(self, patched_span): + """Test that SpanIOProcessor rewrites input_tokens to prompt_tokens.""" + # Act + patched_span.set_attribute("gen_ai.usage.input_tokens", 100) + patched_span.set_attribute("gen_ai.usage.output_tokens", 50) + + # Assert — canonical keys present, raw alias keys absent + assert patched_span.attributes["gen_ai.usage.prompt_tokens"] == 100 + assert patched_span.attributes["gen_ai.usage.completion_tokens"] == 50 + assert "gen_ai.usage.input_tokens" not in patched_span.attributes + assert "gen_ai.usage.output_tokens" not in patched_span.attributes + + class TestOpenAI: + """Tests for OpenAI token usage extraction.""" + + def test_token_usage(self, patched_span): + """Test OpenAI token usage extraction and mapping.""" + # Arrange + usage = { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30, + "prompt_tokens_details": {"cached_tokens": 5}, + "completion_tokens_details": {"reasoning_tokens": 7}, + } + + # Act + openai_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 10 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 20 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 30 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 5 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 7 + + def test_token_usage_alternative_keys(self, patched_span): + """Test OpenAI token usage with alternative keys (input/output).""" + # Arrange + usage = { + "input_tokens": 15, + "output_tokens": 25, + "input_tokens_details": {"cached_tokens": 3}, + "output_tokens_details": {"reasoning_tokens": 4}, } - cerebras_set_usage(patched_span, response) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 40 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 60 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 100 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 10 - - def test_cerebras_token_usage_object(self, patched_span): - """Test Cerebras token usage with object-based prompt_tokens_details.""" - # Arrange - response = Mock() - details = Mock() - details.cached_tokens = 15 - - # Act - with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: - mock_model_as_dict.return_value = { - "usage": { - "prompt_tokens": 40, - "completion_tokens": 60, - "total_tokens": 100, - "prompt_tokens_details": details, + + # Act + openai_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 15 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 25 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 3 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 4 + + def test_missing_usage_fields_writes_nothing(self, patched_span): + """Test OpenAI with empty usage dict does not write any token attributes.""" + # Arrange + usage = {} + + # Act + openai_set_usage(patched_span, usage) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes + + def test_zero_token_values_not_written(self, patched_span): + """Test OpenAI skips zero token values due to falsy `or` guard in implementation.""" + # Arrange — prompt_tokens=0 is falsy so the `or` falls through to input_tokens + # which is also absent, resulting in None; attribute is not written + usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} + + # Act + openai_set_usage(patched_span, usage) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_partial_usage_only_present_fields_written(self, patched_span): + """Test OpenAI with only completion_tokens present writes only that attribute.""" + # Arrange + usage = {"completion_tokens": 42} + + # Act + openai_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 42 + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + + class TestStreaming: + """Tests for OpenAI streaming usage accumulation.""" + + def _make_streaming_wrapper(self, patched_span): + """Set up a StreamingWrapper with two chunks and all timing patches applied.""" + from netra.instrumentation.openai.wrappers import StreamingWrapper + + class DummyStream: + def __iter__(self): + return self + + def __next__(self): + raise StopIteration + + wrapper = StreamingWrapper(span=patched_span, response=DummyStream(), request_kwargs={}) + chunk1 = {"choices": [{"delta": {"content": "Hello"}}]} + chunk2 = { + "choices": [{"delta": {"content": " world"}}], + "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, } + + with ( + patch("netra.instrumentation.openai.wrappers.time.time", return_value=123.456), + patch("netra.instrumentation.openai.wrappers.record_span_timing"), + patch("netra.instrumentation.openai.wrappers.model_as_dict", side_effect=lambda x: x), + ): + wrapper._process_chunk(chunk1) + wrapper._process_chunk(chunk2) + + return wrapper + + def test_chunk_accumulation(self, patched_span): + """Test OpenAI streaming usage is accumulated from chunks into _complete_response.""" + # Arrange + Act + wrapper = self._make_streaming_wrapper(patched_span) + + # Assert — usage captured from chunk2 into _complete_response + assert wrapper._complete_response["usage"]["prompt_tokens"] == 10 + assert wrapper._complete_response["usage"]["completion_tokens"] == 5 + assert wrapper._complete_response["usage"]["total_tokens"] == 15 + + def test_finalize_writes_span_attributes(self, patched_span): + """Test OpenAI streaming finalise passes accumulated usage to set_response_attributes.""" + # Arrange + from netra.instrumentation.openai.utils import set_response_attributes + + wrapper = self._make_streaming_wrapper(patched_span) + + # Act + with patch("netra.instrumentation.openai.wrappers.set_response_attributes") as mock_set_attr: + wrapper._finalize_span() + mock_set_attr.assert_called_once() + call_args = mock_set_attr.call_args[0] + set_response_attributes(patched_span, call_args[1]) + + # Assert — final span attributes written correctly + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 10 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 5 + + class TestGroq: + """Tests for Groq token usage extraction.""" + + def test_token_usage(self, patched_span): + """Test Groq token usage extraction.""" + # Arrange + usage = { + "prompt_tokens": 12, + "completion_tokens": 22, + "total_tokens": 34, + "prompt_tokens_details": {"cached_tokens": 6}, + } + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 12 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 22 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 34 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 6 + + def test_token_usage_alternative_keys(self, patched_span): + """Test Groq token usage falls back to input_tokens/output_tokens when primary keys absent.""" + # Arrange + usage = { + "input_tokens": 18, + "output_tokens": 9, + "input_tokens_details": {"cached_tokens": 4}, } - cerebras_set_usage(patched_span, response) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 15 - - def test_cerebras_token_usage_object_no_cached_tokens_attr(self, patched_span): - """Test Cerebras silently skips cached tokens when details object lacks cached_tokens attr. - - Cerebras details handling branches on hasattr then isinstance — if neither - matches (e.g. an object without cached_tokens), the cache key is never written. - """ - # Arrange - response = Mock() - details = Mock(spec=[]) # spec=[] means no attributes defined, hasattr returns False - - # Act - with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: - mock_model_as_dict.return_value = { - "usage": { - "prompt_tokens": 40, - "completion_tokens": 60, - "prompt_tokens_details": details, + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 18 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 9 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 4 + + def test_zero_token_values_not_written(self, patched_span): + """Test Groq skips zero token values despite `is not None` guard.""" + # Arrange + usage = {"prompt_tokens": 0, "completion_tokens": 0} + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_missing_usage_fields_writes_nothing(self, patched_span): + """Test Groq with empty usage dict does not write any token attributes.""" + # Arrange + usage = {} + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_partial_usage_only_present_fields_written(self, patched_span): + """Test Groq with only completion_tokens present writes only that attribute.""" + # Arrange + usage = {"completion_tokens": 33} + + # Act + groq_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 33 + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + + class TestCerebras: + """Tests for Cerebras token usage extraction.""" + + def test_token_usage_dict(self, patched_span): + """Test Cerebras token usage with dictionary-based prompt_tokens_details.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = { + "usage": { + "prompt_tokens": 40, + "completion_tokens": 60, + "total_tokens": 100, + "prompt_tokens_details": {"cached_tokens": 10}, + } + } + cerebras_set_usage(patched_span, response) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 40 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 60 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 100 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 10 + + def test_token_usage_object(self, patched_span): + """Test Cerebras token usage with object-based prompt_tokens_details.""" + # Arrange + response = Mock() + details = Mock() + details.cached_tokens = 15 + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = { + "usage": { + "prompt_tokens": 40, + "completion_tokens": 60, + "total_tokens": 100, + "prompt_tokens_details": details, + } + } + cerebras_set_usage(patched_span, response) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 15 + + def test_token_usage_object_no_cached_tokens_attr(self, patched_span): + """Test Cerebras silently skips cached tokens when details object lacks cached_tokens attr.""" + # Arrange + response = Mock() + details = Mock(spec=[]) # spec=[] means no attributes defined, hasattr returns False + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = { + "usage": { + "prompt_tokens": 40, + "completion_tokens": 60, + "prompt_tokens_details": details, + } + } + cerebras_set_usage(patched_span, response) + + # Assert — main tokens written, but cache key silently absent + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 40 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 60 + assert SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS not in patched_span.attributes + + def test_missing_usage_key_writes_nothing(self, patched_span): + """Test Cerebras with no usage key in response dict writes nothing.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = {} + cerebras_set_usage(patched_span, response) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_zero_token_values_not_written(self, patched_span): + """Test Cerebras skips zero token values due to falsy if guard in implementation.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = { + "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} } + cerebras_set_usage(patched_span, response) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_partial_usage_only_present_fields_written(self, patched_span): + """Test Cerebras with only prompt_tokens present writes only that attribute.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: + mock_model_as_dict.return_value = {"usage": {"prompt_tokens": 25}} + cerebras_set_usage(patched_span, response) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 25 + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + class TestLiteLLM: + """Tests for LiteLLM token usage extraction.""" + + def test_token_usage(self, patched_span): + """Test LiteLLM token usage extraction.""" + # Arrange + usage = { + "prompt_tokens": 50, + "completion_tokens": 70, + "total_tokens": 120, + "prompt_tokens_details": {"cached_tokens": 20}, + "completion_tokens_details": {"reasoning_tokens": 10}, } - cerebras_set_usage(patched_span, response) - - # Assert — main tokens written, but cache key silently absent - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 40 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 60 - assert SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS not in patched_span.attributes - - def test_cerebras_missing_usage_key_writes_nothing(self, patched_span): - """Test Cerebras with no usage key in response dict writes nothing.""" - # Arrange - response = Mock() - - # Act - with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: - mock_model_as_dict.return_value = {} - cerebras_set_usage(patched_span, response) - - # Assert - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - def test_cerebras_zero_token_values_not_written(self, patched_span): - """Test Cerebras skips zero token values due to falsy if guard in implementation.""" - # Arrange - response = Mock() - - # Act - with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: - mock_model_as_dict.return_value = {"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}} - cerebras_set_usage(patched_span, response) - - # Assert - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - def test_cerebras_partial_usage_only_present_fields_written(self, patched_span): - """Test Cerebras with only prompt_tokens present writes only that attribute.""" - # Arrange - response = Mock() - - # Act - with patch("netra.instrumentation.cerebras.utils.model_as_dict") as mock_model_as_dict: - mock_model_as_dict.return_value = {"usage": {"prompt_tokens": 25}} - cerebras_set_usage(patched_span, response) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 25 - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - # -- LiteLLM -- - - def test_litellm_token_usage(self, patched_span): - """Test LiteLLM token usage extraction.""" - # Arrange - usage = { - "prompt_tokens": 50, - "completion_tokens": 70, - "total_tokens": 120, - "prompt_tokens_details": {"cached_tokens": 20}, - "completion_tokens_details": {"reasoning_tokens": 10}, - } - - # Act - litellm_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 50 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 70 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 120 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 20 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 10 - - def test_litellm_token_usage_alternative_keys(self, patched_span): - """Test LiteLLM token usage falls back to input_tokens/output_tokens when primary keys absent.""" - # Arrange - usage = { - "input_tokens": 90, - "output_tokens": 45, - "input_tokens_details": {"cached_tokens": 15}, - "output_tokens_details": {"reasoning_tokens": 8}, - } - - # Act - litellm_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 90 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 45 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 15 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 8 - - def test_litellm_missing_usage_fields_writes_nothing(self, patched_span): - """Test LiteLLM with empty usage dict does not write any token attributes.""" - # Arrange - usage = {} - - # Act - litellm_set_usage(patched_span, usage) - - # Assert - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes - - def test_litellm_zero_token_values_not_written(self, patched_span): - """Test LiteLLM skips zero token values due to falsy `or` guard in implementation.""" - # Arrange - usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} - - # Act - litellm_set_usage(patched_span, usage) - - # Assert - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - def test_litellm_partial_usage_only_present_fields_written(self, patched_span): - """Test LiteLLM with only prompt_tokens present writes only that attribute.""" - # Arrange - usage = {"prompt_tokens": 55} - - # Act - litellm_set_usage(patched_span, usage) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 55 - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - # -- DSPy -- - - def test_dspy_token_usage(self, patched_span): - """Test DSPy token usage generator yields correct key-value pairs.""" - # Arrange - response = {"usage": {"prompt_tokens": 80, "completion_tokens": 90, "total_tokens": 170}} - - # Act - with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): - for key, value in dspy_extract_usage(response): - patched_span.set_attribute(key, value) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 80 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 90 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 170 - - def test_dspy_token_usage_alternative_keys(self, patched_span): - """Test DSPy generator falls back to input_tokens/output_tokens when primary keys absent.""" - # Arrange - response = {"usage": {"input_tokens": 55, "output_tokens": 22, "total_tokens": 77}} - - # Act - with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): - for key, value in dspy_extract_usage(response): - patched_span.set_attribute(key, value) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 55 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 22 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 77 - - def test_dspy_missing_usage_yields_nothing(self, patched_span): - """Test DSPy generator yields nothing when usage key is absent.""" - # Arrange - response = {} - - # Act - with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): - result = dict(dspy_extract_usage(response)) - - # Assert - assert result == {} - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - - def test_dspy_zero_token_values_not_yielded(self, patched_span): - """Test DSPy skips zero token values despite `is not None` guard. - - Although the guard is `is not None`, token extraction uses - `usage.get("prompt_tokens") or usage.get("input_tokens")` — the `or` - treats 0 as falsy, so the extracted value is None before the guard runs. - """ - # Arrange - response = {"usage": {"prompt_tokens": 0, "completion_tokens": 0}} - - # Act - with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): - for key, value in dspy_extract_usage(response): - patched_span.set_attribute(key, value) - - # Assert - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - def test_dspy_partial_usage_only_present_fields_written(self, patched_span): - """Test DSPy with only total_tokens present yields only that key.""" - # Arrange - response = {"usage": {"total_tokens": 60}} - - # Act - with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): - for key, value in dspy_extract_usage(response): - patched_span.set_attribute(key, value) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 60 - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - # -- Pydantic AI -- - - def test_pydantic_ai_token_usage(self, patched_span): - """Test Pydantic AI maps request_tokens/response_tokens to standard keys.""" - # Arrange - response = Mock() - usage = Mock() - usage.request_tokens = 200 - usage.response_tokens = 150 - usage.total_tokens = 350 - usage.requests = 1 - usage.details = {"some_detail": 5} - response.usage.return_value = usage - - # Act - with patch("netra.instrumentation.pydantic_ai.utils._safe_get_attribute") as mock_get: - - def side_effect(obj, attr): - if obj == usage: - return getattr(usage, attr) if hasattr(usage, attr) else usage.details.get(attr) - return getattr(obj, attr, None) - - mock_get.side_effect = side_effect - pydantic_ai_set_usage(patched_span, response) - - # Assert — _safe_set_attribute stringifies values in Pydantic AI utils - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == "200" - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == "150" - assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == "350" - - def test_pydantic_ai_none_tokens_not_written(self, patched_span): - """Test Pydantic AI writes nothing when all token fields are None.""" - # Arrange - response = Mock() - usage = Mock() - usage.request_tokens = None - usage.response_tokens = None - usage.total_tokens = None - usage.requests = None - usage.details = None - response.usage.return_value = usage - - # Act - with patch("netra.instrumentation.pydantic_ai.utils._safe_get_attribute") as mock_get: - - def side_effect(obj, attr): - return getattr(obj, attr, None) - - mock_get.side_effect = side_effect - pydantic_ai_set_usage(patched_span, response) - - # Assert - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes - - def test_pydantic_ai_partial_usage_only_present_fields_written(self, patched_span): - """Test Pydantic AI with only request_tokens present writes only prompt attribute.""" - # Arrange - response = Mock() - usage = Mock() - usage.request_tokens = 100 - usage.response_tokens = None - usage.total_tokens = None - usage.requests = None - usage.details = None - response.usage.return_value = usage - - # Act - with patch("netra.instrumentation.pydantic_ai.utils._safe_get_attribute") as mock_get: - - def side_effect(obj, attr): - return getattr(obj, attr, None) - - mock_get.side_effect = side_effect - pydantic_ai_set_usage(patched_span, response) - - # Assert - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == "100" - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - def test_pydantic_ai_dual_attribute_write(self, patched_span): - """Test Pydantic AI CallToolsNode writes both gen_ai.usage.* and pydantic_ai.usage.* keys. - - The dual-write lives in _set_call_tools_node_attributes (lines 295-320 of utils.py), - not in set_pydantic_response_attributes. This test calls that path directly via a - mock node whose model_response carries a usage object. - """ - # Arrange - from netra.instrumentation.pydantic_ai.utils import _set_call_tools_node_attributes - - usage = Mock() - usage.request_tokens = 75 - usage.response_tokens = 50 - usage.total_tokens = 125 - usage.requests = 1 - usage.details = None - - model_response = Mock() - model_response.usage = usage - model_response.parts = [] - model_response.model_name = None - model_response.timestamp = None - - node = Mock() - node.model_response = model_response - node.tool_results = None - - # Act - _set_call_tools_node_attributes(patched_span, node) - - # Assert — standard OTel keys - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == "75" - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == "50" - assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == "125" - # Assert — pydantic_ai-specific dual-write keys written alongside OTel keys - assert patched_span.attributes["pydantic_ai.usage.request_tokens"] == "75" - assert patched_span.attributes["pydantic_ai.usage.response_tokens"] == "50" - assert patched_span.attributes["pydantic_ai.usage.total_tokens"] == "125" - - # -- Google GenAI -- - - def test_google_genai_token_usage(self, patched_span): - """Test Google GenAI sums candidates and thoughts for completion tokens.""" - # Arrange - response = Mock() - usage = Mock() - usage.total_token_count = 500 - usage.candidates_token_count = 100 - usage.thoughts_token_count = 50 - usage.cached_content_token_count = 200 - usage.prompt_token_count = 350 - - # Act - with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): - google_genai_set_usage(patched_span, response) - - # Assert — completion = candidates (100) + thoughts (50) = 150 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 500 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 150 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 200 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 350 - - def test_google_genai_only_candidates_no_thoughts(self, patched_span): - """Test Google GenAI completion tokens equals candidates alone when thoughts absent.""" - # Arrange - response = Mock() - usage = Mock() - usage.total_token_count = 200 - usage.candidates_token_count = 80 - usage.thoughts_token_count = None - usage.cached_content_token_count = None - usage.prompt_token_count = 120 - - # Act - with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): - google_genai_set_usage(patched_span, response) - - # Assert — completion = 80 + 0 = 80 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 80 - - def test_google_genai_no_candidates_no_thoughts_skips_completion(self, patched_span): - """Test Google GenAI skips completion tokens when both candidates and thoughts are absent.""" - # Arrange - response = Mock() - usage = Mock() - usage.total_token_count = None - usage.candidates_token_count = None - usage.thoughts_token_count = None - usage.cached_content_token_count = None - usage.prompt_token_count = None - - # Act - with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): - google_genai_set_usage(patched_span, response) - - # Assert — output sum is 0, so completion key not written - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - def test_google_genai_missing_usage_metadata_writes_nothing(self, patched_span): - """Test Google GenAI writes nothing when _extract_usage_metadata returns None.""" - # Arrange - response = Mock() - - # Act - with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=None): - google_genai_set_usage(patched_span, response) - - # Assert - assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes - - def test_google_genai_zero_completion_tokens_not_written(self, patched_span): - """Test Google GenAI skips completion tokens when candidates and thoughts both equal zero. - - Although 0 is a valid int and passes the isinstance guard, the final - `if output > 0` check prevents writing a zero sum to the span. - """ - # Arrange - response = Mock() - usage = Mock() - usage.total_token_count = 0 - usage.candidates_token_count = 0 - usage.thoughts_token_count = 0 - usage.cached_content_token_count = None - usage.prompt_token_count = 0 - - # Act - with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): - google_genai_set_usage(patched_span, response) - - # Assert — output sum is 0 + 0 = 0, `if output > 0` guard skips write - assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes - - def test_google_genai_cached_tokens_not_written_when_absent(self, patched_span): - """Test Google GenAI skips cache key when cached_content_token_count is None.""" - # Arrange - response = Mock() - usage = Mock() - usage.total_token_count = 100 - usage.candidates_token_count = 60 - usage.thoughts_token_count = None - usage.cached_content_token_count = None - usage.prompt_token_count = 40 - - # Act - with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): - google_genai_set_usage(patched_span, response) - - # Assert — main tokens written, cache key absent - assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 40 - assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 60 - assert SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS not in patched_span.attributes + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 50 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 70 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 120 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 20 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 10 + + def test_token_usage_alternative_keys(self, patched_span): + """Test LiteLLM token usage falls back to input_tokens/output_tokens when primary keys absent.""" + # Arrange + usage = { + "input_tokens": 90, + "output_tokens": 45, + "input_tokens_details": {"cached_tokens": 15}, + "output_tokens_details": {"reasoning_tokens": 8}, + } + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 90 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 45 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 15 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_REASONING_TOKENS] == 8 + + def test_missing_usage_fields_writes_nothing(self, patched_span): + """Test LiteLLM with empty usage dict does not write any token attributes.""" + # Arrange + usage = {} + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes + + def test_zero_token_values_not_written(self, patched_span): + """Test LiteLLM skips zero token values due to falsy `or` guard in implementation.""" + # Arrange + usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_partial_usage_only_present_fields_written(self, patched_span): + """Test LiteLLM with only prompt_tokens present writes only that attribute.""" + # Arrange + usage = {"prompt_tokens": 55} + + # Act + litellm_set_usage(patched_span, usage) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 55 + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + class TestDSPy: + """Tests for DSPy token usage extraction.""" + + def test_token_usage(self, patched_span): + """Test DSPy token usage generator yields correct key-value pairs.""" + # Arrange + response = {"usage": {"prompt_tokens": 80, "completion_tokens": 90, "total_tokens": 170}} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + for key, value in dspy_extract_usage(response): + patched_span.set_attribute(key, value) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 80 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 90 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 170 + + def test_token_usage_alternative_keys(self, patched_span): + """Test DSPy generator falls back to input_tokens/output_tokens when primary keys absent.""" + # Arrange + response = {"usage": {"input_tokens": 55, "output_tokens": 22, "total_tokens": 77}} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + for key, value in dspy_extract_usage(response): + patched_span.set_attribute(key, value) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 55 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 22 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 77 + + def test_missing_usage_yields_nothing(self, patched_span): + """Test DSPy generator yields nothing when usage key is absent.""" + # Arrange + response = {} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + result = dict(dspy_extract_usage(response)) + + # Assert + assert result == {} + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + + def test_zero_token_values_not_yielded(self, patched_span): + """Test DSPy skips zero token values despite `is not None` guard.""" + # Arrange + response = {"usage": {"prompt_tokens": 0, "completion_tokens": 0}} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + for key, value in dspy_extract_usage(response): + patched_span.set_attribute(key, value) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_partial_usage_only_present_fields_written(self, patched_span): + """Test DSPy with only total_tokens present yields only that key.""" + # Arrange + response = {"usage": {"total_tokens": 60}} + + # Act + with patch("netra.instrumentation.dspy.utils.convert_to_dict", return_value=response): + for key, value in dspy_extract_usage(response): + patched_span.set_attribute(key, value) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 60 + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + class TestPydanticAI: + """Tests for Pydantic AI token usage extraction.""" + + def test_token_usage(self, patched_span): + """Test Pydantic AI maps request_tokens/response_tokens to standard keys.""" + # Arrange + response = Mock() + usage = Mock() + usage.request_tokens = 200 + usage.response_tokens = 150 + usage.total_tokens = 350 + usage.requests = 1 + usage.details = {"some_detail": 5} + response.usage.return_value = usage + + # Act + with patch("netra.instrumentation.pydantic_ai.utils._safe_get_attribute") as mock_get: + + def side_effect(obj, attr): + if obj == usage: + return getattr(usage, attr) if hasattr(usage, attr) else usage.details.get(attr) + return getattr(obj, attr, None) + + mock_get.side_effect = side_effect + pydantic_ai_set_usage(patched_span, response) + + # Assert — _safe_set_attribute stringifies values in Pydantic AI utils + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == "200" + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == "150" + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == "350" + + def test_none_tokens_not_written(self, patched_span): + """Test Pydantic AI writes nothing when all token fields are None.""" + # Arrange + response = Mock() + usage = Mock() + usage.request_tokens = None + usage.response_tokens = None + usage.total_tokens = None + usage.requests = None + usage.details = None + response.usage.return_value = usage + + # Act + with patch("netra.instrumentation.pydantic_ai.utils._safe_get_attribute") as mock_get: + + def side_effect(obj, attr): + return getattr(obj, attr, None) + + mock_get.side_effect = side_effect + pydantic_ai_set_usage(patched_span, response) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes + + def test_partial_usage_only_present_fields_written(self, patched_span): + """Test Pydantic AI with only request_tokens present writes only prompt attribute.""" + # Arrange + response = Mock() + usage = Mock() + usage.request_tokens = 100 + usage.response_tokens = None + usage.total_tokens = None + usage.requests = None + usage.details = None + response.usage.return_value = usage + + # Act + with patch("netra.instrumentation.pydantic_ai.utils._safe_get_attribute") as mock_get: + + def side_effect(obj, attr): + return getattr(obj, attr, None) + + mock_get.side_effect = side_effect + pydantic_ai_set_usage(patched_span, response) + + # Assert + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == "100" + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_dual_attribute_write(self, patched_span): + """Test Pydantic AI CallToolsNode writes both gen_ai.usage.* and pydantic_ai.usage.* keys.""" + # Arrange + + usage = Mock() + usage.request_tokens = 75 + usage.response_tokens = 50 + usage.total_tokens = 125 + usage.requests = 1 + usage.details = None + + model_response = Mock() + model_response.usage = usage + model_response.parts = [] + model_response.model_name = None + model_response.timestamp = None + + node = Mock() + node.model_response = model_response + node.tool_results = None + + # Act + _set_call_tools_node_attributes(patched_span, node) + + # Assert — standard OTel keys + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == "75" + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == "50" + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == "125" + # Assert — pydantic_ai-specific dual-write keys written alongside OTel keys + assert patched_span.attributes["pydantic_ai.usage.request_tokens"] == "75" + assert patched_span.attributes["pydantic_ai.usage.response_tokens"] == "50" + assert patched_span.attributes["pydantic_ai.usage.total_tokens"] == "125" + + class TestGoogleGenAI: + """Tests for Google GenAI token usage extraction.""" + + def test_token_usage(self, patched_span): + """Test Google GenAI sums candidates and thoughts for completion tokens.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = 500 + usage.candidates_token_count = 100 + usage.thoughts_token_count = 50 + usage.cached_content_token_count = 200 + usage.prompt_token_count = 350 + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — completion = candidates (100) + thoughts (50) = 150 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 500 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 150 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 200 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 350 + + def test_only_candidates_no_thoughts(self, patched_span): + """Test Google GenAI completion tokens equals candidates alone when thoughts absent.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = 200 + usage.candidates_token_count = 80 + usage.thoughts_token_count = None + usage.cached_content_token_count = None + usage.prompt_token_count = 120 + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — completion = 80 + 0 = 80 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 80 + + def test_no_candidates_no_thoughts_skips_completion(self, patched_span): + """Test Google GenAI skips completion tokens when both candidates and thoughts are absent.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = None + usage.candidates_token_count = None + usage.thoughts_token_count = None + usage.cached_content_token_count = None + usage.prompt_token_count = None + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — output sum is 0, so completion key not written + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_missing_usage_metadata_writes_nothing(self, patched_span): + """Test Google GenAI writes nothing when _extract_usage_metadata returns None.""" + # Arrange + response = Mock() + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=None): + google_genai_set_usage(patched_span, response) + + # Assert + assert SpanAttributes.LLM_USAGE_PROMPT_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + assert SpanAttributes.LLM_USAGE_TOTAL_TOKENS not in patched_span.attributes + + def test_zero_completion_tokens_not_written(self, patched_span): + """Test Google GenAI skips completion tokens when candidates and thoughts both equal zero.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = 0 + usage.candidates_token_count = 0 + usage.thoughts_token_count = 0 + usage.cached_content_token_count = None + usage.prompt_token_count = 0 + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — output sum is 0 + 0 = 0, `if output > 0` guard skips write + assert SpanAttributes.LLM_USAGE_COMPLETION_TOKENS not in patched_span.attributes + + def test_cached_tokens_not_written_when_absent(self, patched_span): + """Test Google GenAI skips cache key when cached_content_token_count is None.""" + # Arrange + response = Mock() + usage = Mock() + usage.total_token_count = 100 + usage.candidates_token_count = 60 + usage.thoughts_token_count = None + usage.cached_content_token_count = None + usage.prompt_token_count = 40 + + # Act + with patch("netra.instrumentation.google_genai.utils._extract_usage_metadata", return_value=usage): + google_genai_set_usage(patched_span, response) + + # Assert — main tokens written, cache key absent + assert patched_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 40 + assert patched_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 60 + assert SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS not in patched_span.attributes