Azure · chienyuanchang · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
@@ -1,5 +1,13 @@
 # Release History
 
+## 1.2.0b2 (Unreleased)
+
+### Bugs Fixed
+- Filtered service-emitted `LLMStats:` telemetry entries from the rendered `rai_warnings` front matter.
+
+### Other Changes
+- Updated `to_llm_input` page markers from `<!-- page N -->` to `<!-- InputPageNumber: N -->` and avoided duplicate marker injection when the service markdown already includes `InputPageNumber` markers.
+
 ## 1.2.0b1 (2026-04-28)
 
 ### Features Added

@@ -59,6 +59,7 @@ This table shows the relationship between SDK versions and supported API service
 
 | SDK version | Supported API service version |
 | ----------- | ----------------------------- |
+| 1.2.0b2     | 2025-11-01                    |
 | 1.2.0b1     | 2025-11-01                    |
 | 1.1.0       | 2025-11-01                    |
 | 1.0.1       | 2025-11-01                    |

@@ -15,7 +15,7 @@
 import datetime
 import math
 import re
-from typing import Any, Dict, List, Optional, TYPE_CHECKING
+from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
 
 if TYPE_CHECKING:
     from .models import (
@@ -39,6 +39,37 @@
     }
 )
 
+# Marker emitted by ``to_llm_input`` at each page boundary. Future Content
+# Understanding service versions emit this same marker directly in the
+# returned markdown (per ContentUnderstanding-Docs#249). When the helper sees
+# any occurrence of this prefix in the input markdown it treats the service
+# as having already paginated the content and skips its own injection to
+# avoid duplicate markers.
+_INPUT_PAGE_MARKER_PREFIX = "<!-- InputPageNumber:"
+
+# Message prefixes the Content Understanding service has been observed to
+# emit into the ``warnings`` collection that are *not* real Responsible-AI
+# warnings (they are internal telemetry counters). The helper drops any
+# warning whose message starts with one of these prefixes before rendering
+# the ``rai_warnings:`` block, so the noise never reaches the LLM. Tracked
+# alongside a separate service bug to stop emitting them in the first place.
+_TELEMETRY_MESSAGE_PREFIXES: Tuple[str, ...] = ("LLMStats:",)
+
+
+def _has_input_page_marker(markdown: str) -> bool:
+    """Return True if *markdown* already contains an ``InputPageNumber`` marker.
+
+    Plain, case-sensitive substring test; one occurrence anywhere suffices
+    (the service is assumed to mark every boundary when it paginates). The text
+    is not parsed, so the literal prefix in a document's own body also matches.
+
+    :param str markdown: The markdown text to inspect.
+    :returns: ``True`` if at least one ``<!-- InputPageNumber:`` substring is
+        present, ``False`` otherwise.
+    :rtype: bool
+    """
+    return _INPUT_PAGE_MARKER_PREFIX in markdown
+
 
 # ---------------------------------------------------------------------------
 # Public API
@@ -65,6 +96,11 @@ def to_llm_input(
     helper automatically expands the parent into per-segment blocks
     with category labels and markdown slices.
 
+    For document content, the helper emits ``<!-- InputPageNumber: N -->``
+    markers at page boundaries when the service result does not already
+    include them. Internal telemetry messages such as ``LLMStats: ...``
+    are filtered from the rendered ``rai_warnings`` front matter.
+
     :param result: The ``AnalysisResult`` from a Content Understanding analyze operation.
     :type result: ~azure.ai.contentunderstanding.models.AnalysisResult
     :keyword include_fields: Whether to include structured fields in the
@@ -379,14 +415,21 @@ def _render_content_block(
 
 
 def _add_page_markers(content: "DocumentContent", markdown: str) -> str:
-    """Add ``<!-- page N -->`` markers to document markdown.
+    """Add ``<!-- InputPageNumber: N -->`` markers to document markdown.
+
+    If *markdown* already contains ``<!-- InputPageNumber:`` markers (e.g.,
+    because the service paginated the content itself per
+    ContentUnderstanding-Docs#249), the helper passes the markdown through
+    unchanged to avoid duplicate markers.
 
     :param content: The document content with page information.
     :type content: ~azure.ai.contentunderstanding.models.DocumentContent
     :param str markdown: The markdown text to annotate.
     :returns: The markdown with page markers inserted.
     :rtype: str
     """
+    if _has_input_page_marker(markdown):
+        return markdown
     if content.pages:
         result = _page_markers_from_spans(markdown, content.pages)
         if result is not markdown:  # spans were found and used
@@ -403,7 +446,7 @@ def _page_markers_from_spans(markdown: str, pages: "List[DocumentPage]") -> str:
     :returns: The markdown with page markers inserted at span offsets.
     :rtype: str
     """
-    markers: List[tuple] = []
+    markers: List[Tuple[int, int]] = []
     for page in pages:
         if page.spans:
             markers.append((page.spans[0].offset, page.page_number))
@@ -419,7 +462,7 @@ def _page_markers_from_spans(markdown: str, pages: "List[DocumentPage]") -> str:
     # Compute offset shifts from the cleaning
     # Re-map original offsets to cleaned string positions
     break_pattern = re.compile(r"\n*<!-- PageBreak -->\n*")
-    shifts: List[tuple] = []  # (original_pos, delta)
+    shifts: List[Tuple[int, int]] = []  # (original_pos, delta)
     for m in break_pattern.finditer(markdown):
         replacement_len = 2  # "\n\n"
         delta = m.end() - m.start() - replacement_len
@@ -438,7 +481,7 @@ def _adjusted_offset(orig: int) -> int:
     for offset, page_num in markers:
         adj = _adjusted_offset(offset)
         parts.append(cleaned[prev:adj])
-        parts.append(f"<!-- page {page_num} -->\n\n")
+        parts.append(f"{_INPUT_PAGE_MARKER_PREFIX} {page_num} -->\n\n")
         prev = adj
     parts.append(cleaned[prev:])
 
@@ -464,7 +507,7 @@ def _page_markers_from_breaks(markdown: str, content: "DocumentContent") -> str:
         page_num = start_page + i
         text = chunk.strip()
         if text:
-            parts.append(f"<!-- page {page_num} -->\n\n{text}")
+            parts.append(f"{_INPUT_PAGE_MARKER_PREFIX} {page_num} -->\n\n{text}")
     return "\n\n".join(parts)
 
 
@@ -559,11 +602,18 @@ def _format_warnings(
     """
     items: List[Dict[str, str]] = []
     for w in warnings:
+        message = getattr(w, "message", None)
+        # Skip internal service telemetry strings (e.g. ``LLMStats: ...``)
+        # that occasionally leak into the warnings collection. These are
+        # not Responsible-AI warnings and would otherwise be rendered into
+        # the LLM-facing ``rai_warnings:`` block.
+        if message and message.lstrip().startswith(_TELEMETRY_MESSAGE_PREFIXES):
+            continue
         entry: Dict[str, str] = {}
         if getattr(w, "code", None):
             entry["code"] = w.code  # type: ignore[assignment, union-attr]
-        if getattr(w, "message", None):
-            entry["message"] = w.message  # type: ignore[assignment, union-attr]
+        if message:
+            entry["message"] = message
         if getattr(w, "target", None):
             entry["target"] = w.target  # type: ignore[assignment, union-attr]
         if entry:

@@ -6,4 +6,4 @@
 # Changes may cause incorrect behavior and will be lost if the code is regenerated.
 # --------------------------------------------------------------------------
 
-VERSION = "1.2.0b1"
+VERSION = "1.2.0b2"
@@ -147,16 +147,20 @@ def test_to_llm_input_multi_page_content_range(self, contentunderstanding_endpoi
         print(f"[PASS] to_llm_input output validated ({len(text)} chars, pages='2-3, 5' preserved)")
 
         # Page markers in the markdown body should use the original page numbers
-        # (<!-- page 2 -->, <!-- page 3 -->, <!-- page 5 -->), not renumbered (1, 2, 3).
-        assert "<!-- page 1 -->" not in text, (
-            "Page marker '<!-- page 1 -->' should not appear — we only requested pages 2-3, 5"
+        # (<!-- InputPageNumber: 2 -->, <!-- InputPageNumber: 3 -->, <!-- InputPageNumber: 5 -->),
+        # not renumbered (1, 2, 3).
+        assert "<!-- InputPageNumber: 1 -->" not in text, (
+            "Page marker '<!-- InputPageNumber: 1 -->' should not appear — we only requested pages 2-3, 5"
         )
         for expected_page in [2, 3, 5]:
-            assert f"<!-- page {expected_page} -->" in text, (
-                f"Page marker '<!-- page {expected_page} -->' should appear in the markdown body. "
+            assert f"<!-- InputPageNumber: {expected_page} -->" in text, (
+                f"Page marker '<!-- InputPageNumber: {expected_page} -->' should appear in the markdown body. "
                 f"Output:\n{text[:800]}"
             )
-        print("[PASS] Page markers verified: <!-- page 2 -->, <!-- page 3 -->, <!-- page 5 -->")
+        print(
+            "[PASS] Page markers verified: <!-- InputPageNumber: 2 -->, "
+            "<!-- InputPageNumber: 3 -->, <!-- InputPageNumber: 5 -->"
+        )
 
         print("\n[SUCCESS] All test_to_llm_input_multi_page_content_range assertions passed")
 

@@ -148,16 +148,20 @@ async def test_to_llm_input_multi_page_content_range_async(self, contentundersta
         print(f"[PASS] to_llm_input output validated ({len(text)} chars, pages='2-3, 5' preserved)")
 
         # Page markers in the markdown body should use the original page numbers
-        # (<!-- page 2 -->, <!-- page 3 -->, <!-- page 5 -->), not renumbered (1, 2, 3).
-        assert "<!-- page 1 -->" not in text, (
-            "Page marker '<!-- page 1 -->' should not appear — we only requested pages 2-3, 5"
+        # (<!-- InputPageNumber: 2 -->, <!-- InputPageNumber: 3 -->, <!-- InputPageNumber: 5 -->),
+        # not renumbered (1, 2, 3).
+        assert "<!-- InputPageNumber: 1 -->" not in text, (
+            "Page marker '<!-- InputPageNumber: 1 -->' should not appear — we only requested pages 2-3, 5"
         )
         for expected_page in [2, 3, 5]:
-            assert f"<!-- page {expected_page} -->" in text, (
-                f"Page marker '<!-- page {expected_page} -->' should appear in the markdown body. "
+            assert f"<!-- InputPageNumber: {expected_page} -->" in text, (
+                f"Page marker '<!-- InputPageNumber: {expected_page} -->' should appear in the markdown body. "
                 f"Output:\n{text[:800]}"
             )
-        print("[PASS] Page markers verified: <!-- page 2 -->, <!-- page 3 -->, <!-- page 5 -->")
+        print(
+            "[PASS] Page markers verified: <!-- InputPageNumber: 2 -->, "
+            "<!-- InputPageNumber: 3 -->, <!-- InputPageNumber: 5 -->"
+        )
 
         await client.close()
         print("\n[SUCCESS] All test_to_llm_input_multi_page_content_range_async assertions passed")

@@ -287,8 +287,23 @@ def test_page_markers_from_spans(self):
             ],
         )
         output = to_llm_input(_make_result([doc]))
-        assert "<!-- page 1 -->" in output
-        assert "<!-- page 2 -->" in output
+        assert "<!-- InputPageNumber: 1 -->" in output
+        assert "<!-- InputPageNumber: 2 -->" in output
+
+    def test_page_markers_not_duplicated_when_service_provides_markers(self):
+        """Service-provided ``InputPageNumber`` markers must not be injected a second time."""
+        doc = DocumentContent(
+            kind="document",
+            markdown="<!-- InputPageNumber: 1 -->\n\nFirst page text.\n\n<!-- InputPageNumber: 2 -->\n\nSecond page text.",
+            start_page_number=1,
+            end_page_number=2,
+            # Spans tile the 93-character markdown exactly: page 1 covers [0, 47) and
+            # page 2 covers [47, 93), so each page starts on its own marker. If span-based
+            # injection ever ran here it would produce an exact duplicate marker, which the
+            # count assertions below would catch for both pages.
+            pages=[
+                DocumentPage(page_number=1, spans=[ContentSpan(offset=0, length=47)]),
+                DocumentPage(page_number=2, spans=[ContentSpan(offset=47, length=46)]),
+            ],
+        )
+        output = to_llm_input(_make_result([doc]))
+        assert output.count("<!-- InputPageNumber: 1 -->") == 1
+        assert output.count("<!-- InputPageNumber: 2 -->") == 1
 
     def test_page_markers_from_pagebreak_fallback(self):
         doc = DocumentContent(
@@ -298,8 +313,8 @@ def test_page_markers_from_pagebreak_fallback(self):
             end_page_number=2,
         )
         output = to_llm_input(_make_result([doc]))
-        assert "<!-- page 1 -->" in output
-        assert "<!-- page 2 -->" in output
+        assert "<!-- InputPageNumber: 1 -->" in output
+        assert "<!-- InputPageNumber: 2 -->" in output
         assert "<!-- PageBreak -->" not in output
 
     def test_page_markers_respect_start_page_number(self):
@@ -311,8 +326,8 @@ def test_page_markers_respect_start_page_number(self):
             end_page_number=4,
         )
         output = to_llm_input(_make_result([doc]))
-        assert "<!-- page 3 -->" in output
-        assert "<!-- page 4 -->" in output
+        assert "<!-- InputPageNumber: 3 -->" in output
+        assert "<!-- InputPageNumber: 4 -->" in output
 
     def test_pages_single_page_format(self):
         doc = _make_invoice_doc(start_page_number=1, end_page_number=1)
@@ -867,6 +882,67 @@ def test_warnings_present_regardless_of_include_flags(self):
         output = to_llm_input(result, include_fields=False, include_markdown=False)
         assert "rai_warnings:" in output
 
+    def test_llm_stats_warning_filtered_from_rai_warnings(self):
+        """An ``LLMStats:`` warning is dropped while a real warning in the same result is still rendered."""
+        from azure.core.exceptions import ODataV4Format
+        doc = _make_invoice_doc()
+        telemetry_warning = ODataV4Format(
+            {"code": "Telemetry", "message": "LLMStats: completion calls: 2; embedding calls: 1"}
+        )
+        real_warning = ODataV4Format({"code": "ContentWarning", "message": "Potentially sensitive content."})
+        result = AnalysisResult(contents=[doc], warnings=[telemetry_warning, real_warning])
+
+        output = to_llm_input(result)
+
+        assert "rai_warnings:" in output
+        assert "LLMStats:" not in output
+        assert "Potentially sensitive content." in output
+
+    def test_llm_stats_warning_only_omits_rai_warnings_block(self):
+        """When the only warning is ``LLMStats:`` telemetry, no ``rai_warnings:`` block is rendered."""
+        from azure.core.exceptions import ODataV4Format
+        doc = _make_invoice_doc()
+        warning = ODataV4Format({"code": "Telemetry", "message": "LLMStats: completion latency: 7.71s"})
+        result = AnalysisResult(contents=[doc], warnings=[warning])
+
+        output = to_llm_input(result)
+
+        assert "rai_warnings:" not in output
+        assert "LLMStats:" not in output
+
+    def test_llm_stats_filter_is_case_sensitive(self):
+        """A lowercase ``llmstats:`` message is not treated as telemetry and stays in ``rai_warnings``."""
+        from azure.core.exceptions import ODataV4Format
+        doc = _make_invoice_doc()
+        warning = ODataV4Format({"code": "ContentWarning", "message": "llmstats: keep as a real warning"})
+        result = AnalysisResult(contents=[doc], warnings=[warning])
+
+        output = to_llm_input(result)
+
+        assert "rai_warnings:" in output
+        assert "llmstats: keep as a real warning" in output
+
+    def test_llm_stats_text_in_markdown_body_is_preserved(self):
+        """``LLMStats:`` text in the markdown body is kept; only the matching warning is removed."""
+        from azure.core.exceptions import ODataV4Format
+        body_text = "A log excerpt:\n- LLMStats: keep this body text"
+        doc = _make_invoice_doc(markdown=body_text)
+        warning = ODataV4Format({"code": "Telemetry", "message": "LLMStats: remove this warning text"})
+        result = AnalysisResult(contents=[doc], warnings=[warning])
+
+        output = to_llm_input(result)
+
+        assert "rai_warnings:" not in output
+        assert "LLMStats: keep this body text" in output
+        assert "LLMStats: remove this warning text" not in output
+
+    def test_llm_stats_warning_filtered_with_leading_whitespace(self):
+        """Leading whitespace before ``LLMStats:`` does not stop the warning from being filtered."""
+        from azure.core.exceptions import ODataV4Format
+        doc = _make_invoice_doc()
+        warning = ODataV4Format({"code": "Telemetry", "message": "  LLMStats: completion calls: 2"})
+        result = AnalysisResult(contents=[doc], warnings=[warning])
+
+        output = to_llm_input(result)
+
+        assert "rai_warnings:" not in output
+        assert "LLMStats:" not in output
+
     def test_empty_string_field_value_quoted(self):
         doc = DocumentContent(
             kind="document",
@@ -1029,9 +1105,9 @@ def test_multipage_doc_strips_pagebreak_with_spans(self):
         )
         output = to_llm_input(_make_result([doc]))
         assert "<!-- PageBreak -->" not in output
-        assert "<!-- page 1 -->" in output
-        assert "<!-- page 2 -->" in output
-        assert "<!-- page 3 -->" in output
+        assert "<!-- InputPageNumber: 1 -->" in output
+        assert "<!-- InputPageNumber: 2 -->" in output
+        assert "<!-- InputPageNumber: 3 -->" in output
         assert "Page 1 content." in output
         assert "Page 2 content." in output
         assert "Page 3 content." in output
@@ -1048,7 +1124,7 @@ def test_image_with_empty_page_spans_falls_back(self):
         )
         output = to_llm_input(_make_result([doc]))
         # Should fall back to PageBreak method, which adds page 1 marker
-        assert "<!-- page 1 -->" in output
+        assert "<!-- InputPageNumber: 1 -->" in output
         assert "![image](pages/1)" in output
 
     def test_document_search_png_single_page_with_spans(self):
@@ -1063,7 +1139,7 @@ def test_document_search_png_single_page_with_spans(self):
             pages=[DocumentPage(page_number=1, spans=[ContentSpan(offset=0, length=len(markdown))])],
         )
         output = to_llm_input(_make_result([doc]))
-        assert "<!-- page 1 -->" in output
+        assert "<!-- InputPageNumber: 1 -->" in output
         assert "IAN HANSSON" in output
         assert "Summary: A resume document." in output
 
@@ -1082,8 +1158,8 @@ def test_prebuilt_read_no_fields(self):
         output = to_llm_input(_make_result([doc]))
         assert "contentType: document" in output
         assert "fields:" not in output
-        assert "<!-- page 1 -->" in output
-        assert "<!-- page 2 -->" in output
+        assert "<!-- InputPageNumber: 1 -->" in output
+        assert "<!-- InputPageNumber: 2 -->" in output
 
     def test_metadata_keys_with_yaml_special_chars(self):
         """Metadata keys with YAML-special characters must be quoted to produce valid YAML."""