diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index 576ed70a4396..40e19710337b 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Release History
 
+## 1.17.1 (Unreleased)
+
+### Features Added
+
+- Enabled `ToolCallAccuracyEvaluator`, `_ToolInputAccuracyEvaluator`, and `_ToolCallSuccessEvaluator` to run on conversations that include built-in restricted tools (`bing_grounding`, `bing_custom_search`, `azure_ai_search`, `azure_fabric`, `sharepoint_grounding`). These three evaluators grade the agent's tool selection, input arguments, and call status — none of which require the (redacted) tool output body — so the previous unconditional rejection of conversations containing restricted tools is now lifted. This is achieved by setting `check_for_unsupported_tools=False` on each evaluator's input validator. `GroundednessEvaluator` and `_ToolOutputUtilizationEvaluator` continue to reject restricted tools because they consume the tool output body.
+- Exported `_ToolInputAccuracyEvaluator` from the top-level `azure.ai.evaluation` namespace so consumers no longer need to reach into the private `_evaluators._tool_input_accuracy` submodule. The other tool evaluators were already exposed there; this brings the four siblings in line.
+- `_ToolCallSuccessEvaluator` now deterministically returns `fail` (score `0`, `_passed=False`) without invoking the LLM when any `tool_call` or `tool_result` in the response carries a known-failure `status` (`failed`, `error`, `incomplete`, `cancelled`/`canceled`). This matches the evaluator's binary contract ("FALSE: at least one tool call failed") and prevents the prompty rubric — which doesn't see the `status` field — from mis-grading conversations whose only failure signal is the runtime-reported execution status. Behavior is unchanged for responses where no `status` is populated.
+- Extended `break_tool_call_into_messages` in `_converters/_models.py` with explicit branches for `bing_custom_search` (arguments-only, mirroring `bing_grounding` — Bing-family results stay redacted upstream) and `sharepoint_grounding` (arguments + dumped output, mirroring `azure_ai_search`). Both were silently dropped before because the converter had no `elif` branch for them, which meant the three status-only tool evaluators returned `NOT_APPLICABLE` on conversations that touched either tool. The `bing_grounding` and `bing_custom_search` request-side payloads continue to emit only the `requesturl`; the `sharepoint_grounding` result is dumped onto the `tool_result` so a future Groundedness / Tool Output Utilization extractor can read it.
+- Made the per-tool argument extraction in `break_tool_call_into_messages` resilient to the `query` vs `input` runtime drift observed on `azure_ai_search`, `sharepoint_grounding`, and `fabric_dataagent`. Each branch now reads `details["<tool>"].get("input") or details["<tool>"].get("query") or ""` instead of dereferencing `["input"]` directly, so live agent traces (which emit the search term under `query`) no longer surface as empty `arguments` to the evaluators. Behavior is unchanged when the runtime emits `input`.
+
 ## 1.17.0 (2026-06-03)
 
 ### Breaking Changes
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py
index 6703b2ca111f..f32ade1e90e3 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py
@@ -34,6 +34,7 @@
 from ._evaluators._document_retrieval import DocumentRetrievalEvaluator
 from ._evaluators._tool_output_utilization import _ToolOutputUtilizationEvaluator
 from ._evaluators._tool_call_success import _ToolCallSuccessEvaluator
+from ._evaluators._tool_input_accuracy import _ToolInputAccuracyEvaluator
 from ._model_configurations import (
     AzureAIProject,
     AzureOpenAIModelConfiguration,
@@ -135,6 +136,7 @@ def lazy_import():
     "ToolCallAccuracyEvaluator",
     "_ToolOutputUtilizationEvaluator",
     "_ToolCallSuccessEvaluator",
+    "_ToolInputAccuracyEvaluator",
     "AzureOpenAIGrader",
     "AzureOpenAILabelGrader",
     "AzureOpenAIStringCheckGrader",
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_converters/_models.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_converters/_models.py
index 443c712a9eac..ce5135fee66c 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_converters/_models.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_converters/_models.py
@@ -327,11 +327,12 @@ def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Mess
     # We will use this as our accumulator.
     messages: List[Message] = []
 
-    # As of March 17th, 2025, we only support custom functions due to built-in code interpreters and bing grounding
-    # tooling not reporting their function calls in the same way. Code interpreters don't include the tool call at
-    # all in most of the cases, and bing would only show the API URL, without arguments or results.
-    # Bing grounding would have "bing_grounding" in details with "requesturl" that will just be the API path with query.
-    # TODO: Work with AI Services to add converter support for BingGrounding and CodeInterpreter.
+    # In addition to custom functions, we support a handful of built-in tools whose runtime payload
+    # we have explicit branches for below (code_interpreter, file_search, bing_grounding,
+    # bing_custom_search, azure_ai_search, sharepoint_grounding, fabric_dataagent). Bing variants
+    # only carry the `requesturl` request side (results are redacted upstream for compliance), so
+    # they emit just the tool_call message; the others emit both call and result.
+    # Unknown built-in types are silently skipped via the early `return messages` in the `else` branch.
     if hasattr(tool_call.details, _FUNCTION) or tool_call.details.get("function"):
         # This is the internals of the content object that will be included with the tool call.
         tool_call_id = tool_call.details.id
@@ -351,15 +352,22 @@ def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Mess
             arguments = {"input": tool_call.details.code_interpreter.input}
         elif tool_call.details["type"] == "bing_grounding":
             arguments = {"requesturl": tool_call.details["bing_grounding"]["requesturl"]}
+        elif tool_call.details["type"] == "bing_custom_search":
+            arguments = {"requesturl": tool_call.details["bing_custom_search"]["requesturl"]}
         elif tool_call.details["type"] == "file_search":
             options = tool_call.details["file_search"]["ranking_options"]
             arguments = {
                 "ranking_options": {"ranker": options["ranker"], "score_threshold": options["score_threshold"]}
             }
         elif tool_call.details["type"] == "azure_ai_search":
-            arguments = {"input": tool_call.details["azure_ai_search"]["input"]}
+            ais = tool_call.details["azure_ai_search"]
+            arguments = {"input": ais.get("input") or ais.get("query") or ""}
+        elif tool_call.details["type"] == "sharepoint_grounding":
+            sp = tool_call.details["sharepoint_grounding"]
+            arguments = {"input": sp.get("input") or sp.get("query") or ""}
         elif tool_call.details["type"] == "fabric_dataagent":
-            arguments = {"input": tool_call.details["fabric_dataagent"]["input"]}
+            fab = tool_call.details["fabric_dataagent"]
+            arguments = {"input": fab.get("input") or fab.get("query") or ""}
         else:
             # unsupported tool type, skip
             return messages
@@ -389,11 +397,15 @@ def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Mess
             if tool_call.details.type == _CODE_INTERPRETER:
                 output = [result.as_dict() for result in tool_call.details.code_interpreter.outputs]
             elif tool_call.details.type == _BING_GROUNDING:
-                return messages  # not supported yet from bing grounding tool
+                return messages  # results are redacted upstream for Bing; no tool_result to emit
+            elif tool_call.details.type == _BING_CUSTOM_SEARCH:
+                return messages  # results are redacted upstream for Bing; no tool_result to emit
             elif tool_call.details.type == _FILE_SEARCH:
                 output = [result.as_dict() for result in tool_call.details.file_search.results]
             elif tool_call.details.type == _AZURE_AI_SEARCH:
                 output = tool_call.details.azure_ai_search["output"]
+            elif tool_call.details.type == _SHAREPOINT_GROUNDING:
+                output = tool_call.details.sharepoint_grounding["output"]
             elif tool_call.details.type == _FABRIC_DATAAGENT:
                 output = tool_call.details.fabric_dataagent["output"]
         except:
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py
index 3a2ccb1ace85..f5057f09e947 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py
@@ -103,7 +103,7 @@ def __init__(self, model_config, *, threshold=_DEFAULT_TOOL_CALL_ACCURACY_SCORE,
         # Initialize input validator
         self._validator = ToolCallsValidator(
             error_target=ErrorTarget.TOOL_CALL_ACCURACY_EVALUATOR,
-            check_for_unsupported_tools=True,
+            check_for_unsupported_tools=False,
         )
 
         super().__init__(
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py
index 44e0876bad68..f3ec39e3843e 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py
@@ -78,7 +78,7 @@ def __init__(self, model_config, *, credential=None, **kwargs):
         self._validator = ToolDefinitionsValidator(
             error_target=ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR,
             requires_query=False,
-            check_for_unsupported_tools=True,
+            check_for_unsupported_tools=False,
         )
 
         super().__init__(
@@ -179,6 +179,37 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[str, float]]:  # t
                 target=ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR,
             )
 
+        # Short-circuit: if the agent runtime already reported a failed tool
+        # execution via a known-failure ``status`` (e.g. "failed", "error",
+        # "incomplete"), deterministically return ``fail`` without calling the
+        # LLM. The evaluator's scoring contract is binary -- "FALSE: at least
+        # one tool call failed" -- and the prompty rubric doesn't see the
+        # ``status`` field, so it would otherwise grade only the (typically
+        # empty) result body and frequently mis-score the conversation as a
+        # pass. ``status`` is only populated by upstream converters that
+        # preserve it; absent ``status``, behavior is unchanged.
+        if isinstance(eval_input.get("response"), list):
+            failed_statuses = _collect_failed_tool_statuses(eval_input["response"])
+            if failed_statuses:
+                reason = (
+                    "Detected failed tool execution(s) with status "
+                    + ", ".join(sorted(set(failed_statuses)))
+                    + ". Marked as fail without LLM grading."
+                )
+                return {
+                    self._result_key: 0.0,
+                    f"{self._result_key}_score": 0.0,
+                    f"{self._result_key}_passed": False,
+                    f"{self._result_key}_result": "fail",
+                    f"{self._result_key}_reason": reason,
+                    f"{self._result_key}_status": "completed",
+                    f"{self._result_key}_threshold": self._threshold,
+                    f"{self._result_key}_properties": {
+                        "short_circuit": "tool_status",
+                        "failed_statuses": sorted(set(failed_statuses)),
+                    },
+                }
+
         if isinstance(eval_input.get("response"), list):
             eval_input["response"] = _preprocess_messages(eval_input["response"])
             eval_input["tool_calls"] = _reformat_tool_calls_results(eval_input["response"], logger)
@@ -271,6 +302,43 @@ def _filter_to_used_tools(tool_definitions, msgs_list, logger=None):
         return tool_definitions
 
 
+_FAILED_TOOL_STATUSES = frozenset({"failed", "error", "incomplete", "cancelled", "canceled"})
+
+
+def _collect_failed_tool_statuses(agent_response_msgs):
+    """Return the list of failure statuses seen on any ``tool_call`` or
+    ``tool_result`` content block in ``agent_response_msgs``.
+
+    Malformed input is tolerated by design -- non-dict messages, non-list
+    ``content`` values and non-dict content blocks are skipped instead of
+    raising, so this helper is safe to call on freshly-deserialized agent traces.
+
+    :param agent_response_msgs: The agent's response message list. Any
+        non-list value yields an empty result.
+    :type agent_response_msgs: list
+    :return: A list (with duplicates preserved) of lowercased failure status
+        strings. Empty list means no failure signal was found.
+    :rtype: list[str]
+    """
+    found = []
+    if not isinstance(agent_response_msgs, list):
+        return found
+    for msg in agent_response_msgs:
+        if not isinstance(msg, dict):
+            continue
+        content = msg.get("content")
+        if not isinstance(content, list):
+            continue
+        for block in content:
+            if not isinstance(block, dict):
+                continue
+            if block.get("type") in ("tool_call", "tool_result"):
+                status = block.get("status")
+                if isinstance(status, str) and status.lower() in _FAILED_TOOL_STATUSES:
+                    found.append(status.lower())
+    return found
+
+
 def _get_tool_calls_results(agent_response_msgs):
     """Extract formatted agent tool calls and results from response."""
     agent_response_text = []
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py
index 7ebc20c7e130..198fefde02d1 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_input_accuracy/_tool_input_accuracy.py
@@ -92,7 +92,7 @@ def __init__(
         self._validator = ToolDefinitionsValidator(
             error_target=ErrorTarget.TOOL_INPUT_ACCURACY_EVALUATOR,
             optional_tool_definitions=False,
-            check_for_unsupported_tools=True,
+            check_for_unsupported_tools=False,
         )
 
         super().__init__(
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
index bae6c9895046..9dc1249dff60 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_version.py
@@ -3,4 +3,4 @@
 # ---------------------------------------------------------
 # represents upcoming version
 
-VERSION = "1.17.0"
+VERSION = "1.17.1"
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py b/sdk/evaluation/azure-ai-evaluation/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py
index 9c0c4df125d1..c496976ea328 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/converters/ai_agent_converter/test_ai_agent_converter_internals.py
@@ -37,6 +37,42 @@
 from serialization_helper import ToolDecoder, ThreadRunDecoder
 
 
+class _HybridDict(dict):
+    """Dict subclass that also exposes its keys as attributes.
+
+    The converter (`break_tool_call_into_messages`) mixes subscript access on the request side
+    (`tool_call.details["type"]`, `tool_call.details["bing_grounding"]["requesturl"]`) with attribute
+    access on the result side (`tool_call.details.type`, `tool_call.details.azure_ai_search["output"]`).
+    The production code path uses typed runtime models (`RunStep*ToolCall`) that satisfy both shapes;
+    `_HybridDict` mimics that surface in unit tests without depending on the agents SDK models, which
+    have moved between packages and are not guaranteed to be importable in every test environment.
+    """
+
+    def __getattr__(self, name):
+        try:
+            return self[name]
+        except KeyError as e:
+            raise AttributeError(name) from e
+
+
+def _build_builtin_tool_call(call_id: str, tool_type: str, payload: dict) -> ToolCall:
+    """Construct a `ToolCall` for a built-in tool without going through `ToolDecoder`.
+
+    `payload` is the per-tool sub-object (e.g. `{"requesturl": "..."}` for Bing or
+    `{"input": "...", "output": {...}}` for SharePoint). The returned `ToolCall.details` is a
+    nested `_HybridDict` so both subscript and attribute access work.
+    """
+    details = _HybridDict(
+        {
+            "id": call_id,
+            "type": tool_type,
+            tool_type: _HybridDict(payload),
+        }
+    )
+    now = datetime.now()
+    return ToolCall(created=now, completed=now, details=details)
+
+
 class TestAIAgentConverter(unittest.TestCase):
     def test_is_agent_tool_call(self):
         # Test case where message is an agent tool call
@@ -200,6 +236,110 @@ def test_bing_grounding_tool_calls(self):
             tool_call_content["arguments"] == {"requesturl": "https://api.bing.microsoft.com/v7.0/search?q="}
         )
 
+    def test_bing_custom_search_tool_calls(self):
+        # bing_custom_search mirrors bing_grounding: arguments-only tool_call, no tool_result
+        # (results are redacted upstream for Bing-family tools).
+        # Built directly rather than via ToolDecoder so the test does not depend on the
+        # RunStepBingCustomSearchToolCall model being present in the installed agents SDK.
+        tool_call = _build_builtin_tool_call(
+            call_id="call_BCS123",
+            tool_type="bing_custom_search",
+            payload={"requesturl": "https://api.bing.microsoft.com/v7.0/custom/search?customconfig=abc&q=foo"},
+        )
+        messages = break_tool_call_into_messages(tool_call, "abc123")
+        self.assertTrue(len(messages) == 1)  # Bing variants emit no tool_result
+        self.assertTrue(isinstance(messages[0], AssistantMessage))
+        tool_call_content = messages[0].content[0]
+        self.assertTrue(tool_call_content["type"] == "tool_call")
+        self.assertTrue(tool_call_content["tool_call_id"] == "call_BCS123")
+        self.assertTrue(tool_call_content["name"] == "bing_custom_search")
+        self.assertTrue(
+            tool_call_content["arguments"]
+            == {"requesturl": "https://api.bing.microsoft.com/v7.0/custom/search?customconfig=abc&q=foo"}
+        )
+
+    def test_sharepoint_grounding_tool_calls(self):
+        # sharepoint_grounding mirrors azure_ai_search: arguments + dumped output.
+        # Exercises the `input` argument key on the request side.
+        tool_call = _build_builtin_tool_call(
+            call_id="call_SP123",
+            tool_type="sharepoint_grounding",
+            payload={
+                "input": "quarterly sales report",
+                "output": {
+                    "documents": [
+                        {
+                            "title": "Q3 Sales",
+                            "url": "https://contoso.sharepoint.com/Q3.docx",
+                            "content": "Q3 was up 12%",
+                        }
+                    ]
+                },
+            },
+        )
+        messages = break_tool_call_into_messages(tool_call, "abc123")
+        self.assertTrue(len(messages) == 2)
+        self.assertTrue(isinstance(messages[0], AssistantMessage))
+        tool_call_content = messages[0].content[0]
+        self.assertTrue(tool_call_content["type"] == "tool_call")
+        self.assertTrue(tool_call_content["tool_call_id"] == "call_SP123")
+        self.assertTrue(tool_call_content["name"] == "sharepoint_grounding")
+        self.assertTrue(tool_call_content["arguments"] == {"input": "quarterly sales report"})
+        self.assertTrue(isinstance(messages[1], ToolMessage))
+        self.assertTrue(messages[1].content[0]["type"] == "tool_result")
+        self.assertTrue(
+            messages[1].content[0]["tool_result"]
+            == {
+                "documents": [
+                    {
+                        "title": "Q3 Sales",
+                        "url": "https://contoso.sharepoint.com/Q3.docx",
+                        "content": "Q3 was up 12%",
+                    }
+                ]
+            }
+        )
+
+    def test_sharepoint_grounding_tool_calls_query_key_fallback(self):
+        # Live agent traces emit the search term under `query` instead of `input` for SharePoint.
+        # The converter must fall back to `query` so downstream evaluators see a non-empty argument.
+        tool_call = _build_builtin_tool_call(
+            call_id="call_SP456",
+            tool_type="sharepoint_grounding",
+            payload={"query": "vacation policy", "output": {"documents": []}},
+        )
+        messages = break_tool_call_into_messages(tool_call, "abc123")
+        self.assertTrue(len(messages) == 2)
+        tool_call_content = messages[0].content[0]
+        self.assertTrue(tool_call_content["arguments"] == {"input": "vacation policy"})
+
+    def test_azure_ai_search_tool_calls_query_key_fallback(self):
+        # Live agent traces emit the search term under `query` instead of `input` for Azure AI Search.
+        # The converter must fall back to `query` so downstream evaluators see a non-empty argument.
+        tool_call = _build_builtin_tool_call(
+            call_id="call_AIS789",
+            tool_type="azure_ai_search",
+            payload={"query": "refund policy", "output": []},
+        )
+        messages = break_tool_call_into_messages(tool_call, "abc123")
+        self.assertTrue(len(messages) == 2)
+        tool_call_content = messages[0].content[0]
+        self.assertTrue(tool_call_content["name"] == "azure_ai_search")
+        self.assertTrue(tool_call_content["arguments"] == {"input": "refund policy"})
+
+    def test_fabric_dataagent_tool_calls_query_key_fallback(self):
+        # Same `query` vs `input` drift for fabric_dataagent.
+        tool_call = _build_builtin_tool_call(
+            call_id="call_FAB012",
+            tool_type="fabric_dataagent",
+            payload={"query": "top customers by revenue", "output": {}},
+        )
+        messages = break_tool_call_into_messages(tool_call, "abc123")
+        self.assertTrue(len(messages) == 2)
+        tool_call_content = messages[0].content[0]
+        self.assertTrue(tool_call_content["name"] == "fabric_dataagent")
+        self.assertTrue(tool_call_content["arguments"] == {"input": "top customers by revenue"})
+
     def test_extract_tool_definitions(self):
         thread_run_data = """{
   "id": "run_zs3USbTw61ZpRk8bwBPP8Ue7",
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_success_evaluator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_success_evaluator.py
new file mode 100644
index 000000000000..a4bfeb2e3e1a
--- /dev/null
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_success_evaluator.py
@@ -0,0 +1,172 @@
+from unittest.mock import MagicMock
+
+import pytest
+
+from azure.ai.evaluation import _ToolCallSuccessEvaluator
+from azure.ai.evaluation._evaluators._tool_call_success._tool_call_success import (
+    _collect_failed_tool_statuses,
+)
+
+
+# Default prompty mock that always grades as PASS. Tests that exercise the
+# deterministic short-circuit path rely on this mock NOT being called.
+async def _flow_pass(timeout, **kwargs):  # pylint: disable=unused-argument
+    return {
+        "llm_output": {
+            "reason": "All tool calls completed successfully.",
+            "score": 1,
+            "properties": {},
+        }
+    }
+
+
+def _assistant_tool_call(tool_call_id="call_1", name="get_weather", arguments=None, status=None):
+    block = {
+        "type": "tool_call",
+        "tool_call_id": tool_call_id,
+        "name": name,
+        "arguments": arguments or {"location": "NYC"},
+    }
+    if status is not None:
+        block["status"] = status
+    return {"role": "assistant", "content": [block]}
+
+
+def _tool_result(tool_call_id="call_1", result="72F sunny", status=None):
+    block = {"type": "tool_result", "tool_result": result}
+    if status is not None:
+        block["status"] = status
+    return {"role": "tool", "tool_call_id": tool_call_id, "content": [block]}
+
+
+# ---------------------------------------------------------------------------
+# _collect_failed_tool_statuses
+# ---------------------------------------------------------------------------
+
+
+class TestCollectFailedToolStatuses:
+    def test_no_status_returns_empty(self):
+        msgs = [_assistant_tool_call(), _tool_result()]
+        assert _collect_failed_tool_statuses(msgs) == []
+
+    def test_completed_status_returns_empty(self):
+        msgs = [
+            _assistant_tool_call(status="completed"),
+            _tool_result(status="completed"),
+        ]
+        assert _collect_failed_tool_statuses(msgs) == []
+
+    @pytest.mark.parametrize("status", ["failed", "error", "incomplete", "cancelled", "canceled"])
+    def test_known_failure_status_on_tool_call_is_collected(self, status):
+        msgs = [_assistant_tool_call(status=status)]
+        assert _collect_failed_tool_statuses(msgs) == [status]
+
+    @pytest.mark.parametrize("status", ["FAILED", "Error", "Incomplete"])
+    def test_failure_status_is_case_insensitive(self, status):
+        msgs = [_assistant_tool_call(status=status)]
+        assert _collect_failed_tool_statuses(msgs) == [status.lower()]
+
+    def test_failure_status_on_tool_result_is_collected(self):
+        msgs = [_assistant_tool_call(), _tool_result(status="failed")]
+        assert _collect_failed_tool_statuses(msgs) == ["failed"]
+
+    def test_unknown_status_string_is_ignored(self):
+        msgs = [_assistant_tool_call(status="something_weird")]
+        assert _collect_failed_tool_statuses(msgs) == []
+
+    def test_non_string_status_is_ignored(self):
+        msgs = [_assistant_tool_call(status=500)]
+        assert _collect_failed_tool_statuses(msgs) == []
+
+    def test_malformed_inputs_are_tolerated(self):
+        # Non-list input
+        assert _collect_failed_tool_statuses(None) == []
+        assert _collect_failed_tool_statuses("not a list") == []
+        # List with non-dict items + dicts with non-list content
+        msgs = [
+            "string entry",
+            42,
+            {"role": "assistant"},  # no content
+            {"role": "assistant", "content": "not a list"},
+            {"role": "assistant", "content": [None, 1, "x", _assistant_tool_call(status="failed")["content"][0]]},
+        ]
+        assert _collect_failed_tool_statuses(msgs) == ["failed"]
+
+
+# ---------------------------------------------------------------------------
+# _do_eval short-circuit
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.usefixtures("mock_model_config")
+@pytest.mark.unittest
+class TestToolCallSuccessShortCircuit:
+    def test_short_circuits_on_failed_tool_call_status(self, mock_model_config):
+        evaluator = _ToolCallSuccessEvaluator(model_config=mock_model_config)
+        evaluator._flow = MagicMock(side_effect=_flow_pass)
+
+        response = [_assistant_tool_call(status="failed"), _tool_result()]
+        result = evaluator(response=response)
+
+        evaluator._flow.assert_not_called()
+        assert result["tool_call_success"] == 0.0
+        assert result["tool_call_success_score"] == 0.0
+        assert result["tool_call_success_passed"] is False
+        assert result["tool_call_success_result"] == "fail"
+        assert result["tool_call_success_status"] == "completed"
+        assert "failed" in result["tool_call_success_reason"]
+        props = result["tool_call_success_properties"]
+        assert props["short_circuit"] == "tool_status"
+        assert props["failed_statuses"] == ["failed"]
+
+    def test_short_circuits_on_failed_tool_result_status(self, mock_model_config):
+        evaluator = _ToolCallSuccessEvaluator(model_config=mock_model_config)
+        evaluator._flow = MagicMock(side_effect=_flow_pass)
+
+        response = [_assistant_tool_call(), _tool_result(status="error")]
+        result = evaluator(response=response)
+
+        evaluator._flow.assert_not_called()
+        assert result["tool_call_success_result"] == "fail"
+        assert result["tool_call_success_properties"]["failed_statuses"] == ["error"]
+
+    def test_dedupes_and_sorts_failed_statuses_in_reason(self, mock_model_config):
+        evaluator = _ToolCallSuccessEvaluator(model_config=mock_model_config)
+        evaluator._flow = MagicMock(side_effect=_flow_pass)
+
+        response = [
+            _assistant_tool_call(tool_call_id="a", status="failed"),
+            _tool_result(tool_call_id="a", status="failed"),
+            _assistant_tool_call(tool_call_id="b", status="error"),
+        ]
+        result = evaluator(response=response)
+
+        evaluator._flow.assert_not_called()
+        # Reason joins deduped, sorted statuses
+        assert "error, failed" in result["tool_call_success_reason"]
+        assert result["tool_call_success_properties"]["failed_statuses"] == ["error", "failed"]
+
+    def test_no_short_circuit_when_all_statuses_completed(self, mock_model_config):
+        evaluator = _ToolCallSuccessEvaluator(model_config=mock_model_config)
+        evaluator._flow = MagicMock(side_effect=_flow_pass)
+
+        response = [
+            _assistant_tool_call(status="completed"),
+            _tool_result(status="completed"),
+        ]
+        result = evaluator(response=response)
+
+        evaluator._flow.assert_called_once()  # Goes to LLM
+        assert result["tool_call_success_passed"] is True
+
+    def test_no_short_circuit_when_status_absent(self, mock_model_config):
+        """Back-compat: traces produced by converters that do not preserve
+        ``status`` continue to be graded by the LLM as before."""
+        evaluator = _ToolCallSuccessEvaluator(model_config=mock_model_config)
+        evaluator._flow = MagicMock(side_effect=_flow_pass)
+
+        response = [_assistant_tool_call(), _tool_result()]
+        result = evaluator(response=response)
+
+        evaluator._flow.assert_called_once()
+        assert result["tool_call_success_passed"] is True
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_unsupported_tools_validation.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_unsupported_tools_validation.py
new file mode 100644
index 000000000000..74bc352448a3
--- /dev/null
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_unsupported_tools_validation.py
@@ -0,0 +1,183 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+"""
+Regression tests for the change that lets the three tool evaluators
+(ToolCallAccuracy, _ToolInputAccuracy, _ToolCallSuccess) accept
+conversations containing restricted built-in tools.
+
+These evaluators previously rejected any conversation containing tools in
+``ConversationValidator.UNSUPPORTED_TOOLS`` (e.g. ``bing_grounding``,
+``azure_ai_search``). Because none of the three grades require the
+(redacted) tool output body, the rejection has been lifted by setting
+``check_for_unsupported_tools=False`` on each evaluator's input validator.
+
+The tests below exercise the validator directly so they do not need the
+prompty flow or a real model deployment. They also confirm that the
+underlying validator class still rejects restricted tools when
+``check_for_unsupported_tools=True``, so the behavior change is limited
+to the evaluator wiring.
+"""
+
+import pytest
+
+from azure.ai.evaluation import ToolCallAccuracyEvaluator
+from azure.ai.evaluation._evaluators._tool_call_success import _ToolCallSuccessEvaluator
+from azure.ai.evaluation._evaluators._tool_input_accuracy import _ToolInputAccuracyEvaluator
+from azure.ai.evaluation._evaluators._common._validators import (
+    ToolCallsValidator,
+    ToolDefinitionsValidator,
+)
+from azure.ai.evaluation._exceptions import ErrorTarget, EvaluationException
+
+
+RESTRICTED_TOOL_NAMES = [  # tool names fed to every ``parametrize`` in this module
+    "bing_grounding",
+    "bing_custom_search",
+    "azure_ai_search",
+    "azure_fabric",
+    "sharepoint_grounding",
+]
+
+
+def _restricted_response(tool_name: str):
+    """Return a one-message assistant response whose only content is a call to ``tool_name``."""
+    # The call id and the query argument are fixed placeholders.
+    tool_call = {
+        "type": "tool_call",
+        "tool_call_id": "call_restricted",
+        "name": tool_name,
+        "arguments": {"query": "anything"},
+    }
+    assistant_message = {
+        "role": "assistant",
+        "content": [tool_call],
+    }
+    return [assistant_message]
+
+
+def _restricted_tool_definition(tool_name: str):
+    """Build a tool definition for ``tool_name`` with one string ``query`` parameter."""
+    query_schema = {"type": "string"}
+    parameters = {
+        "type": "object",
+        "properties": {"query": query_schema},
+    }
+    summary = f"Built-in {tool_name} tool."
+    return {"name": tool_name, "description": summary, "parameters": parameters}
+
+
+@pytest.mark.usefixtures("mock_model_config")
+@pytest.mark.unittest
+class TestRestrictedToolValidationLifted:
+    """Input validation passes for restricted tools on all three tool evaluators."""
+
+    @pytest.mark.parametrize("tool_name", RESTRICTED_TOOL_NAMES)
+    def test_tool_call_accuracy_accepts_restricted_tool(self, mock_model_config, tool_name):
+        """``ToolCallAccuracyEvaluator``: validation returns ``True`` instead of raising."""
+        accuracy_eval = ToolCallAccuracyEvaluator(model_config=mock_model_config)
+        payload = dict(
+            query="Look it up.",
+            response=_restricted_response(tool_name),
+            tool_definitions=[_restricted_tool_definition(tool_name)],
+        )
+        assert accuracy_eval._validator.validate_eval_input(payload) is True
+
+    @pytest.mark.parametrize("tool_name", RESTRICTED_TOOL_NAMES)
+    def test_tool_input_accuracy_accepts_restricted_tool(self, mock_model_config, tool_name):
+        """``_ToolInputAccuracyEvaluator``: same payload shape, same expectation."""
+        input_eval = _ToolInputAccuracyEvaluator(model_config=mock_model_config)
+        payload = dict(
+            query="Look it up.",
+            response=_restricted_response(tool_name),
+            tool_definitions=[_restricted_tool_definition(tool_name)],
+        )
+        assert input_eval._validator.validate_eval_input(payload) is True
+
+    @pytest.mark.parametrize("tool_name", RESTRICTED_TOOL_NAMES)
+    def test_tool_call_success_accepts_restricted_tool(self, mock_model_config, tool_name):
+        """``_ToolCallSuccessEvaluator``: payload carries no ``query`` key."""
+        success_eval = _ToolCallSuccessEvaluator(model_config=mock_model_config)
+        payload = dict(
+            response=_restricted_response(tool_name),
+            tool_definitions=[_restricted_tool_definition(tool_name)],
+        )
+        assert success_eval._validator.validate_eval_input(payload) is True
+
+    def test_mixed_function_and_restricted_tool_accepted(self, mock_model_config):
+        """One assistant message mixing a function call with a restricted tool call validates."""
+        accuracy_eval = ToolCallAccuracyEvaluator(model_config=mock_model_config)
+        weather_call = {
+            "type": "tool_call",
+            "tool_call_id": "call_func",
+            "name": "get_weather",
+            "arguments": {"location": "Paris"},
+        }
+        bing_call = {
+            "type": "tool_call",
+            "tool_call_id": "call_restricted",
+            "name": "bing_grounding",
+            "arguments": {"query": "MSFT stock price"},
+        }
+        weather_definition = {
+            "name": "get_weather",
+            "type": "function",
+            "description": "Weather lookup.",
+            "parameters": {
+                "type": "object",
+                "properties": {"location": {"type": "string"}},
+                "required": ["location"],
+            },
+        }
+        payload = {
+            "query": "Find stock price and weather.",
+            "response": [
+                {"role": "assistant", "content": [weather_call, bing_call]},
+            ],
+            "tool_definitions": [
+                weather_definition,
+                _restricted_tool_definition("bing_grounding"),
+            ],
+        }
+
+        assert accuracy_eval._validator.validate_eval_input(payload) is True
+
+
+@pytest.mark.unittest
+class TestUnderlyingValidatorUnchanged:
+    """With ``check_for_unsupported_tools=True`` the validators still reject restricted tools.
+
+    The validator classes keep the restricted-tool check available to any caller
+    that turns the flag on; only the wiring of the three tool evaluators differs.
+    """
+
+    @pytest.mark.parametrize("tool_name", RESTRICTED_TOOL_NAMES)
+    def test_tool_calls_validator_still_rejects_when_flag_enabled(self, tool_name):
+        """``ToolCallsValidator`` raises with the "currently not supported" message."""
+        strict_validator = ToolCallsValidator(
+            error_target=ErrorTarget.TOOL_CALL_ACCURACY_EVALUATOR, check_for_unsupported_tools=True
+        )
+        payload = {
+            "query": "Look it up.",
+            "response": _restricted_response(tool_name),
+            "tool_definitions": [_restricted_tool_definition(tool_name)],
+        }
+        with pytest.raises(EvaluationException) as raised:
+            strict_validator.validate_eval_input(payload)
+        assert "currently not supported" in str(raised.value)
+
+    @pytest.mark.parametrize("tool_name", RESTRICTED_TOOL_NAMES)
+    def test_tool_definitions_validator_still_rejects_when_flag_enabled(self, tool_name):
+        """``ToolDefinitionsValidator`` (``requires_query=False``) raises the same message."""
+        strict_validator = ToolDefinitionsValidator(
+            error_target=ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR,
+            requires_query=False,
+            check_for_unsupported_tools=True,
+        )
+        payload = {
+            "response": _restricted_response(tool_name),
+            "tool_definitions": [_restricted_tool_definition(tool_name)],
+        }
+        with pytest.raises(EvaluationException) as raised:
+            strict_validator.validate_eval_input(payload)
+        assert "currently not supported" in str(raised.value)