From f44085155601f058302f9f14db4b3c0a1d75b839 Mon Sep 17 00:00:00 2001
From: aprilkim <aprilk@microsoft.com>
Date: Mon, 1 Jun 2026 15:26:35 -0700
Subject: [PATCH 1/2] [ai-projects] Use typed EvaluatorGenerationJob in rubric
 samples

The service contract nests the job inputs under an `inputs` field on
EvaluatorGenerationJob. The four rubric-evaluator-generation samples were
passing flat dicts, which the SDK tolerated, but the rolling-out service
change requires the nested form. Convert all four samples to use the typed
EvaluatorGenerationJob / EvaluatorGenerationInputs / *EvaluatorGenerationJobSource
models, and drop the stale top-level `name` field, which has no home in
the new contract. For the traces source, switch from int Unix timestamps
to timezone-aware datetime values (the SDK model serializes them as Unix
timestamps).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 ...rubric_evaluator_generation_all_sources.py | 111 ++++++++++--------
 ...ample_rubric_evaluator_generation_basic.py |  51 ++++----
 ...ple_rubric_evaluator_generation_iterate.py |  39 +++---
 ...e_rubric_evaluator_generation_lifecycle.py |  38 +++---
 4 files changed, 129 insertions(+), 110 deletions(-)

diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_all_sources.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_all_sources.py
index 2826e865e4ae..09070104956a 100644
--- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_all_sources.py
+++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_all_sources.py
@@ -50,14 +50,24 @@
 import os
 import time
 import uuid
-from datetime import datetime, timezone
-from typing import Any, Dict, List, cast
+from datetime import datetime, timedelta, timezone
+from typing import List, cast
 
 from dotenv import load_dotenv
 
 from azure.identity import DefaultAzureCredential
 from azure.ai.projects import AIProjectClient
-from azure.ai.projects.models import JobStatus, RubricBasedEvaluatorDefinition
+from azure.ai.projects.models import (
+    AgentEvaluatorGenerationJobSource,
+    DatasetEvaluatorGenerationJobSource,
+    EvaluatorGenerationInputs,
+    EvaluatorGenerationJob,
+    EvaluatorGenerationJobSource,
+    JobStatus,
+    PromptEvaluatorGenerationJobSource,
+    RubricBasedEvaluatorDefinition,
+    TracesEvaluatorGenerationJobSource,
+)
 
 load_dotenv()
 
@@ -85,51 +95,49 @@
     AIProjectClient(endpoint=endpoint, credential=credential) as project_client,
 ):
     # 1. Combined Prompt + Agent + Dataset generation job.
-    multi_sources: List[Dict[str, Any]] = [
-        {
-            "type": "Prompt",
-            "description": "Inline application overview.",
-            "prompt": (
+    multi_sources: List[EvaluatorGenerationJobSource] = [
+        PromptEvaluatorGenerationJobSource(
+            description="Inline application overview.",
+            prompt=(
                 "You are evaluating a customer-support assistant that helps users "
                 "manage their accounts, troubleshoot issues, and place orders. The "
                 "assistant uses tools for account lookup, password reset, and order "
                 "creation. It must confirm intent before performing destructive "
                 "actions and maintain a patient, professional tone."
             ),
-        }
+        ),
     ]
     if agent_name:
         multi_sources.append(
-            {
-                "type": "Agent",
-                "description": "Agent metadata enriches the rubric with tool and instruction signals.",
-                "agent_name": agent_name,
-            }
+            AgentEvaluatorGenerationJobSource(
+                description="Agent metadata enriches the rubric with tool and instruction signals.",
+                agent_name=agent_name,
+            )
         )
     else:
         print("Skipping Agent source (FOUNDRY_AGENT_NAME not set).")
 
     if dataset_name and dataset_version:
         multi_sources.append(
-            {
-                "type": "Dataset",
-                "description": "Reference examples ground dimensions in real data.",
-                "name": dataset_name,
-                "version": dataset_version,
-            }
+            DatasetEvaluatorGenerationJobSource(
+                description="Reference examples ground dimensions in real data.",
+                name=dataset_name,
+                version=dataset_version,
+            )
         )
     else:
         print("Skipping Dataset source (FOUNDRY_REFERENCE_DATASET_NAME / _VERSION not set).")
 
     multi_job = project_client.beta.evaluators.create_generation_job(
-        job={
-            "model": model_name,
-            "name": "Multi-source generation",
-            "evaluator_name": multi_name,
-            "evaluator_display_name": "Customer Support Quality (multi-source)",
-            "evaluator_description": "Generated from prompt, agent, and dataset signals.",
-            "sources": multi_sources,
-        },
+        job=EvaluatorGenerationJob(
+            inputs=EvaluatorGenerationInputs(
+                model=model_name,
+                evaluator_name=multi_name,
+                evaluator_display_name="Customer Support Quality (multi-source)",
+                evaluator_description="Generated from prompt, agent, and dataset signals.",
+                sources=multi_sources,
+            ),
+        ),
         operation_id=f"rubric-multi-{short}",
     )
 
@@ -159,32 +167,31 @@
     if not agent_name:
         print("Skipping traces job (requires FOUNDRY_AGENT_NAME for both the traces source and companion).")
     else:
-        now = int(time.time())
-        start_time = now - traces_window_days * 24 * 3600
-        end_time = now + 600  # small padding for clock skew
+        now = datetime.now(tz=timezone.utc)
+        start_time = now - timedelta(days=traces_window_days)
+        end_time = now + timedelta(seconds=600)  # small padding for clock skew
 
         traces_job = project_client.beta.evaluators.create_generation_job(
-            job={
-                "model": model_name,
-                "name": "Traces-source generation",
-                "evaluator_name": traces_name,
-                "evaluator_display_name": "Customer Support Quality (from traces)",
-                "evaluator_description": "Generated from real Application Insights conversation traces.",
-                "sources": [
-                    {
-                        "type": "traces",
-                        "description": "Application Insights conversation traces for the agent.",
-                        "agent_name": agent_name,
-                        "start_time": start_time,
-                        "end_time": end_time,
-                    },
-                    {
-                        "type": "Agent",
-                        "description": "Companion source (service rejects traces-only).",
-                        "agent_name": agent_name,
-                    },
-                ],
-            },
+            job=EvaluatorGenerationJob(
+                inputs=EvaluatorGenerationInputs(
+                    model=model_name,
+                    evaluator_name=traces_name,
+                    evaluator_display_name="Customer Support Quality (from traces)",
+                    evaluator_description="Generated from real Application Insights conversation traces.",
+                    sources=[
+                        TracesEvaluatorGenerationJobSource(
+                            description="Application Insights conversation traces for the agent.",
+                            agent_name=agent_name,
+                            start_time=start_time,
+                            end_time=end_time,
+                        ),
+                        AgentEvaluatorGenerationJobSource(
+                            description="Companion source (service rejects traces-only).",
+                            agent_name=agent_name,
+                        ),
+                    ],
+                ),
+            ),
             operation_id=f"rubric-traces-{short}",
         )
 
diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_basic.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_basic.py
index b25344c28ae8..98b97a79e171 100644
--- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_basic.py
+++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_basic.py
@@ -60,7 +60,10 @@
 from azure.identity import DefaultAzureCredential
 from azure.ai.projects import AIProjectClient
 from azure.ai.projects.models import (
+    EvaluatorGenerationInputs,
+    EvaluatorGenerationJob,
     JobStatus,
+    PromptEvaluatorGenerationJobSource,
     RubricBasedEvaluatorDefinition,
     TestingCriterionAzureAIEvaluator,
 )
@@ -86,31 +89,31 @@
 ):
     # 1. Generate an evaluator from a single `Prompt` source.
     job = project_client.beta.evaluators.create_generation_job(
-        job={
-            "model": model_name,
-            "name": "Reservation Quality (Generated)",
-            "evaluator_name": evaluator_name,
-            "evaluator_display_name": "Reservation Quality (Generated)",
-            "evaluator_description": "Quality evaluator generated from a prompt describing a restaurant reservation assistant.",
-            "sources": [
-                {
-                    "type": "Prompt",
-                    "description": "Application overview - purpose, capabilities, and tools.",
-                    "prompt": (
-                        "You are evaluating a restaurant reservation assistant. The assistant helps "
-                        "users create, modify, and cancel reservations at participating restaurants. "
-                        "It can:\n"
-                        "  - Search for restaurants by name, cuisine, or neighborhood.\n"
-                        "  - Check table availability for a requested date, time, and party size.\n"
-                        "  - Create, update, and cancel reservations on behalf of the user.\n"
-                        "  - Send SMS or email confirmations through a notifications tool.\n"
-                        "It must always confirm the user's intent before committing changes, "
-                        "ask follow-up questions when details are missing, and maintain a polite "
-                        "restaurant-host tone."
+        job=EvaluatorGenerationJob(
+            inputs=EvaluatorGenerationInputs(
+                model=model_name,
+                evaluator_name=evaluator_name,
+                evaluator_display_name="Reservation Quality (Generated)",
+                evaluator_description="Quality evaluator generated from a prompt describing a restaurant reservation assistant.",
+                sources=[
+                    PromptEvaluatorGenerationJobSource(
+                        description="Application overview - purpose, capabilities, and tools.",
+                        prompt=(
+                            "You are evaluating a restaurant reservation assistant. The assistant helps "
+                            "users create, modify, and cancel reservations at participating restaurants. "
+                            "It can:\n"
+                            "  - Search for restaurants by name, cuisine, or neighborhood.\n"
+                            "  - Check table availability for a requested date, time, and party size.\n"
+                            "  - Create, update, and cancel reservations on behalf of the user.\n"
+                            "  - Send SMS or email confirmations through a notifications tool.\n"
+                            "It must always confirm the user's intent before committing changes, "
+                            "ask follow-up questions when details are missing, and maintain a polite "
+                            "restaurant-host tone."
+                        ),
                     ),
-                }
-            ],
-        },
+                ],
+            ),
+        ),
         # `operation_id` makes the call idempotent - re-submitting the same id returns the existing job.
         operation_id=f"rubric-eval-basic-{short}",
     )
diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_iterate.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_iterate.py
index 7b664defc598..5ab81435203d 100644
--- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_iterate.py
+++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_iterate.py
@@ -49,7 +49,10 @@
 from azure.ai.projects.models import (
     EvaluatorCategory,
     EvaluatorDefinitionType,
+    EvaluatorGenerationInputs,
+    EvaluatorGenerationJob,
     JobStatus,
+    PromptEvaluatorGenerationJobSource,
     RubricBasedEvaluatorDefinition,
 )
 
@@ -72,25 +75,25 @@
 ):
     # 1. Generate v1 of the evaluator from a single `Prompt` source.
     job = project_client.beta.evaluators.create_generation_job(
-        job={
-            "model": model_name,
-            "name": "Reservation Quality (iterate)",
-            "evaluator_name": evaluator_name,
-            "evaluator_display_name": "Reservation Quality (iterate)",
-            "evaluator_description": "Starting point for human-in-the-loop iteration.",
-            "sources": [
-                {
-                    "type": "Prompt",
-                    "description": "Inline application overview.",
-                    "prompt": (
-                        "You are evaluating a restaurant reservation assistant that creates, "
-                        "modifies, and cancels reservations. It uses tools for restaurant "
-                        "lookup, availability checking, and notifications. It must confirm "
-                        "user intent before committing changes."
+        job=EvaluatorGenerationJob(
+            inputs=EvaluatorGenerationInputs(
+                model=model_name,
+                evaluator_name=evaluator_name,
+                evaluator_display_name="Reservation Quality (iterate)",
+                evaluator_description="Starting point for human-in-the-loop iteration.",
+                sources=[
+                    PromptEvaluatorGenerationJobSource(
+                        description="Inline application overview.",
+                        prompt=(
+                            "You are evaluating a restaurant reservation assistant that creates, "
+                            "modifies, and cancels reservations. It uses tools for restaurant "
+                            "lookup, availability checking, and notifications. It must confirm "
+                            "user intent before committing changes."
+                        ),
                     ),
-                }
-            ],
-        },
+                ],
+            ),
+        ),
         operation_id=f"rubric-iterate-{short}",
     )
 
diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_lifecycle.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_lifecycle.py
index 904119626366..e75d09e0d6df 100644
--- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_lifecycle.py
+++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_lifecycle.py
@@ -50,7 +50,13 @@
 from azure.core.exceptions import ResourceNotFoundError
 from azure.identity import DefaultAzureCredential
 from azure.ai.projects import AIProjectClient
-from azure.ai.projects.models import JobStatus, PageOrder
+from azure.ai.projects.models import (
+    EvaluatorGenerationInputs,
+    EvaluatorGenerationJob,
+    JobStatus,
+    PageOrder,
+    PromptEvaluatorGenerationJobSource,
+)
 
 load_dotenv()
 
@@ -66,21 +72,21 @@
 
 TERMINAL_STATUSES = {JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.CANCELLED}
 
-# Shared job body used both for the initial create and the idempotency replay.
-job_body = {
-    "model": model_name,
-    "name": "Lifecycle demo",
-    "evaluator_name": evaluator_name,
-    "evaluator_display_name": "Lifecycle demo",
-    "evaluator_description": "Minimal job used to demonstrate the LRO + list/delete lifecycle.",
-    "sources": [
-        {
-            "type": "Prompt",
-            "description": "Inline application overview.",
-            "prompt": "You are evaluating a simple Q&A assistant that answers factual questions clearly and concisely.",
-        }
-    ],
-}
+# Shared job used both for the initial create and the idempotency replay.
+job_body = EvaluatorGenerationJob(
+    inputs=EvaluatorGenerationInputs(
+        model=model_name,
+        evaluator_name=evaluator_name,
+        evaluator_display_name="Lifecycle demo",
+        evaluator_description="Minimal job used to demonstrate the LRO + list/delete lifecycle.",
+        sources=[
+            PromptEvaluatorGenerationJobSource(
+                description="Inline application overview.",
+                prompt="You are evaluating a simple Q&A assistant that answers factual questions clearly and concisely.",
+            ),
+        ],
+    ),
+)
 
 with (
     DefaultAzureCredential() as credential,

From e02a2d6b31cffd0971392cd1af04c9cc9cbaa13d Mon Sep 17 00:00:00 2001
From: aprilkim <aprilk@microsoft.com>
Date: Mon, 1 Jun 2026 15:48:45 -0700
Subject: [PATCH 2/2] [ai-projects] CHANGELOG: note typed
 EvaluatorGenerationJob in rubric samples

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 sdk/ai/azure-ai-projects/CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/ai/azure-ai-projects/CHANGELOG.md b/sdk/ai/azure-ai-projects/CHANGELOG.md
index a805c9f54a7f..b601407b5999 100644
--- a/sdk/ai/azure-ai-projects/CHANGELOG.md
+++ b/sdk/ai/azure-ai-projects/CHANGELOG.md
@@ -9,6 +9,7 @@
 * Added `sample_routines_with_schedule_trigger.py` to demonstrate triggering a routine on a recurring cron schedule via `ScheduleRoutineTrigger`.
 * Updated `sample_dataset_generation_job_traces_for_evaluation.py` and `sample_dataset_generation_job_traces_for_finetuning.py` to create a temporary agent, seed conversations, retry the data generation job over the trace window, and clean up all created resources.
 * Updated `sample_memory_crud.py` and `sample_memory_crud_async.py` to demonstrate memory item CRUD (`create_memory`, `get_memory`, `update_memory`, `list_memories`, `delete_memory`) in addition to memory store CRUD.
+* Updated the rubric evaluator generation samples (`sample_rubric_evaluator_generation_basic.py`, `sample_rubric_evaluator_generation_iterate.py`, `sample_rubric_evaluator_generation_lifecycle.py`, `sample_rubric_evaluator_generation_all_sources.py`) to use the typed `EvaluatorGenerationJob` / `EvaluatorGenerationInputs` / `*EvaluatorGenerationJobSource` models. The job inputs are now nested under `inputs` per the service contract, and the traces source uses `datetime` values for `start_time` / `end_time`.
 
 ## 2.2.0 (2026-05-29)