From f44085155601f058302f9f14db4b3c0a1d75b839 Mon Sep 17 00:00:00 2001 From: aprilkim Date: Mon, 1 Jun 2026 15:26:35 -0700 Subject: [PATCH 1/2] [ai-projects] Use typed EvaluatorGenerationJob in rubric samples The service contract nests the job inputs under an inputs field on EvaluatorGenerationJob. The four rubric-evaluator-generation samples were passing flat dicts that the SDK was tolerating but the rolling-out service change requires the nested form. Convert all four samples to use the typed EvaluatorGenerationJob / EvaluatorGenerationInputs / *EvaluatorGenerationJobSource models, and drop the stale top-level `name` field which has no home in the new contract. For the traces source, switch from int unix timestamps to datetime values (the SDK model serializes them as unix-timestamp). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ...rubric_evaluator_generation_all_sources.py | 111 ++++++++++-------- ...ample_rubric_evaluator_generation_basic.py | 51 ++++---- ...ple_rubric_evaluator_generation_iterate.py | 39 +++--- ...e_rubric_evaluator_generation_lifecycle.py | 38 +++--- 4 files changed, 129 insertions(+), 110 deletions(-) diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_all_sources.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_all_sources.py index 2826e865e4ae..09070104956a 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_all_sources.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_all_sources.py @@ -50,14 +50,24 @@ import os import time import uuid -from datetime import datetime, timezone -from typing import Any, Dict, List, cast +from datetime import datetime, timedelta, timezone +from typing import List, cast from dotenv import load_dotenv from azure.identity import DefaultAzureCredential from azure.ai.projects import AIProjectClient -from azure.ai.projects.models import JobStatus, RubricBasedEvaluatorDefinition +from azure.ai.projects.models import ( + AgentEvaluatorGenerationJobSource, + DatasetEvaluatorGenerationJobSource, + EvaluatorGenerationInputs, + EvaluatorGenerationJob, + EvaluatorGenerationJobSource, + JobStatus, + PromptEvaluatorGenerationJobSource, + RubricBasedEvaluatorDefinition, + TracesEvaluatorGenerationJobSource, +) load_dotenv() @@ -85,51 +95,49 @@ AIProjectClient(endpoint=endpoint, credential=credential) as project_client, ): # 1. Combined Prompt + Agent + Dataset generation job. - multi_sources: List[Dict[str, Any]] = [ - { - "type": "Prompt", - "description": "Inline application overview.", - "prompt": ( + multi_sources: List[EvaluatorGenerationJobSource] = [ + PromptEvaluatorGenerationJobSource( + description="Inline application overview.", + prompt=( "You are evaluating a customer-support assistant that helps users " "manage their accounts, troubleshoot issues, and place orders. The " "assistant uses tools for account lookup, password reset, and order " "creation. It must confirm intent before performing destructive " "actions and maintain a patient, professional tone." ), - } + ), ] if agent_name: multi_sources.append( - { - "type": "Agent", - "description": "Agent metadata enriches the rubric with tool and instruction signals.", - "agent_name": agent_name, - } + AgentEvaluatorGenerationJobSource( + description="Agent metadata enriches the rubric with tool and instruction signals.", + agent_name=agent_name, + ) ) else: print("Skipping Agent source (FOUNDRY_AGENT_NAME not set).") if dataset_name and dataset_version: multi_sources.append( - { - "type": "Dataset", - "description": "Reference examples ground dimensions in real data.", - "name": dataset_name, - "version": dataset_version, - } + DatasetEvaluatorGenerationJobSource( + description="Reference examples ground dimensions in real data.", + name=dataset_name, + version=dataset_version, + ) ) else: print("Skipping Dataset source (FOUNDRY_REFERENCE_DATASET_NAME / _VERSION not set).") multi_job = project_client.beta.evaluators.create_generation_job( - job={ - "model": model_name, - "name": "Multi-source generation", - "evaluator_name": multi_name, - "evaluator_display_name": "Customer Support Quality (multi-source)", - "evaluator_description": "Generated from prompt, agent, and dataset signals.", - "sources": multi_sources, - }, + job=EvaluatorGenerationJob( + inputs=EvaluatorGenerationInputs( + model=model_name, + evaluator_name=multi_name, + evaluator_display_name="Customer Support Quality (multi-source)", + evaluator_description="Generated from prompt, agent, and dataset signals.", + sources=multi_sources, + ), + ), operation_id=f"rubric-multi-{short}", ) @@ -159,32 +167,31 @@ if not agent_name: print("Skipping traces job (requires FOUNDRY_AGENT_NAME for both the traces source and companion).") else: - now = int(time.time()) - start_time = now - traces_window_days * 24 * 3600 - end_time = now + 600 # small padding for clock skew + now = datetime.now(tz=timezone.utc) + start_time = now - timedelta(days=traces_window_days) + end_time = now + timedelta(seconds=600) # small padding for clock skew traces_job = project_client.beta.evaluators.create_generation_job( - job={ - "model": model_name, - "name": "Traces-source generation", - "evaluator_name": traces_name, - "evaluator_display_name": "Customer Support Quality (from traces)", - "evaluator_description": "Generated from real Application Insights conversation traces.", - "sources": [ - { - "type": "traces", - "description": "Application Insights conversation traces for the agent.", - "agent_name": agent_name, - "start_time": start_time, - "end_time": end_time, - }, - { - "type": "Agent", - "description": "Companion source (service rejects traces-only).", - "agent_name": agent_name, - }, - ], - }, + job=EvaluatorGenerationJob( + inputs=EvaluatorGenerationInputs( + model=model_name, + evaluator_name=traces_name, + evaluator_display_name="Customer Support Quality (from traces)", + evaluator_description="Generated from real Application Insights conversation traces.", + sources=[ + TracesEvaluatorGenerationJobSource( + description="Application Insights conversation traces for the agent.", + agent_name=agent_name, + start_time=start_time, + end_time=end_time, + ), + AgentEvaluatorGenerationJobSource( + description="Companion source (service rejects traces-only).", + agent_name=agent_name, + ), + ], + ), + ), operation_id=f"rubric-traces-{short}", ) diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_basic.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_basic.py index b25344c28ae8..98b97a79e171 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_basic.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_basic.py @@ -60,7 +60,10 @@ from azure.identity import DefaultAzureCredential from azure.ai.projects import AIProjectClient from azure.ai.projects.models import ( + EvaluatorGenerationInputs, + EvaluatorGenerationJob, JobStatus, + PromptEvaluatorGenerationJobSource, RubricBasedEvaluatorDefinition, TestingCriterionAzureAIEvaluator, ) @@ -86,31 +89,31 @@ ): # 1. Generate an evaluator from a single `Prompt` source. job = project_client.beta.evaluators.create_generation_job( - job={ - "model": model_name, - "name": "Reservation Quality (Generated)", - "evaluator_name": evaluator_name, - "evaluator_display_name": "Reservation Quality (Generated)", - "evaluator_description": "Quality evaluator generated from a prompt describing a restaurant reservation assistant.", - "sources": [ - { - "type": "Prompt", - "description": "Application overview - purpose, capabilities, and tools.", - "prompt": ( - "You are evaluating a restaurant reservation assistant. The assistant helps " - "users create, modify, and cancel reservations at participating restaurants. " - "It can:\n" - " - Search for restaurants by name, cuisine, or neighborhood.\n" - " - Check table availability for a requested date, time, and party size.\n" - " - Create, update, and cancel reservations on behalf of the user.\n" - " - Send SMS or email confirmations through a notifications tool.\n" - "It must always confirm the user's intent before committing changes, " - "ask follow-up questions when details are missing, and maintain a polite " - "restaurant-host tone." + job=EvaluatorGenerationJob( + inputs=EvaluatorGenerationInputs( + model=model_name, + evaluator_name=evaluator_name, + evaluator_display_name="Reservation Quality (Generated)", + evaluator_description="Quality evaluator generated from a prompt describing a restaurant reservation assistant.", + sources=[ + PromptEvaluatorGenerationJobSource( + description="Application overview - purpose, capabilities, and tools.", + prompt=( + "You are evaluating a restaurant reservation assistant. The assistant helps " + "users create, modify, and cancel reservations at participating restaurants. " + "It can:\n" + " - Search for restaurants by name, cuisine, or neighborhood.\n" + " - Check table availability for a requested date, time, and party size.\n" + " - Create, update, and cancel reservations on behalf of the user.\n" + " - Send SMS or email confirmations through a notifications tool.\n" + "It must always confirm the user's intent before committing changes, " + "ask follow-up questions when details are missing, and maintain a polite " + "restaurant-host tone." + ), ), - } - ], - }, + ], + ), + ), # `operation_id` makes the call idempotent - re-submitting the same id returns the existing job. operation_id=f"rubric-eval-basic-{short}", ) diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_iterate.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_iterate.py index 7b664defc598..5ab81435203d 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_iterate.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_iterate.py @@ -49,7 +49,10 @@ from azure.ai.projects.models import ( EvaluatorCategory, EvaluatorDefinitionType, + EvaluatorGenerationInputs, + EvaluatorGenerationJob, JobStatus, + PromptEvaluatorGenerationJobSource, RubricBasedEvaluatorDefinition, ) @@ -72,25 +75,25 @@ ): # 1. Generate v1 of the evaluator from a single `Prompt` source. job = project_client.beta.evaluators.create_generation_job( - job={ - "model": model_name, - "name": "Reservation Quality (iterate)", - "evaluator_name": evaluator_name, - "evaluator_display_name": "Reservation Quality (iterate)", - "evaluator_description": "Starting point for human-in-the-loop iteration.", - "sources": [ - { - "type": "Prompt", - "description": "Inline application overview.", - "prompt": ( - "You are evaluating a restaurant reservation assistant that creates, " - "modifies, and cancels reservations. It uses tools for restaurant " - "lookup, availability checking, and notifications. It must confirm " - "user intent before committing changes." + job=EvaluatorGenerationJob( + inputs=EvaluatorGenerationInputs( + model=model_name, + evaluator_name=evaluator_name, + evaluator_display_name="Reservation Quality (iterate)", + evaluator_description="Starting point for human-in-the-loop iteration.", + sources=[ + PromptEvaluatorGenerationJobSource( + description="Inline application overview.", + prompt=( + "You are evaluating a restaurant reservation assistant that creates, " + "modifies, and cancels reservations. It uses tools for restaurant " + "lookup, availability checking, and notifications. It must confirm " + "user intent before committing changes." + ), ), - } - ], - }, + ], + ), + ), operation_id=f"rubric-iterate-{short}", ) diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_lifecycle.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_lifecycle.py index 904119626366..e75d09e0d6df 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_lifecycle.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_rubric_evaluator_generation_lifecycle.py @@ -50,7 +50,13 @@ from azure.core.exceptions import ResourceNotFoundError from azure.identity import DefaultAzureCredential from azure.ai.projects import AIProjectClient -from azure.ai.projects.models import JobStatus, PageOrder +from azure.ai.projects.models import ( + EvaluatorGenerationInputs, + EvaluatorGenerationJob, + JobStatus, + PageOrder, + PromptEvaluatorGenerationJobSource, +) load_dotenv() @@ -66,21 +72,21 @@ TERMINAL_STATUSES = {JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.CANCELLED} -# Shared job body used both for the initial create and the idempotency replay. -job_body = { - "model": model_name, - "name": "Lifecycle demo", - "evaluator_name": evaluator_name, - "evaluator_display_name": "Lifecycle demo", - "evaluator_description": "Minimal job used to demonstrate the LRO + list/delete lifecycle.", - "sources": [ - { - "type": "Prompt", - "description": "Inline application overview.", - "prompt": "You are evaluating a simple Q&A assistant that answers factual questions clearly and concisely.", - } - ], -} +# Shared job used both for the initial create and the idempotency replay. +job_body = EvaluatorGenerationJob( + inputs=EvaluatorGenerationInputs( + model=model_name, + evaluator_name=evaluator_name, + evaluator_display_name="Lifecycle demo", + evaluator_description="Minimal job used to demonstrate the LRO + list/delete lifecycle.", + sources=[ + PromptEvaluatorGenerationJobSource( + description="Inline application overview.", + prompt="You are evaluating a simple Q&A assistant that answers factual questions clearly and concisely.", + ), + ], + ), +) with ( DefaultAzureCredential() as credential, From e02a2d6b31cffd0971392cd1af04c9cc9cbaa13d Mon Sep 17 00:00:00 2001 From: aprilkim Date: Mon, 1 Jun 2026 15:48:45 -0700 Subject: [PATCH 2/2] [ai-projects] CHANGELOG: note typed EvaluatorGenerationJob in rubric samples Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/ai/azure-ai-projects/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/ai/azure-ai-projects/CHANGELOG.md b/sdk/ai/azure-ai-projects/CHANGELOG.md index a805c9f54a7f..b601407b5999 100644 --- a/sdk/ai/azure-ai-projects/CHANGELOG.md +++ b/sdk/ai/azure-ai-projects/CHANGELOG.md @@ -9,6 +9,7 @@ * Added `sample_routines_with_schedule_trigger.py` to demonstrate triggering a routine on a recurring cron schedule via `ScheduleRoutineTrigger`. * Updated `sample_dataset_generation_job_traces_for_evaluation.py` and `sample_dataset_generation_job_traces_for_finetuning.py` to create a temporary agent, seed conversations, retry the data generation job over the trace window, and clean up all created resources. * Updated `sample_memory_crud.py` and `sample_memory_crud_async.py` to demonstrate memory item CRUD (`create_memory`, `get_memory`, `update_memory`, `list_memories`, `delete_memory`) in addition to memory store CRUD. +* Updated the rubric evaluator generation samples (`sample_rubric_evaluator_generation_basic.py`, `sample_rubric_evaluator_generation_iterate.py`, `sample_rubric_evaluator_generation_lifecycle.py`, `sample_rubric_evaluator_generation_all_sources.py`) to use the typed `EvaluatorGenerationJob` / `EvaluatorGenerationInputs` / `*EvaluatorGenerationJobSource` models. The job inputs are now nested under `inputs` per the service contract, and the traces source uses `datetime` values for `start_time` / `end_time`. ## 2.2.0 (2026-05-29)