Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/ai/azure-ai-projects/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* Added `sample_routines_with_schedule_trigger.py` to demonstrate triggering a routine on a recurring cron schedule via `ScheduleRoutineTrigger`.
* Updated `sample_dataset_generation_job_traces_for_evaluation.py` and `sample_dataset_generation_job_traces_for_finetuning.py` to create a temporary agent, seed conversations, retry the data generation job over the trace window, and clean up all created resources.
* Updated `sample_memory_crud.py` and `sample_memory_crud_async.py` to demonstrate memory item CRUD (`create_memory`, `get_memory`, `update_memory`, `list_memories`, `delete_memory`) in addition to memory store CRUD.
* Updated the rubric evaluator generation samples (`sample_rubric_evaluator_generation_basic.py`, `sample_rubric_evaluator_generation_iterate.py`, `sample_rubric_evaluator_generation_lifecycle.py`, `sample_rubric_evaluator_generation_all_sources.py`) to build the job request from the typed `EvaluatorGenerationJob`, `EvaluatorGenerationInputs`, and `*EvaluatorGenerationJobSource` models instead of untyped dictionaries. The job inputs are now nested under `inputs` per the service contract, and the traces source uses `datetime` values for `start_time` / `end_time` instead of integer Unix timestamps.

## 2.2.0 (2026-05-29)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,24 @@
import os
import time
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, cast
from datetime import datetime, timedelta, timezone
from typing import List, cast

from dotenv import load_dotenv

from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import JobStatus, RubricBasedEvaluatorDefinition
from azure.ai.projects.models import (
AgentEvaluatorGenerationJobSource,
DatasetEvaluatorGenerationJobSource,
EvaluatorGenerationInputs,
EvaluatorGenerationJob,
EvaluatorGenerationJobSource,
JobStatus,
PromptEvaluatorGenerationJobSource,
RubricBasedEvaluatorDefinition,
TracesEvaluatorGenerationJobSource,
)

load_dotenv()

Expand Down Expand Up @@ -85,51 +95,49 @@
AIProjectClient(endpoint=endpoint, credential=credential) as project_client,
):
# 1. Combined Prompt + Agent + Dataset generation job.
multi_sources: List[Dict[str, Any]] = [
{
"type": "Prompt",
"description": "Inline application overview.",
"prompt": (
multi_sources: List[EvaluatorGenerationJobSource] = [
PromptEvaluatorGenerationJobSource(
description="Inline application overview.",
prompt=(
"You are evaluating a customer-support assistant that helps users "
"manage their accounts, troubleshoot issues, and place orders. The "
"assistant uses tools for account lookup, password reset, and order "
"creation. It must confirm intent before performing destructive "
"actions and maintain a patient, professional tone."
),
}
),
]
if agent_name:
multi_sources.append(
{
"type": "Agent",
"description": "Agent metadata enriches the rubric with tool and instruction signals.",
"agent_name": agent_name,
}
AgentEvaluatorGenerationJobSource(
description="Agent metadata enriches the rubric with tool and instruction signals.",
agent_name=agent_name,
)
)
else:
print("Skipping Agent source (FOUNDRY_AGENT_NAME not set).")

if dataset_name and dataset_version:
multi_sources.append(
{
"type": "Dataset",
"description": "Reference examples ground dimensions in real data.",
"name": dataset_name,
"version": dataset_version,
}
DatasetEvaluatorGenerationJobSource(
description="Reference examples ground dimensions in real data.",
name=dataset_name,
version=dataset_version,
)
)
else:
print("Skipping Dataset source (FOUNDRY_REFERENCE_DATASET_NAME / _VERSION not set).")

multi_job = project_client.beta.evaluators.create_generation_job(
job={
"model": model_name,
"name": "Multi-source generation",
"evaluator_name": multi_name,
"evaluator_display_name": "Customer Support Quality (multi-source)",
"evaluator_description": "Generated from prompt, agent, and dataset signals.",
"sources": multi_sources,
},
job=EvaluatorGenerationJob(
inputs=EvaluatorGenerationInputs(
model=model_name,
evaluator_name=multi_name,
evaluator_display_name="Customer Support Quality (multi-source)",
evaluator_description="Generated from prompt, agent, and dataset signals.",
sources=multi_sources,
),
),
operation_id=f"rubric-multi-{short}",
)

Expand Down Expand Up @@ -159,32 +167,31 @@
if not agent_name:
print("Skipping traces job (requires FOUNDRY_AGENT_NAME for both the traces source and companion).")
else:
now = int(time.time())
start_time = now - traces_window_days * 24 * 3600
end_time = now + 600 # small padding for clock skew
now = datetime.now(tz=timezone.utc)
start_time = now - timedelta(days=traces_window_days)
end_time = now + timedelta(seconds=600) # small padding for clock skew

traces_job = project_client.beta.evaluators.create_generation_job(
job={
"model": model_name,
"name": "Traces-source generation",
"evaluator_name": traces_name,
"evaluator_display_name": "Customer Support Quality (from traces)",
"evaluator_description": "Generated from real Application Insights conversation traces.",
"sources": [
{
"type": "traces",
"description": "Application Insights conversation traces for the agent.",
"agent_name": agent_name,
"start_time": start_time,
"end_time": end_time,
},
{
"type": "Agent",
"description": "Companion source (service rejects traces-only).",
"agent_name": agent_name,
},
],
},
job=EvaluatorGenerationJob(
inputs=EvaluatorGenerationInputs(
model=model_name,
evaluator_name=traces_name,
evaluator_display_name="Customer Support Quality (from traces)",
evaluator_description="Generated from real Application Insights conversation traces.",
sources=[
TracesEvaluatorGenerationJobSource(
description="Application Insights conversation traces for the agent.",
agent_name=agent_name,
start_time=start_time,
end_time=end_time,
),
AgentEvaluatorGenerationJobSource(
description="Companion source (service rejects traces-only).",
agent_name=agent_name,
),
],
),
),
operation_id=f"rubric-traces-{short}",
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import (
EvaluatorGenerationInputs,
EvaluatorGenerationJob,
JobStatus,
PromptEvaluatorGenerationJobSource,
RubricBasedEvaluatorDefinition,
TestingCriterionAzureAIEvaluator,
)
Expand All @@ -86,31 +89,31 @@
):
# 1. Generate an evaluator from a single `Prompt` source.
job = project_client.beta.evaluators.create_generation_job(
job={
"model": model_name,
"name": "Reservation Quality (Generated)",
"evaluator_name": evaluator_name,
"evaluator_display_name": "Reservation Quality (Generated)",
"evaluator_description": "Quality evaluator generated from a prompt describing a restaurant reservation assistant.",
"sources": [
{
"type": "Prompt",
"description": "Application overview - purpose, capabilities, and tools.",
"prompt": (
"You are evaluating a restaurant reservation assistant. The assistant helps "
"users create, modify, and cancel reservations at participating restaurants. "
"It can:\n"
" - Search for restaurants by name, cuisine, or neighborhood.\n"
" - Check table availability for a requested date, time, and party size.\n"
" - Create, update, and cancel reservations on behalf of the user.\n"
" - Send SMS or email confirmations through a notifications tool.\n"
"It must always confirm the user's intent before committing changes, "
"ask follow-up questions when details are missing, and maintain a polite "
"restaurant-host tone."
job=EvaluatorGenerationJob(
inputs=EvaluatorGenerationInputs(
model=model_name,
evaluator_name=evaluator_name,
evaluator_display_name="Reservation Quality (Generated)",
evaluator_description="Quality evaluator generated from a prompt describing a restaurant reservation assistant.",
sources=[
PromptEvaluatorGenerationJobSource(
description="Application overview - purpose, capabilities, and tools.",
prompt=(
"You are evaluating a restaurant reservation assistant. The assistant helps "
"users create, modify, and cancel reservations at participating restaurants. "
"It can:\n"
" - Search for restaurants by name, cuisine, or neighborhood.\n"
" - Check table availability for a requested date, time, and party size.\n"
" - Create, update, and cancel reservations on behalf of the user.\n"
" - Send SMS or email confirmations through a notifications tool.\n"
"It must always confirm the user's intent before committing changes, "
"ask follow-up questions when details are missing, and maintain a polite "
"restaurant-host tone."
),
),
}
],
},
],
),
),
# `operation_id` makes the call idempotent - re-submitting the same id returns the existing job.
operation_id=f"rubric-eval-basic-{short}",
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,10 @@
from azure.ai.projects.models import (
EvaluatorCategory,
EvaluatorDefinitionType,
EvaluatorGenerationInputs,
EvaluatorGenerationJob,
JobStatus,
PromptEvaluatorGenerationJobSource,
RubricBasedEvaluatorDefinition,
)

Expand All @@ -72,25 +75,25 @@
):
# 1. Generate v1 of the evaluator from a single `Prompt` source.
job = project_client.beta.evaluators.create_generation_job(
job={
"model": model_name,
"name": "Reservation Quality (iterate)",
"evaluator_name": evaluator_name,
"evaluator_display_name": "Reservation Quality (iterate)",
"evaluator_description": "Starting point for human-in-the-loop iteration.",
"sources": [
{
"type": "Prompt",
"description": "Inline application overview.",
"prompt": (
"You are evaluating a restaurant reservation assistant that creates, "
"modifies, and cancels reservations. It uses tools for restaurant "
"lookup, availability checking, and notifications. It must confirm "
"user intent before committing changes."
job=EvaluatorGenerationJob(
inputs=EvaluatorGenerationInputs(
model=model_name,
evaluator_name=evaluator_name,
evaluator_display_name="Reservation Quality (iterate)",
evaluator_description="Starting point for human-in-the-loop iteration.",
sources=[
PromptEvaluatorGenerationJobSource(
description="Inline application overview.",
prompt=(
"You are evaluating a restaurant reservation assistant that creates, "
"modifies, and cancels reservations. It uses tools for restaurant "
"lookup, availability checking, and notifications. It must confirm "
"user intent before committing changes."
),
),
}
],
},
],
),
),
operation_id=f"rubric-iterate-{short}",
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,13 @@
from azure.core.exceptions import ResourceNotFoundError
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import JobStatus, PageOrder
from azure.ai.projects.models import (
EvaluatorGenerationInputs,
EvaluatorGenerationJob,
JobStatus,
PageOrder,
PromptEvaluatorGenerationJobSource,
)

load_dotenv()

Expand All @@ -66,21 +72,21 @@

TERMINAL_STATUSES = {JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.CANCELLED}

# Shared job body used both for the initial create and the idempotency replay.
job_body = {
"model": model_name,
"name": "Lifecycle demo",
"evaluator_name": evaluator_name,
"evaluator_display_name": "Lifecycle demo",
"evaluator_description": "Minimal job used to demonstrate the LRO + list/delete lifecycle.",
"sources": [
{
"type": "Prompt",
"description": "Inline application overview.",
"prompt": "You are evaluating a simple Q&A assistant that answers factual questions clearly and concisely.",
}
],
}
# Shared job used both for the initial create and the idempotency replay.
job_body = EvaluatorGenerationJob(
inputs=EvaluatorGenerationInputs(
model=model_name,
evaluator_name=evaluator_name,
evaluator_display_name="Lifecycle demo",
evaluator_description="Minimal job used to demonstrate the LRO + list/delete lifecycle.",
sources=[
PromptEvaluatorGenerationJobSource(
description="Inline application overview.",
prompt="You are evaluating a simple Q&A assistant that answers factual questions clearly and concisely.",
),
],
),
)

with (
DefaultAzureCredential() as credential,
Expand Down
Loading