diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/.state-change b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/.state-change new file mode 100644 index 000000000000..8eb7d66b9a27 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/.state-change @@ -0,0 +1 @@ +2026-06-03T18:26:57Z \ No newline at end of file diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/config.json b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/config.json new file mode 100644 index 000000000000..017e5d26dc28 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/config.json @@ -0,0 +1 @@ +{"version":1,"defaultEnvironment":"demo-dev"} diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/demo-dev/.env b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/demo-dev/.env new file mode 100644 index 000000000000..d6494f4da085 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/demo-dev/.env @@ -0,0 +1,40 @@ +AGENT_DURABLE_RESEARCH_AGENT_ENDPOINT="https://e2e-tests-westus2-account.services.ai.azure.com/api/projects/e2e-tests-westus2/agents/durable-research-agent/versions/31" +AGENT_DURABLE_RESEARCH_AGENT_INVOCATIONS_ENDPOINT="https://e2e-tests-westus2-account.services.ai.azure.com/api/projects/e2e-tests-westus2/agents/durable-research-agent/endpoint/protocols/invocations?api-version=2025-11-15-preview" +AGENT_DURABLE_RESEARCH_AGENT_NAME="durable-research-agent" +AGENT_DURABLE_RESEARCH_AGENT_VERSION=31 +AI_AGENT_PENDING_PROVISION="" +AI_PROJECT_CONNECTION_IDS_JSON="[]" 
+AI_PROJECT_DEPLOYMENTS="[{\\\"name\\\":\\\"gpt-4.1-mini\\\",\\\"model\\\":{\\\"name\\\":\\\"gpt-4.1-mini\\\",\\\"format\\\":\\\"OpenAI\\\",\\\"version\\\":\\\"2025-04-14\\\"},\\\"sku\\\":{\\\"name\\\":\\\"GlobalStandard\\\",\\\"capacity\\\":1053}}]" +APPLICATIONINSIGHTS_CONNECTION_NAME="appInsights-connection-7543" +APPLICATIONINSIGHTS_CONNECTION_STRING="InstrumentationKey=f25baa58-da74-4602-a955-ce257ff3a5d8;IngestionEndpoint=https://uksouth-1.in.applicationinsights.azure.com/;LiveEndpoint=https://uksouth.livediagnostics.monitor.azure.com/;ApplicationId=9b8190bd-1b0b-4264-89c3-e31ee47b0745" +APPLICATIONINSIGHTS_RESOURCE_ID="" +AZURE_AI_ACCOUNT_ID="/subscriptions/921496dc-987f-410f-bd57-426eb2611356/resourceGroups/agents-e2e-tests-westus2/providers/Microsoft.CognitiveServices/accounts/e2e-tests-westus2-account" +AZURE_AI_ACCOUNT_NAME="e2e-tests-westus2-account" +AZURE_AI_FOUNDRY_PROJECT_ID="/subscriptions/921496dc-987f-410f-bd57-426eb2611356/resourceGroups/agents-e2e-tests-westus2/providers/Microsoft.CognitiveServices/accounts/e2e-tests-westus2-account/projects/e2e-tests-westus2" +AZURE_AI_MODEL_DEPLOYMENT_NAME="gpt-4.1-mini" +AZURE_AI_PROJECT_ACR_CONNECTION_NAME="crdyt765he4tmsy" +AZURE_AI_PROJECT_ENDPOINT="https://e2e-tests-westus2-account.services.ai.azure.com/api/projects/e2e-tests-westus2" +AZURE_AI_PROJECT_ID="/subscriptions/921496dc-987f-410f-bd57-426eb2611356/resourceGroups/agents-e2e-tests-westus2/providers/Microsoft.CognitiveServices/accounts/e2e-tests-westus2-account/projects/e2e-tests-westus2" +AZURE_AI_PROJECT_NAME="e2e-tests-westus2" +AZURE_AI_SEARCH_CONNECTION_NAME="" +AZURE_AI_SEARCH_SERVICE_NAME="" +AZURE_CONTAINER_REGISTRY_ENDPOINT="crdyt765he4tmsy.azurecr.io" +AZURE_ENV_NAME="demo-dev" +AZURE_LOCATION="westus2" +AZURE_OPENAI_ENDPOINT="https://e2e-tests-westus2-account.openai.azure.com/" +AZURE_RESOURCE_GROUP="agents-e2e-tests-westus2" +AZURE_STORAGE_ACCOUNT_NAME="" +AZURE_STORAGE_CONNECTION_NAME="" 
+AZURE_SUBSCRIPTION_ID="921496dc-987f-410f-bd57-426eb2611356" +AZURE_TENANT_ID="72f988bf-86f1-41af-91ab-2d7cd011db47" +BING_CUSTOM_GROUNDING_CONNECTION_ID="" +BING_CUSTOM_GROUNDING_CONNECTION_NAME="" +BING_CUSTOM_GROUNDING_NAME="" +BING_GROUNDING_CONNECTION_ID="" +BING_GROUNDING_CONNECTION_NAME="" +BING_GROUNDING_RESOURCE_NAME="" +DEMO_MODE=1 +ENABLE_CAPABILITY_HOST="false" +ENABLE_HOSTED_AGENTS="true" +FOUNDRY_PROJECT_ENDPOINT="https://e2e-tests-westus2-account.services.ai.azure.com/api/projects/e2e-tests-westus2" +USE_EXISTING_AI_PROJECT="true" diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/demo-dev/.env.lock b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/demo-dev/.env.lock new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/demo-dev/config.json b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/demo-dev/config.json new file mode 100644 index 000000000000..9e26dfeeb6e6 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.azure/demo-dev/config.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.gitignore b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.gitignore index 84631e3e9cdd..017b94ddacc3 100644 --- a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.gitignore +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/.gitignore @@ -1,3 +1,10 @@ +# azd environment +.azure/*/state/ +.azure/*/*.env.bak + +# Demo client runtime +.demo-session + # Docker-build staging dir — populated by ./build.sh which copies # the checked-in wheels from sdk/agentserver/wheels/ into here. 
Never # committed: source of truth is the central wheels directory. diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/README.md b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/README.md new file mode 100644 index 000000000000..06830e8a07f2 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/README.md @@ -0,0 +1,345 @@ +# Durable Research Agent — Demo + +A `@task`-decorated long-running research agent that demonstrates three +platform capabilities of the Azure AI Hosted Agent + durable-task primitive: + +1. **Long-running tasks run uninterrupted past the platform's sandbox-eviction window.** + The framework's `PATCH .../tasks/{id}` lease-renewal cycle (every ~30s, + half of the 60s lease) signals activity through the task-storage API, + which refreshes the platform's sandbox idle-reclaim timer. The demo + runs for ~33 min with **zero client-side keepalive ingress** and the + sandbox stays warm the whole time. Validated end-to-end against the + `e2e-tests-westus2` deployment. + +2. **Recovery from container crashes.** When the agent container dies + (intentional crash or OOM), the platform's nanny worker brings it + back within ~1 min (43s measured) **without any new client ingress**. + The durable task automatically resumes from its last checkpoint + (`ctx.entry_mode == "recovered"` + a `recovered` SSE event with + `completed_phases`). User-visible: any reconnect attempt — whenever + the user gets around to it — seamlessly continues the run. + +3. **Steering.** Sending a new turn on a running steerable task queues + the input and signals cooperative cancel. The agent winds down the + current turn at the next checkpoint boundary and re-enters with the + queued input as a fresh turn (with the prior topic surfaced for the + viewer to see). + +What the agent actually does: 15 logical research phases on whatever +topic the caller supplies.
Each phase runs a small agent loop +(research → critique → refine → synthesize) against `gpt-4.1-mini`, +streaming every token to the consumer. The handler checkpoints to +`ctx.metadata` and flushes **after each subcall** — so a crash +mid-phase recovers at the next un-finished subcall (worst case: the +one that was actively streaming is replayed). A steerer that arrives +mid-phase causes the handler to wind down at the next phase boundary, +not abruptly. Hosted defaults target a ~33-min wall-time run (spanning +2x the sandbox-eviction window so every demo run exercises the lease +keep-alive path); local `agent.py` defaults are shorter for dev +iteration. + +Between subcalls and between phases the agent sleeps for +`INTRA_PHASE_COOLDOWN_SEC` / `INTER_PHASE_COOLDOWN_SEC` (30s each in +the hosted defaults). A `cooldown` SSE event is emitted at the start +of each pause so the terminal shows a low-key +`...cooling down 30s (between subcalls) — next: subcall 3/4 in phase 2/15` +line instead of going silent. + +## Prerequisites + +- Python 3.11+ +- Azure subscription with AI Foundry access +- [Azure Developer CLI](https://learn.microsoft.com/azure/developer/azure-developer-cli/install-azd) +- `azd` AI agents extension: `azd extension install azure.ai.agents` + +## Deploy + +```bash +# 1. Stage the checked-in @task preview wheels into the docker build context +# (build.sh just copies sdk/agentserver/wheels/*.whl into a per-sample +# gitignored staging dir — no compilation, no PyPI fetch) +./build.sh + +# 2. Login + deploy +azd auth login +azd up +``` + +The deploy provisions infra + ships the container image and prints the +invocations endpoint. `demo-client.sh` already points at the canonical +`e2e-tests-westus2` deployment — edit `ENDPOINT=` near the top of +`demo-client.sh` if you deployed elsewhere. + +> The `@task` durable-task primitive is in **private preview** and is +> not on PyPI. 
It ships only as the pre-release wheels checked into +> [`sdk/agentserver/wheels/`](../../../wheels). See +> [`sdk/agentserver/wheels/README.md`](../../../wheels/README.md) +> for the consumption workflow in your own project. + +## demo-client.sh — command reference + +The client is a bash CLI. Each command operates on a single session +tracked locally in `.demo-session`. Run from this directory: + +| Command | What it does | +|---|---| +| `./demo-client.sh start "<topic>"` | **Allocates a new session id** (UUID), writes it to `.demo-session`, dispatches `POST /invocations` with the topic, then attaches to the SSE stream. | +| `./demo-client.sh stream` | Reuses the session + invocation from `.demo-session` and (re)attaches to the SSE stream. Passes `?last_event_id=N` so the server skips events you've already seen. | +| `./demo-client.sh steer "<new topic>"` | Reuses the current session and sends a new `POST /invocations` with the new topic. If the run is still active the framework queues this as a steering input; the agent winds down at the next checkpoint boundary and re-enters on the new topic. | +| `./demo-client.sh cancel` | `POST /invocations/{id}/cancel` on the current invocation. The handler observes `ctx.cancel.is_set()` and winds down cooperatively. | +| `./demo-client.sh crash` | Sends `POST /invocations` with `{"message": "crash"}`. The agent (gated by `DEMO_MODE=1`) calls `os._exit(137)`. The platform's nanny worker brings the container back within ~1 min on its own — `./demo-client.sh stream` any time after will pick up the recovered run (no need to wait for or trigger anything). | +| `./demo-client.sh status` | Prints the local `SESSION_ID`, `INV_ID`, and `LAST_EVENT_ID` from `.demo-session`. Useful when you forget which session you're on. | +| `./demo-client.sh logs` | Tails the agent container's stdout/stderr via `azd ai agent monitor --session-id <SESSION_ID> --follow`. | +| `./demo-client.sh reset` | Deletes `.demo-session`.
The next `start` will allocate a fresh session id. | + +### Session-id lifecycle + +There is exactly **one active session per `.demo-session` file**: + +``` +./demo-client.sh start "<topic>" + │ + ├─ SESSION_ID = demo-<uuid> ← newly allocated by the client + ├─ INV_ID = inv_<...> ← assigned by the platform on POST + └─ written to .demo-session + │ + ▼ these commands REUSE the same session id: + ./demo-client.sh stream + ./demo-client.sh steer "<new topic>" + ./demo-client.sh crash + ./demo-client.sh cancel + ./demo-client.sh logs + ./demo-client.sh status + +To switch to a NEW session id: + ./demo-client.sh reset # clears .demo-session + ./demo-client.sh start "<topic>" # allocates a fresh demo-<uuid> +``` + +### Inspecting container logs + +`./demo-client.sh logs` opens a follow tail on the agent container's +stdout/stderr for the current session. Useful framework log lines: + +- `TaskManager starting (owner=..., instance=worker-N-..., hosted=True)` — + a fresh container booted. +- `Reclaimed stale task ` / `Recovered task is now active` — + durable recovery picked up where the previous lifetime left off. +- `Inbound GET /readiness completed with status 200` — the platform's + container health probe (a good signal that the container just came up). +- `HTTP Request: POST .../openai/v1/responses "HTTP/1.1 200 OK"` — each + LLM call the agent makes. +- `Task suspended` / `Steering drain: task drained next input` — + cooperative wind-down + steering re-entry. + +For one-shot queries, invoke `azd ai agent monitor` directly: + +```bash +SESSION_ID=$(grep SESSION_ID .demo-session | cut -d'"' -f2) +azd ai agent monitor --session-id "$SESSION_ID" --tail 100 +azd ai agent monitor --session-id "$SESSION_ID" --type system # container start/stop events +``` + +## Three demo workflows + +### A. Long-running run with no client-side keepalive (~33 min wall time) + +This run intentionally outlasts the platform's 15-min sandbox-eviction +window — the framework's lease-renewal cycle keeps the sandbox warm.
+ +```bash +# t = 0:00 +./demo-client.sh start "the future of nuclear fusion" +# Stream events. Note server_time_utc + server_uptime_sec on each event. + +# t = 5:00 +# Detach (Ctrl-C). Make zero ingress for the next 20-25 min. + +# t = 25:00 — open a new terminal: +./demo-client.sh stream +# The container is the SAME instance (no reclaim happened) because the +# framework's PATCH .../tasks/ lease renewals kept the platform's +# idle timer fresh. Your reconnect resumes the live SSE stream; +# server_uptime_sec is now ~25 min, not reset to 0. +``` + +### B. Explicit crash + nanny restoration (no ingress required) + +```bash +# Terminal 1: start a run and leave it streaming. +./demo-client.sh start "fusion energy research priorities" +# Wait until 3-4 phases have completed. + +# Terminal 2: force a crash. +./demo-client.sh crash +# Server returns 202 then os._exit(137). Terminal 1's stream disconnects. + +# Wait — DO NOT send any new ingress. The platform's nanny brings the +# container back within ~1 min entirely on its own (validated: 43 sec +# from crash to new worker_instance_id in the e2e-tests-westus2 +# deployment). The durable task auto-resumes from the last checkpoint +# inside the new process — you don't need to do anything. + +# When you want to verify recovery: +./demo-client.sh stream +# You'll see: +# 🔁 Recovered from crash completed_phases=3 +# server_uptime_sec= +# Stream picks up at phase 4, NOT phase 1. +``` + +### C. Steering (mid-run topic switch) + +```bash +# Terminal 1: +./demo-client.sh start "deep learning interpretability" +# Wait until phase 2 starts streaming. + +# Terminal 2: +./demo-client.sh steer "alignment of frontier models" +# Server queues the new input; the running turn keeps going until the +# next phase boundary. 
+ +# Terminal 1 (within ~3 min, at the next phase boundary): +# ↓ Winding down cause=steering completed=2/15 +# ▶ Run start topic=alignment of frontier models +# (steered from prior topic: deep learning interpretability) +# ▶ Phase 1/15 — Decomposing topic into focused research questions +# ... +``` + +## Architecture + +``` +┌───────────────────────────────────────────────────────────────────────────┐ +│ Foundry Hosted-Agent Sandbox (platform-managed lifecycle) │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ python app.py (InvocationAgentServerHost on :8088) │ │ +│ │ ┌───────────────────────────────────────────────────────────────┐ │ │ +│ │ │ POST /invocations │ │ │ +│ │ │ └─ {"message": ""} → │ │ │ +│ │ │ deep_research.start(task_id=session_id, input=...) │ │ │ +│ │ │ on an active steerable task: queued as a steering input│ │ │ +│ │ │ └─ {"message": "crash"} (DEMO_MODE=1 only) → os._exit │ │ │ +│ │ │ │ │ │ +│ │ │ GET /invocations/{id}?last_event_id=N │ │ │ +│ │ │ └─ live SSE from get_active_run(task_id), else file replay│ │ │ +│ │ │ │ │ │ +│ │ │ POST /invocations/{id}/cancel │ │ │ +│ │ │ └─ run.cancel() │ │ │ +│ │ │ │ │ │ +│ │ │ GET /readiness (called by platform health probe at startup)│ │ │ +│ │ └───────────────────────────────────────────────────────────────┘ │ │ +│ │ │ │ +│ │ deep_research (agent.py) │ │ +│ │ @task(steerable=True, stream_handler_factory=file_stream_factory)│ │ +│ │ loop 1..NUM_PHASES: │ │ +│ │ emit phase_start with server_time_utc + server_uptime_sec │ │ +│ │ run CALLS_PER_PHASE LLM sub-calls (research → critique → …) │ │ +│ │ ctx.metadata["completed_phases"] = i+1 │ │ +│ │ await ctx.metadata.flush() ← crash-recovery boundary │ │ +│ │ emit phase_end │ │ +│ │ if ctx.cancel.is_set(): │ │ +│ │ wind down → return await ctx.suspend(...) 
│ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +└───────────────────────────────────────────────────────────────────────────┘ + ▲ │ + │ │ PATCH /api/projects/.../tasks/{id} + │ │ (framework lease renewal + checkpoint flush) + │ ▼ + ┌─────────────────────────────────────────┐ + │ Foundry control plane │ + │ ─ Task-storage API (lease, payload, │ + │ metadata, checkpoint persistence) │ + │ ─ Endpoint proxy: routes /invocations* │ + │ to the sandbox; brings the container │ + │ back up via nanny worker after a │ + │ crash (no client ingress needed) │ + │ ─ Idle-reclaim timer: kept fresh by │ + │ framework lease-renewal traffic so │ + │ long-running tasks survive past 15min│ + └─────────────────────────────────────────┘ +``` + +Notable points: + +- The container runs `python app.py` directly. There is **no + application-level supervisor or auto-restart wrapper** — the platform's + nanny worker handles container restoration on crash. +- `task_id == session_id`: one durable task per session. This is what + routes a steering POST to the active task instead of starting a new one. +- The framework's lease-renewal loop talks to the **task-storage API** + every ~30s (half of the 60s lease). This traffic both (a) refreshes + the lease so a successor instance won't reclaim the task, and (b) + signals activity to the platform's routing layer so the sandbox's + idle-reclaim timer stays fresh — letting the run outlive the 15-min + eviction window without any client ingress. The `/readiness` + endpoint is hit only by the platform's startup health probe; + `/liveness` is hit continuously (~every 2s) by the platform. +- When the platform's nanny restores the container after a crash, the + framework's recovery scan finds the stranded task and re-enters the + handler with `ctx.entry_mode == "recovered"` and `ctx.metadata` + populated from the last checkpoint. A `recovered` SSE event is + emitted to any (re)connecting clients. 
+ +## Environment variables + +These are set in `agent.yaml` (`environment_variables`) and travel with +the deploy. Override by editing `agent.yaml` and re-deploying. + +| Variable | Default (hosted) | Default (`agent.py`) | Description | +|---|---|---|---| +| `FOUNDRY_PROJECT_ENDPOINT` | (required, set by platform) | — | Foundry project endpoint. | +| `AZURE_AI_MODEL_DEPLOYMENT_NAME` | `gpt-4.1-mini` | `gpt-4.1-mini` | Responses-API model deployment name. | +| `DEMO_MODE` | `1` (in the demo image) | unset | Enables the `{"message": "crash"}` sentinel on `POST /invocations`. A production image would leave this off. | +| `NUM_PHASES` | `15` | `15` | Number of research phases. | +| `CALLS_PER_PHASE` | `4` | `4` | Sub-calls per phase (research, critique, refine, synthesize). | +| `TARGET_OUTPUT_TOKENS` | `1500` | `1500` | Max tokens per LLM sub-call. | +| `INTRA_PHASE_COOLDOWN_SEC` | `30` | `10` | Seconds between sub-calls within a phase. Hosted default is bumped to push total wall-time past 30 min. | +| `INTER_PHASE_COOLDOWN_SEC` | `30` | `20` | Seconds between phases. Hosted default is bumped to push total wall-time past 30 min. | + +Note: `azure-ai-agentserver-core` automatically uses `HostedTaskProvider` +in hosted environments (i.e. when the platform sets +`FOUNDRY_HOSTING_ENVIRONMENT`) and `LocalFileTaskProvider` otherwise — +no opt-in env var required. 
+ +For a **fast** development loop (~2 min total instead of ~33 min), edit +`agent.yaml`'s `environment_variables` block: + +```yaml +- name: NUM_PHASES + value: "3" +- name: CALLS_PER_PHASE + value: "1" +- name: INTRA_PHASE_COOLDOWN_SEC + value: "2" +- name: INTER_PHASE_COOLDOWN_SEC + value: "2" +- name: TARGET_OUTPUT_TOKENS + value: "200" +``` + +## File structure + +``` +durable-agent-demo/ +├── demo-client.sh # bash CLI: start, stream, steer, crash, cancel, logs, status, reset +├── azure.yaml # azd service config +├── build.sh # copies sdk/agentserver/wheels/*.whl into src/.../wheels/ for docker +├── infra/ # Bicep templates +├── src/durable-research-agent/ +│ ├── agent.py # @task deep_research — durability + steering logic +│ ├── app.py # InvocationAgentServerHost — minimal HTTP plumbing +│ ├── agent.yaml # Foundry agent definition +│ ├── Dockerfile # python:3.12-slim → python app.py +│ ├── requirements.txt +│ └── wheels/ # GITIGNORED — docker-build staging dir populated by build.sh +└── README.md +``` + +The `@task` private-preview wheels are checked in at +[`sdk/agentserver/wheels/`](../../../wheels) — `./build.sh` just +copies them into this sample's `wheels/` so the Dockerfile can `COPY` +them at image-build time. See +[`sdk/agentserver/wheels/README.md`](../../../wheels/README.md) +for the consumer workflow.
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/azure.yaml b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/azure.yaml new file mode 100644 index 000000000000..ab04c49b90e6 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/azure.yaml @@ -0,0 +1,31 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json + +requiredVersions: + extensions: + azure.ai.agents: '>=0.1.0-preview' +name: durable-research-agent-demo +services: + durable-research-agent: + project: src/durable-research-agent + host: azure.ai.agent + language: docker + docker: + remoteBuild: true + config: + container: + resources: + cpu: "1" + memory: 2Gi + deployments: + - model: + format: OpenAI + name: gpt-4.1-mini + version: "2025-04-14" + name: gpt-4.1-mini + sku: + capacity: 1053 + name: GlobalStandard + startupCommand: python app.py +infra: + provider: bicep + path: ./infra diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/build.sh b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/build.sh new file mode 100755 index 000000000000..484d16ea39ae --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/build.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Stage agentserver @task preview wheels into the docker build context. +# Run this BEFORE 'azd up' or 'docker build'. +# +# Wheels are checked into the repo at sdk/agentserver/wheels/ — this +# script just copies them into a per-sample docker-build staging dir +# (src/durable-research-agent/wheels/, gitignored) so the Dockerfile's +# `COPY wheels/ /tmp/wheels/` finds them at build time. +# +# To refresh the source wheels (maintainer-only — devs shouldn't need +# to do this), see ../../../wheels/README.md (i.e. sdk/agentserver/wheels/).
+ +set -euo pipefail + +# The repo root is five directory levels above this sample directory. +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../../../../.." && pwd)" +CENTRAL_WHEELS="$REPO_ROOT/sdk/agentserver/wheels" +STAGING_DIR="$SCRIPT_DIR/src/durable-research-agent/wheels" + +# Fail early when the central wheels dir is missing or holds no *.whl +# (ls on an unmatched glob exits non-zero). +if [[ ! -d "$CENTRAL_WHEELS" ]] || ! ls "$CENTRAL_WHEELS"/*.whl >/dev/null 2>&1; then + echo "ERROR: no checked-in wheels found at $CENTRAL_WHEELS" >&2 + echo " Did you pull the latest from feature/agentserver-durable-tasks?" >&2 + exit 1 +fi + +echo "==> Staging checked-in @task preview wheels into docker build context" +echo " src: $CENTRAL_WHEELS" +echo " dst: $STAGING_DIR" +# Recreate the staging dir from scratch so only the current wheels are staged. +rm -rf "$STAGING_DIR" +mkdir -p "$STAGING_DIR" +cp "$CENTRAL_WHEELS"/*.whl "$STAGING_DIR"/ +ls -la "$STAGING_DIR"/*.whl + +echo "" +echo "Done. Now run: azd up (or docker build)" + + diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/demo-client.sh b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/demo-client.sh new file mode 100755 index 000000000000..a98f3458b619 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/demo-client.sh @@ -0,0 +1,577 @@ +#!/usr/bin/env bash +# ───────────────────────────────────────────────────────────────────────────── +# Durable Research Agent — Demo Client +# +# Showcases three platform capabilities of the durable-task primitive +# (all empirically validated against e2e-tests-westus2): +# 1. LONG-RUNNING TASKS — the framework's PATCH .../tasks/{id} lease +# renewals (every ~30s) keep the platform's sandbox idle-reclaim +# timer fresh, so a single run stays warm well past the 15-min +# eviction window without any client-side keepalive ingress. +# 2. CRASH RECOVERY — when the container dies, the platform's nanny +# worker restarts it within ~1 min on its own (no new ingress +# needed); the durable task auto-resumes from its last checkpoint. +# 3.
STEERING — sending a new turn while a turn is still running +# causes the agent to wind down at the next checkpoint and start +# fresh on the new topic. +# +# Commands: +# ./demo-client.sh start "<topic>" Dispatch and stream a fresh research run +# ./demo-client.sh stream Reconnect to the active run (no fresh POST) +# ./demo-client.sh steer "<new topic>" Queue a steering input — agent winds down +# current turn at next checkpoint and switches +# ./demo-client.sh crash Kill the process (DEMO_MODE=1 on server) +# ./demo-client.sh cancel Operator cancel of the active run +# ./demo-client.sh status Show local session info +# ./demo-client.sh logs Stream container stdout/stderr via azd +# ./demo-client.sh reset Clear local session state +# ───────────────────────────────────────────────────────────────────────────── + +set -uo pipefail + +# ── Config ──────────────────────────────────────────────────────────────────── + +ENDPOINT="https://e2e-tests-westus2-account.services.ai.azure.com/api/projects/e2e-tests-westus2/agents/durable-research-agent/endpoint/protocols" +API_VERSION="v1" +SESSION_FILE=".demo-session" + +# ── Colors ──────────────────────────────────────────────────────────────────── + +BOLD='\033[1m' +DIM='\033[2m' +GREEN='\033[32m' +YELLOW='\033[33m' +RED='\033[31m' +CYAN='\033[36m' +MAGENTA='\033[35m' +BLUE='\033[34m' +RESET='\033[0m' + +# ── Session state ───────────────────────────────────────────────────────────── + +# Source $SESSION_FILE (if present) so the assignments written by +# save_session become variables in this shell. No-op when the file is absent. +load_session() { + if [[ -f "$SESSION_FILE" ]]; then + # shellcheck disable=SC1090 + source "$SESSION_FILE" + fi +} + +# Overwrite $SESSION_FILE with SESSION_ID, INV_ID and LAST_EVENT_ID as +# double-quoted shell assignments. Unset values are written as empty +# (LAST_EVENT_ID as 0). +save_session() { + { + echo "SESSION_ID=\"${SESSION_ID:-}\"" + echo "INV_ID=\"${INV_ID:-}\"" + echo "LAST_EVENT_ID=\"${LAST_EVENT_ID:-0}\"" + } > "$SESSION_FILE" +} + +# Populate TOKEN via 'az account get-access-token' unless it is already +# set. az's stderr is discarded; an empty result prints a hint and exits 1. +ensure_token() { + if [[ -z "${TOKEN:-}" ]]; then + TOKEN=$(az account get-access-token --resource https://ai.azure.com --query accessToken -o tsv 2>/dev/null) + if [[ -z "$TOKEN" ]]; then + echo -e "${RED}Failed to get Azure token.
Run 'az login' first.${RESET}" >&2 + exit 1 + fi + fi +} + +# Read a top-level JSON field. Returns empty string on missing/null or +# unparseable input. Used only by the one-shot POST helpers below +# (start / steer) to extract invocation_id / session_id from the dispatch +# response. The SSE stream path does its own parsing in the python renderer. +# $1 = JSON text, $2 = key. The key is handed to python as argv[1] rather +# than spliced into the program text, so a quote or backslash in it cannot +# break or alter the script. +_jq() { + local json="$1" + local key="$2" + echo "$json" | python3 -c " +import sys, json +try: + d = json.loads(sys.stdin.read()) + v = d.get(sys.argv[1]) + print('' if v is None else v) +except Exception: + print('') +" "$key" 2>/dev/null +} + +# ── SSE stream renderer (Python — see comment) ─────────────────────────────── + +# Why a python renderer instead of bash: +# - At LLM emit rate (50-100 tok/s) the original bash 'while read | +# printf' loop made the real interactive terminal the bottleneck: +# one printf-per-token caused syscall thrash and built up a backlog +# that hid the EOF (real crash signal) behind minutes of TTY draining. +# - python with select() + a small in-memory token buffer (flushed +# every FLUSH_MS) writes the terminal in batches — ~20x fewer +# syscalls in steady state, no backlog, EOF is observed promptly. +# - The renderer trusts EOF on stdin as the authoritative crash signal. +# No time-based "is the stream stale?" heuristic — those mis-fire +# during the demo's legitimate 30s cooldowns between subcalls/phases. +# When curl closes (server crash, network drop, ctrl-c) the renderer +# sees EOF and exits. When the server emits 'done' or 'run_complete' +# the renderer exits cleanly. There is no third path. +# - Renderer formatting and color codes match the previous bash version +# exactly so prior demo expectations still hold. +# +# Contract with bash: +# stdin = raw SSE frames from curl (id: N / data: ...)
+# env = $INITIAL_EVENT_ID (resume cursor), $STATE_FILE (path to write +# back LAST_EVENT_ID + STREAM_RESULT on exit), $FLUSH_MS +# stdout = rendered output +# exit = 0 normally; non-zero only on hard errors + +_PY_RENDERER=' +import json, os, sys, select, time +from datetime import datetime, timezone + +# Bring the env-provided knobs in once. +INITIAL_EVENT_ID = int(os.environ.get("INITIAL_EVENT_ID", "0") or "0") +STATE_FILE = os.environ.get("STATE_FILE", "") +FLUSH_MS = float(os.environ.get("FLUSH_MS", "50")) + +# CRITICAL: This entire block lives inside a bash heredoc delimited by +# the apostrophe character (the bash assignment `_PY_RENDERER=` then an +# opening apostrophe, opaque content, closing apostrophe at column 1 +# of an otherwise empty line). Any literal apostrophe in Python code +# below will silently end the heredoc and truncate the script — debug +# symptom is a NameError several lines later. Use double quotes for +# every Python string literal. Keys we pull from event dicts are +# aliased to module-level CONSTANTS up here so the per-event code +# stays readable without inline string literals becoming a foot-gun. +_DSEC = "duration_sec" + +# ANSI palette — mirrors demo-client.sh. 
+BOLD, DIM = "\033[1m", "\033[2m" +GREEN, YELLOW, RED = "\033[32m", "\033[33m", "\033[31m" +CYAN, MAGENTA, BLUE = "\033[36m", "\033[35m", "\033[34m" +RESET = "\033[0m" + +out = sys.stdout +def write(s): out.write(s) +def flush(): out.flush() + +def now_utc(): + return datetime.now(timezone.utc).strftime("%H:%M:%SZ") + +last_event_id = INITIAL_EVENT_ID +result = "disconnected" +token_buf = [] # collected token content +last_flush = time.monotonic() + +def flush_tokens(): + global token_buf, last_flush + if token_buf: + write("".join(token_buf)) + flush() + token_buf = [] + last_flush = time.monotonic() + +def render_block(evt): + """Render any non-token event with the same shape as the old bash render.""" + t = evt.get("type", "") + n = now_utc() + if t == "run_start": + topic = evt.get("topic", "") + em = evt.get("entry_mode", "") + total = evt.get("total_phases", "") + uptime = evt.get("server_uptime_sec", "") + srv = evt.get("server_time_utc", "") + prior = evt.get("prior_topic") + write("\n") + write(f"{BOLD}{CYAN}{chr(0x2550)*62}{RESET}\n") + write(f"{DIM}[{n}]{RESET} {BOLD}{CYAN}\u25b6 Run start{RESET} topic={BOLD}{topic}{RESET} ({total} phases)\n") + if prior: + write(f" {YELLOW}(steered from prior topic: {prior}){RESET}\n") + write(f" entry_mode={em} server_time={srv} uptime={uptime}s\n") + write(f"{BOLD}{CYAN}{chr(0x2550)*62}{RESET}\n") + elif t == "recovered": + c, total = evt.get("completed_phases", ""), evt.get("total_phases", "") + srv, uptime = evt.get("server_time_utc", ""), evt.get("server_uptime_sec", "") + write("\n") + write(f"{DIM}[{n}]{RESET} {BOLD}{GREEN}\U0001f501 Recovered from crash{RESET} resuming from phase {c}/{total}\n") + write(f" server_time={srv} uptime={uptime}s {DIM}(uptime ~0s = fresh container){RESET}\n") + elif t == "phase_start": + ph, total = evt.get("phase", ""), evt.get("total", "") + title = evt.get("title", "") + srv, uptime = evt.get("server_time_utc", ""), evt.get("server_uptime_sec", "") + write("\n") + 
write(f"{BOLD}{BLUE}{chr(0x2500)*62}{RESET}\n") + write(f"{DIM}[{n}]{RESET} {BOLD}{BLUE}\u25b6 Phase {ph}/{total}{RESET} \u2014 {title}\n") + write(f" \u23f0 server_time={srv} uptime={uptime}s\n") + write(f"{BOLD}{BLUE}{chr(0x2500)*62}{RESET}\n") + elif t == "subcall_start": + role = evt.get("role", "") + idx, of = evt.get("index", ""), evt.get("of", "") + write(f"\n{DIM} [{n}] [{role} {idx}/{of}] \u2500\u2500\u2500{RESET}\n") + elif t == "subcall_end": + write("\n") + elif t == "phase_end": + ph, total = evt.get("phase", ""), evt.get("total", "") + title = evt.get("title", "") + srv, uptime, dur = evt.get("server_time_utc", ""), evt.get("server_uptime_sec", ""), evt.get("duration_sec", "") + write(f"\n{DIM}[{n}]{RESET} {GREEN}\u2705 Phase {ph}/{total} done{RESET} \u2014 {title}\n") + write(f" \u23f0 server_time={srv} uptime={uptime}s \u23f1 duration={dur}s\n") + elif t == "winding_down": + cause = evt.get("cause", ""); c = evt.get("completed_phases", "") + total = evt.get("total_phases", ""); pend = evt.get("pending_steering_inputs", "") + srv, uptime = evt.get("server_time_utc", ""), evt.get("server_uptime_sec", "") + write(f"\n{DIM}[{n}]{RESET} {BOLD}{MAGENTA}\u2193 Winding down{RESET} cause={cause} completed={c}/{total} pending_steers={pend}\n") + write(f" \u23f0 server_time={srv} uptime={uptime}s\n") + elif t == "cooldown": + # Server is intentionally sleeping (between subcalls or phases). + # Render a single low-key line so the terminal is not silent. + # NOTE: keep Python string literals in this heredoc strictly + # double-quoted. A literal apostrophe ends the surrounding + # bash heredoc and causes a confusing NameError several lines + # later when the truncated script is parsed. 
+ try: + dur_str = f"{float(evt.get(_DSEC, 0)):.0f}" + except (TypeError, ValueError): + dur_str = str(evt.get(_DSEC, "?")) + stage = evt.get("stage", "") + ph = evt.get("phase", "") + total = evt.get("total", "") + sub = evt.get("subcall") + of = evt.get("of") + label = "between phases" if stage == "inter_phase" else "between subcalls" + if stage == "inter_phase": + detail = f"next: phase {ph}/{total}" + elif sub is not None and of is not None: + detail = f"next: subcall {sub}/{of} in phase {ph}/{total}" + else: + detail = f"phase {ph}/{total}" + write(f"{DIM}[{n}] ...cooling down {dur_str}s ({label}) \u2014 {detail}{RESET}\n") + elif t == "run_complete": + total = evt.get("phases_completed", "") + srv, uptime = evt.get("server_time_utc", ""), evt.get("server_uptime_sec", "") + write(f"\n{BOLD}{GREEN}{chr(0x2550)*62}{RESET}\n") + write(f"{DIM}[{n}]{RESET} {BOLD}{GREEN}\u2705 Run complete{RESET} {total} phases \u23f0 {srv} uptime={uptime}s\n") + write(f"{BOLD}{GREEN}{chr(0x2550)*62}{RESET}\n") + elif t == "done": + reason = evt.get("reason") + msg = f" ({reason})" if reason else "" + col = YELLOW if reason else GREEN + write(f"\n{DIM}[{n}]{RESET} {col}\u2550\u2550 Stream done{msg} \u2550\u2550{RESET}\n") + else: + write(f"{DIM}[{n}] [unknown event] {json.dumps(evt)}{RESET}\n") + flush() + +stdin_fd = sys.stdin.fileno() + +try: + pending = b"" + while True: + deadline = last_flush + FLUSH_MS / 1000.0 + timeout = max(0.0, deadline - time.monotonic()) + r, _, _ = select.select([stdin_fd], [], [], timeout) + if r: + try: + chunk = os.read(stdin_fd, 65536) + except OSError: + chunk = b"" + if not chunk: + # EOF — server (or proxy) closed the SSE stream. This is + # the authoritative crash/disconnect signal. + flush_tokens() + break + pending += chunk + # Process complete lines only. 
+ while b"\n" in pending: + line_b, pending = pending.split(b"\n", 1) + line = line_b.decode("utf-8", errors="replace").rstrip("\r") + if not line or line.startswith(":"): + continue + if line.startswith("id:"): + try: + last_event_id = int(line[3:].strip()) + except ValueError: + pass + continue + if not line.startswith("data:"): + continue + payload = line[5:].lstrip() + try: + evt = json.loads(payload) + except json.JSONDecodeError: + continue + t = evt.get("type", "") + if t == "token": + # Hot path: buffer content. Periodic flush + flush on + # non-token gives smooth visual output without + # per-token TTY syscall thrash. + c = evt.get("content") + if isinstance(c, str): + token_buf.append(c) + else: + # Flush any pending tokens BEFORE emitting block event + # so they appear in the right place visually. + flush_tokens() + render_block(evt) + if t in ("done", "run_complete"): + result = "complete" + # Drain any remaining buffered tokens (none if we + # just flushed) and exit. + flush_tokens() + raise StopIteration + else: + # Periodic flush deadline reached with no data. + flush_tokens() + # No watchdog: EOF on stdin (above) is the authoritative + # crash/disconnect signal. The select() timeout just drives + # the periodic token-buffer flush. +except StopIteration: + pass +except KeyboardInterrupt: + flush_tokens() +finally: + if STATE_FILE: + try: + with open(STATE_FILE, "w") as fh: + fh.write(f"LAST_EVENT_ID={last_event_id}\n") + fh.write(f"STREAM_RESULT={result}\n") + except OSError: + pass +' + +# ── SSE reader ─────────────────────────────────────────────────────────────── + +STREAM_RESULT="" # "complete" | "disconnected" | "error" + +stream_sse() { + local url="$1" + STREAM_RESULT="disconnected" + + local state_file + state_file=$(mktemp) + + # Pipe curl directly into the python renderer. 
EOF on the pipe is + # the authoritative disconnect signal — when curl sees the server + # close the TCP socket it closes its stdout, the renderer sees EOF + # on stdin, and we exit cleanly. No watchdog, no PID juggling. + INITIAL_EVENT_ID="${LAST_EVENT_ID:-0}" \ + STATE_FILE="$state_file" \ + FLUSH_MS="${FLUSH_MS:-50}" \ + bash -c 'curl -sN -X GET \ + -H "Authorization: Bearer '"$TOKEN"'" \ + -H "Accept: text/event-stream" \ + -H "Foundry-Features: HostedAgents=V1Preview" \ + "'"$url"'" | python3 -u -c "$1"' _ "$_PY_RENDERER" + + if [[ -f "$state_file" ]]; then + # shellcheck disable=SC1090 + source "$state_file" + rm -f "$state_file" + fi + save_session +} + +# ── Commands ────────────────────────────────────────────────────────────────── + +cmd_start() { + local topic="${1:-Research the future of quantum computing}" + SESSION_ID="demo-$(uuidgen | tr '[:upper:]' '[:lower:]')" + INV_ID="" + LAST_EVENT_ID="0" + save_session + ensure_token + + echo -e "${GREEN}New session: ${SESSION_ID}${RESET}" + echo -e "${DIM}Topic: ${topic}${RESET}" + + local response + response=$(curl -s -X POST \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -H "Foundry-Features: HostedAgents=V1Preview" \ + -d "{\"message\": \"${topic}\"}" \ + "${ENDPOINT}/invocations?api-version=${API_VERSION}&agent_session_id=${SESSION_ID}") + INV_ID=$(_jq "$response" invocation_id) + SESSION_ID=$(_jq "$response" session_id) + save_session + echo -e "${DIM}Dispatched: invocation_id=${INV_ID}${RESET}" + + echo "" + echo -e "${BOLD}Streaming. ${DIM}Use Ctrl-C to detach; reconnect later with './demo-client.sh stream'.${RESET}" + stream_sse "${ENDPOINT}/invocations/${INV_ID}?api-version=${API_VERSION}" + _report_stream_result +} + +cmd_stream() { + load_session + if [[ -z "${INV_ID:-}" ]]; then + echo -e "${RED}No active session. 
Run './demo-client.sh start \"\"' first.${RESET}" >&2 + exit 1 + fi + ensure_token + + echo -e "${DIM}Reconnecting to invocation ${INV_ID}${RESET}" + local url="${ENDPOINT}/invocations/${INV_ID}?api-version=${API_VERSION}" + if [[ "${LAST_EVENT_ID:-0}" != "0" ]]; then + url="${url}&last_event_id=${LAST_EVENT_ID}" + echo -e "${DIM}Resuming from event ${LAST_EVENT_ID}${RESET}" + fi + stream_sse "$url" + _report_stream_result +} + +cmd_steer() { + local topic="${1:-}" + if [[ -z "$topic" ]]; then + echo -e "${RED}Usage: ./demo-client.sh steer \"\"${RESET}" >&2 + exit 1 + fi + load_session + if [[ -z "${SESSION_ID:-}" ]]; then + echo -e "${RED}No active session. Run './demo-client.sh start \"\"' first.${RESET}" >&2 + exit 1 + fi + ensure_token + + echo -e "${BOLD}${MAGENTA}Steering session ${SESSION_ID} to: ${topic}${RESET}" + + # Send a fresh POST. Because the task is steerable and an in-progress + # run exists, the framework queues this as a steering input. + local response + response=$(curl -s -X POST \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -H "Foundry-Features: HostedAgents=V1Preview" \ + -d "{\"message\": \"${topic}\"}" \ + "${ENDPOINT}/invocations?api-version=${API_VERSION}&agent_session_id=${SESSION_ID}") + echo -e "${DIM}Response: ${response}${RESET}" + local new_inv + new_inv=$(_jq "$response" invocation_id) + if [[ -n "$new_inv" ]]; then + INV_ID="$new_inv" + LAST_EVENT_ID="0" + save_session + echo -e "${DIM}New invocation: ${INV_ID}. Use './demo-client.sh stream' to attach.${RESET}" + fi +} + +cmd_crash() { + load_session + if [[ -z "${SESSION_ID:-}" ]]; then + echo -e "${RED}No active session. 
Run './demo-client.sh start \"\"' first.${RESET}" >&2 + exit 1 + fi + ensure_token + + echo -e "${RED}${BOLD}💥 Crashing the agent container...${RESET}" + echo -e "${DIM}Session: ${SESSION_ID}${RESET}" + + # The platform only proxies /invocations* — we use the special + # "crash" sentinel message, which the agent (when DEMO_MODE=1) + # interprets as "exit the process". + local response + response=$(curl -s -X POST \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -H "Foundry-Features: HostedAgents=V1Preview" \ + -d '{"message": "crash"}' \ + "${ENDPOINT}/invocations?api-version=${API_VERSION}&agent_session_id=${SESSION_ID}") + echo -e "${DIM}Response: ${response}${RESET}" + echo "" + echo -e "${YELLOW}The container will exit. The platform's nanny worker brings it back${RESET}" + echo -e "${YELLOW}within ~1 min on its own (no client ingress needed) and the durable${RESET}" + echo -e "${YELLOW}task auto-recovers from its last checkpoint.${RESET}" + echo "" + echo -e "${DIM}Run './demo-client.sh stream' whenever you're ready to reconnect.${RESET}" + echo -e "${DIM}Look for a 'Recovered from crash' marker (uptime resets to ~0).${RESET}" +} + +cmd_cancel() { + load_session + if [[ -z "${INV_ID:-}" ]]; then + echo -e "${RED}No active session. 
Run './demo-client.sh start \"\"' first.${RESET}" >&2 + exit 1 + fi + ensure_token + + echo -e "${YELLOW}🛑 Cancelling invocation ${INV_ID}${RESET}" + local response + response=$(curl -s -X POST \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -H "Foundry-Features: HostedAgents=V1Preview" \ + -d '{}' \ + "${ENDPOINT}/invocations/${INV_ID}/cancel?api-version=${API_VERSION}") + echo -e "${GREEN}${response}${RESET}" +} + +cmd_status() { + load_session + if [[ -f "$SESSION_FILE" ]]; then + echo -e "${CYAN}Session ID:${RESET} ${SESSION_ID:-}" + echo -e "${CYAN}Invocation ID:${RESET} ${INV_ID:-}" + echo -e "${CYAN}Last event ID:${RESET} ${LAST_EVENT_ID:-0}" + else + echo -e "${DIM}No local session.${RESET}" + fi +} + +cmd_logs() { + load_session + if [[ -z "${SESSION_ID:-}" ]]; then + echo -e "${RED}No active session. Run './demo-client.sh start \"\"' first.${RESET}" >&2 + exit 1 + fi + echo -e "${DIM}Streaming container stdout/stderr for session ${SESSION_ID}${RESET}" + azd ai agent monitor --session-id "${SESSION_ID}" --follow +} + +cmd_reset() { + rm -f "$SESSION_FILE" + echo -e "${GREEN}Session cleared.${RESET}" +} + +_report_stream_result() { + case "$STREAM_RESULT" in + complete) + ;; + disconnected) + echo "" + echo -e "${YELLOW}── Stream disconnected ──${RESET}" + echo -e "${DIM}The agent may still be running on the server.${RESET}" + echo -e "${DIM}Reconnect with: ./demo-client.sh stream${RESET}" + ;; + error) + echo -e "${RED}── Stream error ──${RESET}" ;; + esac +} + +# ── Main ────────────────────────────────────────────────────────────────────── + +usage() { + cat <"${RESET} Dispatch a fresh research run and stream it + ${BOLD}stream${RESET} Reconnect to the active run (resumes from last_event_id) + ${BOLD}steer ""${RESET} Queue a steering input — agent winds down at next + checkpoint and starts fresh on the new topic + ${BOLD}crash${RESET} Kill the container (POST /invocations with message="crash"; + requires DEMO_MODE=1 on 
the server image) + ${BOLD}cancel${RESET} Cooperative cancel of the active run + ${BOLD}status${RESET} Show local session info + ${BOLD}logs${RESET} Stream container stdout/stderr (azd ai agent monitor) + ${BOLD}reset${RESET} Clear local session state + +Three-terminal workflow: + Terminal 1: ./demo-client.sh start "quantum computing" # streams ~33 min of phases + Terminal 2: ./demo-client.sh logs # peek at server logs + Terminal 3: ./demo-client.sh crash # any time → nanny restores ~1 min later + ./demo-client.sh steer "fusion energy" # mid-run pivot +EOF +} + +case "${1:-}" in + start) shift; cmd_start "${1:-}" ;; + stream) cmd_stream ;; + steer) shift; cmd_steer "${1:-}" ;; + crash) cmd_crash ;; + cancel) cmd_cancel ;; + status) cmd_status ;; + logs) cmd_logs ;; + reset) cmd_reset ;; + *) usage ;; +esac diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/abbreviations.json b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/abbreviations.json new file mode 100644 index 000000000000..879b2a9507b1 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/abbreviations.json @@ -0,0 +1,137 @@ +{ + "aiFoundryAccounts": "aif", + "analysisServicesServers": "as", + "apiManagementService": "apim-", + "appConfigurationStores": "appcs-", + "appManagedEnvironments": "cae-", + "appContainerApps": "ca-", + "authorizationPolicyDefinitions": "policy-", + "automationAutomationAccounts": "aa-", + "blueprintBlueprints": "bp-", + "blueprintBlueprintsArtifacts": "bpa-", + "cacheRedis": "redis-", + "cdnProfiles": "cdnp-", + "cdnProfilesEndpoints": "cdne-", + "cognitiveServicesAccounts": "cog-", + "cognitiveServicesFormRecognizer": "cog-fr-", + "cognitiveServicesTextAnalytics": "cog-ta-", + "computeAvailabilitySets": "avail-", + "computeCloudServices": "cld-", + "computeDiskEncryptionSets": "des", + "computeDisks": "disk", + "computeDisksOs": "osdisk", + 
"computeGalleries": "gal", + "computeSnapshots": "snap-", + "computeVirtualMachines": "vm", + "computeVirtualMachineScaleSets": "vmss-", + "containerInstanceContainerGroups": "ci", + "containerRegistryRegistries": "cr", + "containerServiceManagedClusters": "aks-", + "databricksWorkspaces": "dbw-", + "dataFactoryFactories": "adf-", + "dataLakeAnalyticsAccounts": "dla", + "dataLakeStoreAccounts": "dls", + "dataMigrationServices": "dms-", + "dBforMySQLServers": "mysql-", + "dBforPostgreSQLServers": "psql-", + "devicesIotHubs": "iot-", + "devicesProvisioningServices": "provs-", + "devicesProvisioningServicesCertificates": "pcert-", + "documentDBDatabaseAccounts": "cosmos-", + "documentDBMongoDatabaseAccounts": "cosmon-", + "eventGridDomains": "evgd-", + "eventGridDomainsTopics": "evgt-", + "eventGridEventSubscriptions": "evgs-", + "eventHubNamespaces": "evhns-", + "eventHubNamespacesEventHubs": "evh-", + "hdInsightClustersHadoop": "hadoop-", + "hdInsightClustersHbase": "hbase-", + "hdInsightClustersKafka": "kafka-", + "hdInsightClustersMl": "mls-", + "hdInsightClustersSpark": "spark-", + "hdInsightClustersStorm": "storm-", + "hybridComputeMachines": "arcs-", + "insightsActionGroups": "ag-", + "insightsComponents": "appi-", + "keyVaultVaults": "kv-", + "kubernetesConnectedClusters": "arck", + "kustoClusters": "dec", + "kustoClustersDatabases": "dedb", + "logicIntegrationAccounts": "ia-", + "logicWorkflows": "logic-", + "machineLearningServicesWorkspaces": "mlw-", + "managedIdentityUserAssignedIdentities": "id-", + "managementManagementGroups": "mg-", + "migrateAssessmentProjects": "migr-", + "networkApplicationGateways": "agw-", + "networkApplicationSecurityGroups": "asg-", + "networkAzureFirewalls": "afw-", + "networkBastionHosts": "bas-", + "networkConnections": "con-", + "networkDnsZones": "dnsz-", + "networkExpressRouteCircuits": "erc-", + "networkFirewallPolicies": "afwp-", + "networkFirewallPoliciesWebApplication": "waf", + "networkFirewallPoliciesRuleGroups": 
"wafrg", + "networkFrontDoors": "fd-", + "networkFrontdoorWebApplicationFirewallPolicies": "fdfp-", + "networkLoadBalancersExternal": "lbe-", + "networkLoadBalancersInternal": "lbi-", + "networkLoadBalancersInboundNatRules": "rule-", + "networkLocalNetworkGateways": "lgw-", + "networkNatGateways": "ng-", + "networkNetworkInterfaces": "nic-", + "networkNetworkSecurityGroups": "nsg-", + "networkNetworkSecurityGroupsSecurityRules": "nsgsr-", + "networkNetworkWatchers": "nw-", + "networkPrivateDnsZones": "pdnsz-", + "networkPrivateLinkServices": "pl-", + "networkPublicIPAddresses": "pip-", + "networkPublicIPPrefixes": "ippre-", + "networkRouteFilters": "rf-", + "networkRouteTables": "rt-", + "networkRouteTablesRoutes": "udr-", + "networkTrafficManagerProfiles": "traf-", + "networkVirtualNetworkGateways": "vgw-", + "networkVirtualNetworks": "vnet-", + "networkVirtualNetworksSubnets": "snet-", + "networkVirtualNetworksVirtualNetworkPeerings": "peer-", + "networkVirtualWans": "vwan-", + "networkVpnGateways": "vpng-", + "networkVpnGatewaysVpnConnections": "vcn-", + "networkVpnGatewaysVpnSites": "vst-", + "notificationHubsNamespaces": "ntfns-", + "notificationHubsNamespacesNotificationHubs": "ntf-", + "operationalInsightsWorkspaces": "log-", + "portalDashboards": "dash-", + "powerBIDedicatedCapacities": "pbi-", + "purviewAccounts": "pview-", + "recoveryServicesVaults": "rsv-", + "resourcesResourceGroups": "rg-", + "searchSearchServices": "srch-", + "serviceBusNamespaces": "sb-", + "serviceBusNamespacesQueues": "sbq-", + "serviceBusNamespacesTopics": "sbt-", + "serviceEndPointPolicies": "se-", + "serviceFabricClusters": "sf-", + "signalRServiceSignalR": "sigr", + "sqlManagedInstances": "sqlmi-", + "sqlServers": "sql-", + "sqlServersDataWarehouse": "sqldw-", + "sqlServersDatabases": "sqldb-", + "sqlServersDatabasesStretch": "sqlstrdb-", + "storageStorageAccounts": "st", + "storageStorageAccountsVm": "stvm", + "storSimpleManagers": "ssimp", + "streamAnalyticsCluster": "asa-", 
+ "synapseWorkspaces": "syn", + "synapseWorkspacesAnalyticsWorkspaces": "synw", + "synapseWorkspacesSqlPoolsDedicated": "syndp", + "synapseWorkspacesSqlPoolsSpark": "synsp", + "timeSeriesInsightsEnvironments": "tsi-", + "webServerFarms": "plan-", + "webSitesAppService": "app-", + "webSitesAppServiceEnvironment": "ase-", + "webSitesFunctions": "func-", + "webStaticSites": "stapp-" +} diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/acr-role-assignment.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/acr-role-assignment.bicep new file mode 100644 index 000000000000..3e0c2b218be7 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/acr-role-assignment.bicep @@ -0,0 +1,27 @@ +targetScope = 'resourceGroup' + +@description('Name of the existing container registry') +param acrName string + +@description('Principal ID to grant AcrPull role') +param principalId string + +@description('Full resource ID of the ACR (for generating unique GUID)') +param acrResourceId string + +// Reference the existing ACR in this resource group +resource acr 'Microsoft.ContainerRegistry/registries@2023-07-01' existing = { + name: acrName +} + +// Grant AcrPull role to the AI project's managed identity +resource acrPullRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + scope: acr + name: guid(acrResourceId, principalId, '7f951dda-4ed3-4680-a7ca-43fe172d538d') + properties: { + principalId: principalId + principalType: 'ServicePrincipal' + // AcrPull role + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '7f951dda-4ed3-4680-a7ca-43fe172d538d') + } +} diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/ai-project.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/ai-project.bicep new 
file mode 100644 index 000000000000..662b53c001c8 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/ai-project.bicep @@ -0,0 +1,413 @@ +targetScope = 'resourceGroup' + +@description('Tags that will be applied to all resources') +param tags object = {} + +@description('Main location for the resources') +param location string + +var resourceToken = uniqueString(subscription().id, resourceGroup().id, location) + +@description('Name of the project') +param aiFoundryProjectName string + +param deployments deploymentsType + +@description('Id of the user or app to assign application roles') +param principalId string + +@description('Principal type of user or app') +param principalType string + +@description('Optional. Name of an existing AI Services account in the current resource group. If not provided, a new one will be created.') +param existingAiAccountName string = '' + +@description('List of connections to provision') +param connections array = [] + +@secure() +@description('Map of connection name to credentials object. Kept as @secure to prevent secrets from appearing in deployment logs. Example: { "my-conn": { "key": "secret" } }') +param connectionCredentials object = {} + +@description('Also provision dependent resources and connect to the project') +param additionalDependentResources dependentResourcesType + +@description('Enable monitoring via appinsights and log analytics') +param enableMonitoring bool = true + +@description('Enable hosted agent deployment') +param enableHostedAgents bool = false + +@description('Enable the capability host for agent conversations. When false and hosted agents are enabled, the capability host is not created (v2 hosted agents handle storage automatically).') +param enableCapabilityHost bool = true + +@description('Optional. Existing container registry resource ID. 
If provided, a connection will be created to this ACR instead of creating a new one.') +param existingContainerRegistryResourceId string = '' + +@description('Optional. Existing container registry login server (e.g., myregistry.azurecr.io). Required if existingContainerRegistryResourceId is provided.') +param existingContainerRegistryEndpoint string = '' + +@description('Optional. Name of an existing ACR connection on the Foundry project. If provided, no new ACR or connection will be created.') +param existingAcrConnectionName string = '' + +@description('Optional. Existing Application Insights connection string. If provided, a connection will be created but no new App Insights resource.') +param existingApplicationInsightsConnectionString string = '' + +@description('Optional. Existing Application Insights resource ID. Used for connection metadata when providing an existing App Insights.') +param existingApplicationInsightsResourceId string = '' + +@description('Optional. Name of an existing Application Insights connection on the Foundry project. 
If provided, no new App Insights or connection will be created.') +param existingAppInsightsConnectionName string = '' + +// Load abbreviations +var abbrs = loadJsonContent('../../abbreviations.json') + +// Determine which resources to create based on connections +var hasStorageConnection = length(filter(additionalDependentResources, conn => conn.resource == 'storage')) > 0 +var hasAcrConnection = length(filter(additionalDependentResources, conn => conn.resource == 'registry')) > 0 +var hasExistingAcr = !empty(existingContainerRegistryResourceId) +var hasExistingAcrConnection = !empty(existingAcrConnectionName) +var hasExistingAppInsightsConnection = !empty(existingAppInsightsConnectionName) +var hasExistingAppInsightsConnectionString = !empty(existingApplicationInsightsConnectionString) +// Only create new App Insights resources if monitoring enabled and no existing connection/connection string +var shouldCreateAppInsights = enableMonitoring && !hasExistingAppInsightsConnection && !hasExistingAppInsightsConnectionString +var hasSearchConnection = length(filter(additionalDependentResources, conn => conn.resource == 'azure_ai_search')) > 0 +var hasBingConnection = length(filter(additionalDependentResources, conn => conn.resource == 'bing_grounding')) > 0 +var hasBingCustomConnection = length(filter(additionalDependentResources, conn => conn.resource == 'bing_custom_grounding')) > 0 + +// Extract connection names from ai.yaml for each resource type +var storageConnectionName = hasStorageConnection ? filter(additionalDependentResources, conn => conn.resource == 'storage')[0].connectionName : '' +var acrConnectionName = hasAcrConnection ? filter(additionalDependentResources, conn => conn.resource == 'registry')[0].connectionName : '' +var searchConnectionName = hasSearchConnection ? filter(additionalDependentResources, conn => conn.resource == 'azure_ai_search')[0].connectionName : '' +var bingConnectionName = hasBingConnection ? 
filter(additionalDependentResources, conn => conn.resource == 'bing_grounding')[0].connectionName : '' +var bingCustomConnectionName = hasBingCustomConnection ? filter(additionalDependentResources, conn => conn.resource == 'bing_custom_grounding')[0].connectionName : '' + +// Enable monitoring via Log Analytics and Application Insights +module logAnalytics '../monitor/loganalytics.bicep' = if (shouldCreateAppInsights) { + name: 'logAnalytics' + params: { + location: location + tags: tags + name: 'logs-${resourceToken}' + } +} + +module applicationInsights '../monitor/applicationinsights.bicep' = if (shouldCreateAppInsights) { + name: 'applicationInsights' + params: { + location: location + tags: tags + name: 'appi-${resourceToken}' + logAnalyticsWorkspaceId: logAnalytics.outputs.id + projectMIPrincipalId: aiAccount::project.identity.principalId + } +} + +// Always create a new AI Account for now (simplified approach) +// TODO: Add support for existing accounts in a future version +resource aiAccount 'Microsoft.CognitiveServices/accounts@2025-06-01' = { + name: !empty(existingAiAccountName) ? existingAiAccountName : 'ai-account-${resourceToken}' + location: location + tags: tags + sku: { + name: 'S0' + } + kind: 'AIServices' + identity: { + type: 'SystemAssigned' + } + properties: { + allowProjectManagement: true + customSubDomainName: !empty(existingAiAccountName) ? 
existingAiAccountName : 'ai-account-${resourceToken}' + networkAcls: { + defaultAction: 'Allow' + virtualNetworkRules: [] + ipRules: [] + } + publicNetworkAccess: 'Enabled' + disableLocalAuth: true + } + + @batchSize(1) + resource seqDeployments 'deployments' = [ + for dep in (deployments??[]): { + name: dep.name + properties: { + model: dep.model + } + sku: dep.sku + } + ] + + resource project 'projects' = { + name: aiFoundryProjectName + location: location + identity: { + type: 'SystemAssigned' + } + properties: { + description: '${aiFoundryProjectName} Project' + displayName: '${aiFoundryProjectName}Project' + } + dependsOn: [ + seqDeployments + ] + } + + resource aiFoundryAccountCapabilityHost 'capabilityHosts@2025-10-01-preview' = if (enableHostedAgents && enableCapabilityHost) { + name: 'agents' + properties: { + capabilityHostKind: 'Agents' + // IMPORTANT: this is required to enable hosted agents deployment + // if no BYO Net is provided + enablePublicHostingEnvironment: true + } + } +} + + +// Create connection towards appinsights: +// - when we create a new App Insights resource, OR +// - when the user provided an existing App Insights connection string + resource ID but no existing connection name +// Both cases are merged into a single resource to avoid duplicate ARM resource definitions (which fail deployment). +var shouldCreateExistingAppInsightsConnection = enableMonitoring && hasExistingAppInsightsConnectionString && !hasExistingAppInsightsConnection && !empty(existingApplicationInsightsResourceId) +var shouldCreateAppInsightsConnection = shouldCreateAppInsights || shouldCreateExistingAppInsightsConnection + +resource appInsightConnection 'Microsoft.CognitiveServices/accounts/projects/connections@2025-04-01-preview' = if (shouldCreateAppInsightsConnection) { + parent: aiAccount::project + name: 'appi-${resourceToken}' + properties: { + category: 'AppInsights' + target: shouldCreateAppInsights ? 
applicationInsights.outputs.id : existingApplicationInsightsResourceId
    authType: 'ApiKey'
    isSharedToAll: true
    credentials: {
      key: shouldCreateAppInsights ? applicationInsights.outputs.connectionString : existingApplicationInsightsConnectionString
    }
    metadata: {
      ApiType: 'Azure'
      ResourceId: shouldCreateAppInsights ? applicationInsights.outputs.id : existingApplicationInsightsResourceId
    }
  }
}

// Create additional connections from ai.yaml configuration
module aiConnections './connection.bicep' = [for (connection, index) in connections: {
  name: 'connection-${connection.name}'
  params: {
    aiServicesAccountName: aiAccount.name
    aiProjectName: aiAccount::project.name
    connectionConfig: connection
    // Safe-dereference: a connection without an entry in the secure map gets an empty credentials object.
    credentials: connectionCredentials[?connection.name] ?? {}
  }
}]

// Azure AI User for the developer, scoped to the Foundry Project.
// Project scope is sufficient for creating/running agents and calling models via the project endpoint.
resource localUserAzureAIUserRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
  scope: aiAccount::project
  name: guid(subscription().id, resourceGroup().id, principalId, '53ca6127-db72-4b80-b1b0-d745d6d5456d')
  properties: {
    principalId: principalId
    principalType: principalType
    // Built-in role definitions are subscription-level resources, so build the ID with
    // subscriptionResourceId(); resourceId() in a resource-group-scoped template would add a
    // resourceGroups segment. This matches the AcrPull assignment in acr-role-assignment.bicep.
    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '53ca6127-db72-4b80-b1b0-d745d6d5456d')
  }
}


// All connections are now created directly within their respective resource modules
// using the centralized ./connection.bicep module

// Storage module - deploy if storage connection is defined in ai.yaml
module storage '../storage/storage.bicep' = if (hasStorageConnection) {
  name: 'storage'
  params: {
    location: location
    tags: tags
    resourceName: 'st${resourceToken}'
    connectionName: storageConnectionName
    principalId: principalId
    principalType: principalType
    aiServicesAccountName: aiAccount.name
    aiProjectName: aiAccount::project.name
  }
}

// Azure
Container Registry module - deploy if ACR connection is defined in ai.yaml +module acr '../host/acr.bicep' = if (hasAcrConnection) { + name: 'acr' + params: { + location: location + tags: tags + resourceName: '${abbrs.containerRegistryRegistries}${resourceToken}' + connectionName: acrConnectionName + principalId: principalId + principalType: principalType + aiServicesAccountName: aiAccount.name + aiProjectName: aiAccount::project.name + } +} + +// Connection for existing ACR - create if user provided an existing ACR resource ID but no existing connection +module existingAcrConnection './connection.bicep' = if (hasExistingAcr && !hasExistingAcrConnection) { + name: 'existing-acr-connection' + params: { + aiServicesAccountName: aiAccount.name + aiProjectName: aiAccount::project.name + connectionConfig: { + name: 'acr-${resourceToken}' + category: 'ContainerRegistry' + target: existingContainerRegistryEndpoint + authType: 'ManagedIdentity' + isSharedToAll: true + metadata: { + ResourceId: existingContainerRegistryResourceId + } + } + credentials: { + clientId: aiAccount::project.identity.principalId + resourceId: existingContainerRegistryResourceId + } + } +} + +// Extract resource group name from the existing ACR resource ID +// Resource ID format: /subscriptions/{sub}/resourceGroups/{rg}/providers/Microsoft.ContainerRegistry/registries/{name} +var existingAcrResourceGroup = hasExistingAcr ? split(existingContainerRegistryResourceId, '/')[4] : '' +var existingAcrName = hasExistingAcr ? 
last(split(existingContainerRegistryResourceId, '/')) : '' + +// Grant AcrPull role to the AI project's managed identity on the existing ACR +// This allows the hosted agents to pull images from the user-provided registry +// Note: User must have permission to assign roles on the existing ACR (Owner or User Access Administrator) +// Using a module allows scoping to a different resource group if the ACR isn't in the same RG +// Skip if connection already exists (role assignment should already be in place) +module existingAcrRoleAssignment './acr-role-assignment.bicep' = if (hasExistingAcr && !hasExistingAcrConnection) { + name: 'existing-acr-role-assignment' + scope: resourceGroup(existingAcrResourceGroup) + params: { + acrName: existingAcrName + acrResourceId: existingContainerRegistryResourceId + principalId: aiAccount::project.identity.principalId + } +} + +// Bing Search grounding module - deploy if Bing connection is defined in ai.yaml or parameter is enabled +module bingGrounding '../search/bing_grounding.bicep' = if (hasBingConnection) { + name: 'bing-grounding' + params: { + tags: tags + resourceName: 'bing-${resourceToken}' + connectionName: bingConnectionName + aiServicesAccountName: aiAccount.name + aiProjectName: aiAccount::project.name + } +} + +// Bing Custom Search grounding module - deploy if custom Bing connection is defined in ai.yaml or parameter is enabled +module bingCustomGrounding '../search/bing_custom_grounding.bicep' = if (hasBingCustomConnection) { + name: 'bing-custom-grounding' + params: { + tags: tags + resourceName: 'bingcustom-${resourceToken}' + connectionName: bingCustomConnectionName + aiServicesAccountName: aiAccount.name + aiProjectName: aiAccount::project.name + } +} + +// Azure AI Search module - deploy if search connection is defined in ai.yaml +module azureAiSearch '../search/azure_ai_search.bicep' = if (hasSearchConnection) { + name: 'azure-ai-search' + params: { + tags: tags + resourceName: 'search-${resourceToken}' + 
connectionName: searchConnectionName + storageAccountResourceId: hasStorageConnection ? storage!.outputs.storageAccountId : '' + containerName: 'knowledge' + aiServicesAccountName: aiAccount.name + aiProjectName: aiAccount::project.name + principalId: principalId + principalType: principalType + location: location + } +} + +// Outputs - existing-ai-project.bicep declares the same output names so main.bicep can consume either module +output AZURE_AI_PROJECT_ENDPOINT string = aiAccount::project.properties.endpoints['AI Foundry API'] +output AZURE_OPENAI_ENDPOINT string = aiAccount.properties.endpoints['OpenAI Language Model Instance API'] +output aiServicesEndpoint string = aiAccount.properties.endpoint +output accountId string = aiAccount.id +output projectId string = aiAccount::project.id +output aiServicesAccountName string = aiAccount.name +output aiServicesProjectName string = aiAccount::project.name +output aiServicesPrincipalId string = aiAccount.identity.principalId +output projectName string = aiAccount::project.name // same value as aiServicesProjectName +output APPLICATIONINSIGHTS_CONNECTION_STRING string = shouldCreateAppInsights ? applicationInsights.outputs.connectionString : (hasExistingAppInsightsConnectionString ? existingApplicationInsightsConnectionString : '') // newly created instance first, then the caller-supplied existing value, else '' +output APPLICATIONINSIGHTS_RESOURCE_ID string = shouldCreateAppInsights ? applicationInsights.outputs.id : (hasExistingAppInsightsConnectionString ? existingApplicationInsightsResourceId : '') + +// Connection outputs: one { name, id } entry per item of the connections array, in the same order +output connectionIds array = [for (connection, index) in (connections ?? []): { + name: aiConnections[index].outputs.connectionName + id: aiConnections[index].outputs.connectionId +}] + +// Grouped dependent resources outputs - each value is '' when its module was not deployed (registry falls back to the existing-ACR inputs) +output dependentResources object = { + registry: { + name: hasAcrConnection ? acr!.outputs.containerRegistryName : '' + loginServer: hasAcrConnection ? acr!.outputs.containerRegistryLoginServer : ((hasExistingAcr || hasExistingAcrConnection) ? existingContainerRegistryEndpoint : '') + connectionName: hasAcrConnection ? 
acr!.outputs.containerRegistryConnectionName : (hasExistingAcrConnection ? existingAcrConnectionName : (hasExistingAcr ? 'acr-${resourceToken}' : '')) // 'acr-${resourceToken}' is the name the existingAcrConnection module gives the connection it creates + } + bing_grounding: { + name: (hasBingConnection) ? bingGrounding!.outputs.bingGroundingName : '' + connectionName: (hasBingConnection) ? bingGrounding!.outputs.bingGroundingConnectionName : '' + connectionId: (hasBingConnection) ? bingGrounding!.outputs.bingGroundingConnectionId : '' + } + bing_custom_grounding: { + name: (hasBingCustomConnection) ? bingCustomGrounding!.outputs.bingCustomGroundingName : '' + connectionName: (hasBingCustomConnection) ? bingCustomGrounding!.outputs.bingCustomGroundingConnectionName : '' + connectionId: (hasBingCustomConnection) ? bingCustomGrounding!.outputs.bingCustomGroundingConnectionId : '' + } + search: { + serviceName: hasSearchConnection ? azureAiSearch!.outputs.searchServiceName : '' + connectionName: hasSearchConnection ? azureAiSearch!.outputs.searchConnectionName : '' + } + storage: { + accountName: hasStorageConnection ? storage!.outputs.storageAccountName : '' + connectionName: hasStorageConnection ? storage!.outputs.storageConnectionName : '' + } +} + +type deploymentsType = { + @description('Specify the name of cognitive service account deployment.') + name: string + + @description('Required. Properties of Cognitive Services account deployment model.') + model: { + @description('Required. The name of Cognitive Services account deployment model.') + name: string + + @description('Required. The format of Cognitive Services account deployment model.') + format: string + + @description('Required. The version of Cognitive Services account deployment model.') + version: string + } + + @description('The resource model definition representing SKU.') + sku: { + @description('Required. The name of the resource model definition representing SKU.') + name: string + + @description('The capacity of the resource model definition representing SKU.') + capacity: int + } +}[]? 
+ +type dependentResourcesType = { + @description('The type of dependent resource to create') + resource: 'storage' | 'registry' | 'azure_ai_search' | 'bing_grounding' | 'bing_custom_grounding' + + @description('The connection name for this resource') + connectionName: string +}[] diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/connection.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/connection.bicep new file mode 100644 index 000000000000..a08726645243 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/connection.bicep @@ -0,0 +1,112 @@ +targetScope = 'resourceGroup' + +@description('AI Services account name') +param aiServicesAccountName string + +@description('AI project name') +param aiProjectName string + +// Connection configuration type: name, category, target and authType are required; every other field is optional +type ConnectionConfig = { + @description('Name of the connection') + name: string + + @description('Category of the connection (e.g., ContainerRegistry, AzureStorageAccount, CognitiveSearch, AzureOpenAI)') + category: string + + @description('Target endpoint or URL for the connection') + target: string + + @description('Authentication type') + authType: 'AAD' | 'AccessKey' | 'AccountKey' | 'AgenticIdentity' | 'ApiKey' | 'CustomKeys' | 'ManagedIdentity' | 'None' | 'OAuth2' | 'PAT' | 'SAS' | 'ServicePrincipal' | 'UsernamePassword' | 'UserEntraToken' | 'ProjectManagedIdentity' + + @description('Whether the connection is shared to all users (optional, defaults to true)') + isSharedToAll: bool? + + @description('Additional metadata for the connection (optional)') + metadata: object? + + @description('Error message if the connection fails (optional)') + error: string? + + @description('Expiry time for the connection (optional)') + expiryTime: string? 
+ + @description('Private endpoint requirement: Required, NotRequired, or NotApplicable (optional)') + peRequirement: ('NotApplicable' | 'NotRequired' | 'Required')? + + @description('Private endpoint status: Active, Inactive, or NotApplicable (optional)') + peStatus: ('Active' | 'Inactive' | 'NotApplicable')? + + @description('List of users to share the connection with (optional, alternative to isSharedToAll)') + sharedUserList: string[]? + + @description('Whether to use workspace managed identity (optional)') + useWorkspaceManagedIdentity: bool? + + @description('OAuth2 authorization endpoint URL (optional, OAuth2 authType only)') + authorizationUrl: string? + + @description('OAuth2 token endpoint URL (optional, OAuth2 authType only)') + tokenUrl: string? + + @description('OAuth2 refresh token endpoint URL (optional, OAuth2 authType only)') + refreshUrl: string? + + @description('OAuth2 scopes to request (optional, OAuth2 authType only)') + scopes: string[]? + + @description('Token audience for UserEntraToken / AgenticIdentity auth types (optional)') + audience: string? + + @description('Managed connector name for OAuth2 managed connectors (optional)') + connectorName: string? +} + +@description('Connection configuration') +param connectionConfig ConnectionConfig + +@secure() +@description('Credentials for the connection. Kept as a separate @secure parameter to prevent secrets from appearing in deployment logs. Shape depends on authType — e.g. { key: "..." } for ApiKey, { clientId: "...", clientSecret: "..." 
} for OAuth2/ServicePrincipal.') +param credentials object = {} + + +// Get reference to the AI Services account and project (NOTE(review): existing-ai-project.bicep references the same account type at 2025-06-01 - confirm the preview API version here is intentional) +resource aiAccount 'Microsoft.CognitiveServices/accounts@2025-04-01-preview' existing = { + name: aiServicesAccountName + + resource project 'projects' existing = { + name: aiProjectName + } +} + +// Create the connection under the project; isSharedToAll defaults to true and an empty credentials object is sent as null +resource connection 'Microsoft.CognitiveServices/accounts/projects/connections@2025-04-01-preview' = { + parent: aiAccount::project + name: connectionConfig.name + properties: { + category: connectionConfig.category + target: connectionConfig.target + authType: connectionConfig.authType + isSharedToAll: connectionConfig.?isSharedToAll ?? true + credentials: !empty(credentials) ? credentials : null + metadata: connectionConfig.?metadata + // Optional properties: each spread contributes its key only when the value is non-null, so unset fields are left out of the properties object + ...connectionConfig.?error != null ? { error: connectionConfig.?error } : {} + ...connectionConfig.?expiryTime != null ? { expiryTime: connectionConfig.?expiryTime } : {} + ...connectionConfig.?peRequirement != null ? { peRequirement: connectionConfig.?peRequirement } : {} + ...connectionConfig.?peStatus != null ? { peStatus: connectionConfig.?peStatus } : {} + ...connectionConfig.?sharedUserList != null ? { sharedUserList: connectionConfig.?sharedUserList } : {} + ...connectionConfig.?useWorkspaceManagedIdentity != null ? { useWorkspaceManagedIdentity: connectionConfig.?useWorkspaceManagedIdentity } : {} + ...connectionConfig.?authorizationUrl != null ? { authorizationUrl: connectionConfig.?authorizationUrl } : {} + ...connectionConfig.?tokenUrl != null ? { tokenUrl: connectionConfig.?tokenUrl } : {} + ...connectionConfig.?refreshUrl != null ? { refreshUrl: connectionConfig.?refreshUrl } : {} + ...connectionConfig.?scopes != null ? { scopes: connectionConfig.?scopes } : {} + ...connectionConfig.?audience != null ? { audience: connectionConfig.?audience } : {} + ...connectionConfig.?connectorName != null ? 
{ connectorName: connectionConfig.?connectorName } : {} + } +} + +// Outputs: name and resource ID of the connection created above +output connectionName string = connection.name +output connectionId string = connection.id diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/existing-ai-project.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/existing-ai-project.bicep new file mode 100644 index 000000000000..fea2782fdfa5 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/ai/existing-ai-project.bicep @@ -0,0 +1,96 @@ +targetScope = 'resourceGroup' + +@description('Name of the existing AI Services account') +param aiServicesAccountName string + +@description('Name of the existing AI Foundry project') +param aiFoundryProjectName string + +@description('Existing ACR connection name (already set in the environment)') +param existingAcrConnectionName string = '' + +@description('Existing container registry endpoint (already set in the environment)') +param existingContainerRegistryEndpoint string = '' + +@description('Existing Application Insights connection string (already set in the environment)') +param existingApplicationInsightsConnectionString string = '' + +@description('Existing Application Insights resource ID (already set in the environment)') +param existingApplicationInsightsResourceId string = '' + +@description('List of connections to provision on the existing project') +param connections array = [] + +@secure() +@description('Map of connection name to credentials object. Kept as @secure to prevent secrets from appearing in deployment logs. Example: { "my-conn": { "key": "secret" } }') +param connectionCredentials object = {} + +// Reference the existing account and project — read-only except for the +// additional connections provisioned below from the agent manifest. 
+resource aiAccount 'Microsoft.CognitiveServices/accounts@2025-06-01' existing = { + name: aiServicesAccountName + + resource project 'projects' existing = { + name: aiFoundryProjectName + } +} + +// Create additional connections from ai.yaml / agent manifest configuration on +// the existing project. Mirrors the loop in ai-project.bicep so manifest-declared +// connections are provisioned regardless of whether the project itself is new or +// pre-existing. +module aiConnections './connection.bicep' = [for (connection, index) in connections: { + name: 'existing-connection-${connection.name}' + params: { + aiServicesAccountName: aiAccount.name + aiProjectName: aiAccount::project.name + connectionConfig: connection + credentials: connectionCredentials[?connection.name] ?? {} // connections with no entry in connectionCredentials are created with empty credentials + } +}] + +// Outputs — same shape as ai-project.bicep so main.bicep can use either interchangeably +output AZURE_AI_PROJECT_ENDPOINT string = aiAccount::project.properties.endpoints['AI Foundry API'] +output AZURE_OPENAI_ENDPOINT string = aiAccount.properties.endpoints['OpenAI Language Model Instance API'] +output aiServicesEndpoint string = aiAccount.properties.endpoint +output accountId string = aiAccount.id +output projectId string = aiAccount::project.id +output aiServicesAccountName string = aiAccount.name +output aiServicesProjectName string = aiAccount::project.name +output aiServicesPrincipalId string = aiAccount.identity.principalId +output projectName string = aiAccount::project.name +output APPLICATIONINSIGHTS_CONNECTION_STRING string = existingApplicationInsightsConnectionString +output APPLICATIONINSIGHTS_RESOURCE_ID string = existingApplicationInsightsResourceId + +// Connection outputs from the connections array (provisioned above): one { name, id } entry per connection +// The list is empty only when no connections were passed in; it is not read from the azd environment +output connectionIds array = [for (connection, index) in (connections ?? 
[]): { + name: aiConnections[index].outputs.connectionName + id: aiConnections[index].outputs.connectionId +}] + +output dependentResources object = { // no dependent resources are deployed by this file: only the existing ACR parameters are passed through, everything else is '' + registry: { + name: '' + loginServer: existingContainerRegistryEndpoint + connectionName: existingAcrConnectionName + } + bing_grounding: { + name: '' + connectionName: '' + connectionId: '' + } + bing_custom_grounding: { + name: '' + connectionName: '' + connectionId: '' + } + search: { + serviceName: '' + connectionName: '' + } + storage: { + accountName: '' + connectionName: '' + } +} diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/host/acr.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/host/acr.bicep new file mode 100644 index 000000000000..f1893d8ff312 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/host/acr.bicep @@ -0,0 +1,88 @@ +targetScope = 'resourceGroup' + +@description('The location used for all deployed resources') +param location string = resourceGroup().location + +@description('Tags that will be applied to all resources') +param tags object = {} + +@description('Resource name for the container registry') +param resourceName string + +@description('Id of the user or app to assign application roles') +param principalId string + +@description('Principal type of user or app') +param principalType string + +@description('AI Services account name for the project parent') +param aiServicesAccountName string = '' + +@description('AI project name for creating the connection') +param aiProjectName string = '' + +@description('Name for the AI Foundry ACR connection') +param connectionName string + +// Get reference to the AI Services account and project to access their managed identities +resource aiAccount 'Microsoft.CognitiveServices/accounts@2025-04-01-preview' existing = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) { + name: 
aiServicesAccountName + + resource aiProject 'projects' existing = { + name: aiProjectName + } +} + +// Create the Container Registry; the project role assignment is added only when an AI account and project were supplied +module containerRegistry 'br/public:avm/res/container-registry/registry:0.1.1' = { + name: 'registry' + params: { + name: resourceName + location: location + tags: tags + publicNetworkAccess: 'Enabled' + roleAssignments: concat([ + { + principalId: principalId + principalType: principalType + // Container Registry Tasks Contributor — build images with ACR tasks and push container images + roleDefinitionIdOrName: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'fb382eab-e894-4461-af04-94435c366c3f') + } + ], (!empty(aiServicesAccountName) && !empty(aiProjectName)) ? [ // TODO SEPARATELY + { + // the foundry project itself can pull from the ACR (same guard as the aiAccount reference, which is not resolved when the names are empty) + principalId: aiAccount::aiProject.identity.principalId + principalType: 'ServicePrincipal' + roleDefinitionIdOrName: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '7f951dda-4ed3-4680-a7ca-43fe172d538d') + } + ] : []) + } +} + +// Create the ACR connection using the centralized connection module +module acrConnection '../ai/connection.bicep' = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) { + name: 'acr-connection-creation' + params: { + aiServicesAccountName: aiServicesAccountName + aiProjectName: aiProjectName + connectionConfig: { + name: connectionName + category: 'ContainerRegistry' + target: containerRegistry.outputs.loginServer + authType: 'ManagedIdentity' + isSharedToAll: true + metadata: { + ResourceId: containerRegistry.outputs.resourceId + } + } + credentials: { + clientId: aiAccount::aiProject.identity.principalId + resourceId: containerRegistry.outputs.resourceId + } + } +} + +output containerRegistryName string = containerRegistry.outputs.name +output containerRegistryLoginServer string = containerRegistry.outputs.loginServer +output containerRegistryResourceId string = containerRegistry.outputs.resourceId +output containerRegistryConnectionName string = (!empty(aiServicesAccountName) && !empty(aiProjectName)) ? acrConnection!.outputs.connectionName : '' diff --git 
a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/monitor/applicationinsights-dashboard.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/monitor/applicationinsights-dashboard.bicep new file mode 100644 index 000000000000..d082e668ed9f --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/monitor/applicationinsights-dashboard.bicep @@ -0,0 +1,1236 @@ +metadata description = 'Creates a dashboard for an Application Insights instance.' +param name string +param applicationInsightsName string +param location string = resourceGroup().location +param tags object = {} + +// 2020-09-01-preview because that is the latest valid version +resource applicationInsightsDashboard 'Microsoft.Portal/dashboards@2020-09-01-preview' = { + name: name + location: location + tags: tags + properties: { + lenses: [ + { + order: 0 + parts: [ + { + position: { + x: 0 + y: 0 + colSpan: 2 + rowSpan: 1 + } + metadata: { + inputs: [ + { + name: 'id' + value: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + { + name: 'Version' + value: '1.0' + } + ] + #disable-next-line BCP036 + type: 'Extension/AppInsightsExtension/PartType/AspNetOverviewPinnedPart' + asset: { + idInputName: 'id' + type: 'ApplicationInsights' + } + defaultMenuItemId: 'overview' + } + } + { + position: { + x: 2 + y: 0 + colSpan: 1 + rowSpan: 1 + } + metadata: { + inputs: [ + { + name: 'ComponentId' + value: { + Name: applicationInsights.name + SubscriptionId: subscription().subscriptionId + ResourceGroup: resourceGroup().name + } + } + { + name: 'Version' + value: '1.0' + } + ] + #disable-next-line BCP036 + type: 'Extension/AppInsightsExtension/PartType/ProactiveDetectionAsyncPart' + asset: { + idInputName: 'ComponentId' + type: 'ApplicationInsights' + } + defaultMenuItemId: 
'ProactiveDetection' + } + } + { + position: { + x: 3 + y: 0 + colSpan: 1 + rowSpan: 1 + } + metadata: { + inputs: [ + { + name: 'ComponentId' + value: { + Name: applicationInsights.name + SubscriptionId: subscription().subscriptionId + ResourceGroup: resourceGroup().name + } + } + { + name: 'ResourceId' + value: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + ] + #disable-next-line BCP036 + type: 'Extension/AppInsightsExtension/PartType/QuickPulseButtonSmallPart' + asset: { + idInputName: 'ComponentId' + type: 'ApplicationInsights' + } + } + } + { + position: { + x: 4 + y: 0 + colSpan: 1 + rowSpan: 1 + } + metadata: { + inputs: [ + { + name: 'ComponentId' + value: { + Name: applicationInsights.name + SubscriptionId: subscription().subscriptionId + ResourceGroup: resourceGroup().name + } + } + { + name: 'TimeContext' + value: { + durationMs: 86400000 + endTime: null + createdTime: '2018-05-04T01:20:33.345Z' + isInitialTime: true + grain: 1 + useDashboardTimeRange: false + } + } + { + name: 'Version' + value: '1.0' + } + ] + #disable-next-line BCP036 + type: 'Extension/AppInsightsExtension/PartType/AvailabilityNavButtonPart' + asset: { + idInputName: 'ComponentId' + type: 'ApplicationInsights' + } + } + } + { + position: { + x: 5 + y: 0 + colSpan: 1 + rowSpan: 1 + } + metadata: { + inputs: [ + { + name: 'ComponentId' + value: { + Name: applicationInsights.name + SubscriptionId: subscription().subscriptionId + ResourceGroup: resourceGroup().name + } + } + { + name: 'TimeContext' + value: { + durationMs: 86400000 + endTime: null + createdTime: '2018-05-08T18:47:35.237Z' + isInitialTime: true + grain: 1 + useDashboardTimeRange: false + } + } + { + name: 'ConfigurationId' + value: '78ce933e-e864-4b05-a27b-71fd55a6afad' + } + ] + #disable-next-line BCP036 + type: 'Extension/AppInsightsExtension/PartType/AppMapButtonPart' + asset: { + idInputName: 
'ComponentId' + type: 'ApplicationInsights' + } + } + } + { + position: { + x: 0 + y: 1 + colSpan: 3 + rowSpan: 1 + } + metadata: { + inputs: [] + type: 'Extension/HubsExtension/PartType/MarkdownPart' + settings: { + content: { + settings: { + content: '# Usage' + title: '' + subtitle: '' + } + } + } + } + } + { + position: { + x: 3 + y: 1 + colSpan: 1 + rowSpan: 1 + } + metadata: { + inputs: [ + { + name: 'ComponentId' + value: { + Name: applicationInsights.name + SubscriptionId: subscription().subscriptionId + ResourceGroup: resourceGroup().name + } + } + { + name: 'TimeContext' + value: { + durationMs: 86400000 + endTime: null + createdTime: '2018-05-04T01:22:35.782Z' + isInitialTime: true + grain: 1 + useDashboardTimeRange: false + } + } + ] + #disable-next-line BCP036 + type: 'Extension/AppInsightsExtension/PartType/UsageUsersOverviewPart' + asset: { + idInputName: 'ComponentId' + type: 'ApplicationInsights' + } + } + } + { + position: { + x: 4 + y: 1 + colSpan: 3 + rowSpan: 1 + } + metadata: { + inputs: [] + type: 'Extension/HubsExtension/PartType/MarkdownPart' + settings: { + content: { + settings: { + content: '# Reliability' + title: '' + subtitle: '' + } + } + } + } + } + { + position: { + x: 7 + y: 1 + colSpan: 1 + rowSpan: 1 + } + metadata: { + inputs: [ + { + name: 'ResourceId' + value: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + { + name: 'DataModel' + value: { + version: '1.0.0' + timeContext: { + durationMs: 86400000 + createdTime: '2018-05-04T23:42:40.072Z' + isInitialTime: false + grain: 1 + useDashboardTimeRange: false + } + } + isOptional: true + } + { + name: 'ConfigurationId' + value: '8a02f7bf-ac0f-40e1-afe9-f0e72cfee77f' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/AppInsightsExtension/PartType/CuratedBladeFailuresPinnedPart' + isAdapter: true + asset: { + idInputName: 'ResourceId' + type: 
'ApplicationInsights' + } + defaultMenuItemId: 'failures' + } + } + { + position: { + x: 8 + y: 1 + colSpan: 3 + rowSpan: 1 + } + metadata: { + inputs: [] + type: 'Extension/HubsExtension/PartType/MarkdownPart' + settings: { + content: { + settings: { + content: '# Responsiveness\r\n' + title: '' + subtitle: '' + } + } + } + } + } + { + position: { + x: 11 + y: 1 + colSpan: 1 + rowSpan: 1 + } + metadata: { + inputs: [ + { + name: 'ResourceId' + value: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + { + name: 'DataModel' + value: { + version: '1.0.0' + timeContext: { + durationMs: 86400000 + createdTime: '2018-05-04T23:43:37.804Z' + isInitialTime: false + grain: 1 + useDashboardTimeRange: false + } + } + isOptional: true + } + { + name: 'ConfigurationId' + value: '2a8ede4f-2bee-4b9c-aed9-2db0e8a01865' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/AppInsightsExtension/PartType/CuratedBladePerformancePinnedPart' + isAdapter: true + asset: { + idInputName: 'ResourceId' + type: 'ApplicationInsights' + } + defaultMenuItemId: 'performance' + } + } + { + position: { + x: 12 + y: 1 + colSpan: 3 + rowSpan: 1 + } + metadata: { + inputs: [] + type: 'Extension/HubsExtension/PartType/MarkdownPart' + settings: { + content: { + settings: { + content: '# Browser' + title: '' + subtitle: '' + } + } + } + } + } + { + position: { + x: 15 + y: 1 + colSpan: 1 + rowSpan: 1 + } + metadata: { + inputs: [ + { + name: 'ComponentId' + value: { + Name: applicationInsights.name + SubscriptionId: subscription().subscriptionId + ResourceGroup: resourceGroup().name + } + } + { + name: 'MetricsExplorerJsonDefinitionId' + value: 'BrowserPerformanceTimelineMetrics' + } + { + name: 'TimeContext' + value: { + durationMs: 86400000 + createdTime: '2018-05-08T12:16:27.534Z' + isInitialTime: false + grain: 1 + useDashboardTimeRange: false + } + } + { + name: 
'CurrentFilter' + value: { + eventTypes: [ + 4 + 1 + 3 + 5 + 2 + 6 + 13 + ] + typeFacets: {} + isPermissive: false + } + } + { + name: 'id' + value: { + Name: applicationInsights.name + SubscriptionId: subscription().subscriptionId + ResourceGroup: resourceGroup().name + } + } + { + name: 'Version' + value: '1.0' + } + ] + #disable-next-line BCP036 + type: 'Extension/AppInsightsExtension/PartType/MetricsExplorerBladePinnedPart' + asset: { + idInputName: 'ComponentId' + type: 'ApplicationInsights' + } + defaultMenuItemId: 'browser' + } + } + { + position: { + x: 0 + y: 2 + colSpan: 4 + rowSpan: 3 + } + metadata: { + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'sessions/count' + aggregationType: 5 + namespace: 'microsoft.insights/components/kusto' + metricVisualization: { + displayName: 'Sessions' + color: '#47BDF5' + } + } + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'users/count' + aggregationType: 5 + namespace: 'microsoft.insights/components/kusto' + metricVisualization: { + displayName: 'Users' + color: '#7E58FF' + } + } + ] + title: 'Unique sessions and users' + visualization: { + chartType: 2 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + openBladeOnClick: { + openBlade: true + destinationBlade: { + extensionName: 'HubsExtension' + bladeName: 'ResourceMenuBlade' + parameters: { + id: 
'/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + menuid: 'segmentationUsers' + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + settings: {} + } + } + { + position: { + x: 4 + y: 2 + colSpan: 4 + rowSpan: 3 + } + metadata: { + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'requests/failed' + aggregationType: 7 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Failed requests' + color: '#EC008C' + } + } + ] + title: 'Failed requests' + visualization: { + chartType: 3 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + openBladeOnClick: { + openBlade: true + destinationBlade: { + extensionName: 'HubsExtension' + bladeName: 'ResourceMenuBlade' + parameters: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + menuid: 'failures' + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + settings: {} + } + } + { + position: { + x: 8 + y: 2 + colSpan: 4 + rowSpan: 3 + } + metadata: { + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: 
'/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'requests/duration' + aggregationType: 4 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Server response time' + color: '#00BCF2' + } + } + ] + title: 'Server response time' + visualization: { + chartType: 2 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + openBladeOnClick: { + openBlade: true + destinationBlade: { + extensionName: 'HubsExtension' + bladeName: 'ResourceMenuBlade' + parameters: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + menuid: 'performance' + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + settings: {} + } + } + { + position: { + x: 12 + y: 2 + colSpan: 4 + rowSpan: 3 + } + metadata: { + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'browserTimings/networkDuration' + aggregationType: 4 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Page load network connect time' + color: '#7E58FF' + } + } + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'browserTimings/processingDuration' + aggregationType: 4 + namespace: 'microsoft.insights/components' + 
metricVisualization: { + displayName: 'Client processing time' + color: '#44F1C8' + } + } + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'browserTimings/sendDuration' + aggregationType: 4 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Send request time' + color: '#EB9371' + } + } + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'browserTimings/receiveDuration' + aggregationType: 4 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Receiving response time' + color: '#0672F1' + } + } + ] + title: 'Average page load time breakdown' + visualization: { + chartType: 3 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + settings: {} + } + } + { + position: { + x: 0 + y: 5 + colSpan: 4 + rowSpan: 3 + } + metadata: { + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'availabilityResults/availabilityPercentage' + aggregationType: 4 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Availability' + color: '#47BDF5' + } + } + ] + title: 'Average availability' + visualization: { + chartType: 3 + legendVisualization: { + isVisible: true + position: 2 + 
hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + openBladeOnClick: { + openBlade: true + destinationBlade: { + extensionName: 'HubsExtension' + bladeName: 'ResourceMenuBlade' + parameters: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + menuid: 'availability' + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + settings: {} + } + } + { + position: { + x: 4 + y: 5 + colSpan: 4 + rowSpan: 3 + } + metadata: { + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'exceptions/server' + aggregationType: 7 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Server exceptions' + color: '#47BDF5' + } + } + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'dependencies/failed' + aggregationType: 7 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Dependency failures' + color: '#7E58FF' + } + } + ] + title: 'Server exceptions and Dependency failures' + visualization: { + chartType: 2 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 
'Extension/HubsExtension/PartType/MonitorChartPart' + settings: {} + } + } + { + position: { + x: 8 + y: 5 + colSpan: 4 + rowSpan: 3 + } + metadata: { + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'performanceCounters/processorCpuPercentage' + aggregationType: 4 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Processor time' + color: '#47BDF5' + } + } + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'performanceCounters/processCpuPercentage' + aggregationType: 4 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Process CPU' + color: '#7E58FF' + } + } + ] + title: 'Average processor and process CPU utilization' + visualization: { + chartType: 2 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + settings: {} + } + } + { + position: { + x: 12 + y: 5 + colSpan: 4 + rowSpan: 3 + } + metadata: { + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'exceptions/browser' + aggregationType: 7 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Browser exceptions' + color: '#47BDF5' + } 
+ } + ] + title: 'Browser exceptions' + visualization: { + chartType: 2 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + settings: {} + } + } + { + position: { + x: 0 + y: 8 + colSpan: 4 + rowSpan: 3 + } + metadata: { + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'availabilityResults/count' + aggregationType: 7 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Availability test results count' + color: '#47BDF5' + } + } + ] + title: 'Availability test results count' + visualization: { + chartType: 2 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + settings: {} + } + } + { + position: { + x: 4 + y: 8 + colSpan: 4 + rowSpan: 3 + } + metadata: { + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'performanceCounters/processIOBytesPerSecond' + aggregationType: 4 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Process IO rate' + color: '#47BDF5' + } + } + 
] + title: 'Average process I/O rate' + visualization: { + chartType: 2 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + settings: {} + } + } + { + position: { + x: 8 + y: 8 + colSpan: 4 + rowSpan: 3 + } + metadata: { + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsights.name}' + } + name: 'performanceCounters/memoryAvailableBytes' + aggregationType: 4 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Available memory' + color: '#47BDF5' + } + } + ] + title: 'Average available memory' + visualization: { + chartType: 2 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + settings: {} + } + } + ] + } + ] + } +} + +resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = { + name: applicationInsightsName +} diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/monitor/applicationinsights.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/monitor/applicationinsights.bicep new file mode 100644 index 000000000000..73240d1b1c9a --- /dev/null +++ 
b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/monitor/applicationinsights.bicep
@@ -0,0 +1,47 @@
+metadata description = 'Creates an Application Insights instance based on an existing Log Analytics workspace.'
+param name string
+param logAnalyticsWorkspaceId string
+param location string = resourceGroup().location
+param tags object = {}
+param dashboardName string = ''
+
+@description('Optional. Principal ID of the Foundry Project managed identity to grant Log Analytics Reader.')
+param projectMIPrincipalId string = ''
+
+resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = {
+  name: name
+  kind: 'web'
+  location: location
+  tags: tags
+  properties: {
+    WorkspaceResourceId: logAnalyticsWorkspaceId
+    Application_Type: 'web'
+  }
+}
+
+module applicationInsightsDashboard 'applicationinsights-dashboard.bicep' = if (!empty(dashboardName)) {
+  name: 'application-insights-dashboard'
+  params: {
+    applicationInsightsName: applicationInsights.name
+    name: dashboardName
+    location: location
+  }
+}
+
+// Grants the Foundry Project managed identity the Log Analytics Reader role on this component.
+// Skipped when no principal ID is supplied; needed to run evaluations on agent-generated traces.
+resource logAnalyticsReaderRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (!empty(projectMIPrincipalId)) {
+  name: guid(applicationInsights.id, projectMIPrincipalId, '73c42c96-874c-492b-b04d-ab87d138a893')
+  scope: applicationInsights
+  properties: {
+    // Built-in role definition: Log Analytics Reader
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '73c42c96-874c-492b-b04d-ab87d138a893')
+    principalId: projectMIPrincipalId
+    principalType: 'ServicePrincipal'
+  }
+}
+
+output id string = applicationInsights.id
+output name string = applicationInsights.name
+output connectionString string = applicationInsights.properties.ConnectionString
+output instrumentationKey string = applicationInsights.properties.InstrumentationKey
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/monitor/loganalytics.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/monitor/loganalytics.bicep
new file mode 100644
index 000000000000..33f9dc29443a
--- /dev/null
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/monitor/loganalytics.bicep
@@ -0,0 +1,22 @@
+metadata description = 'Creates a Log Analytics workspace.'
+param name string
+param location string = resourceGroup().location
+param tags object = {}
+// Workspace with 30-day retention on the PerGB2018 SKU; any() switches off Bicep type checking for the properties object.
+resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' = {
+  name: name
+  location: location
+  tags: tags
+  properties: any({
+    retentionInDays: 30
+    features: {
+      searchVersion: 1
+    }
+    sku: {
+      name: 'PerGB2018'
+    }
+  })
+}
+
+output id string = logAnalytics.id
+output name string = logAnalytics.name
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/search/azure_ai_search.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/search/azure_ai_search.bicep
new file mode 100644
index 000000000000..7bb8e6350025
--- /dev/null
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/search/azure_ai_search.bicep
@@ -0,0 +1,211 @@
+targetScope = 'resourceGroup'
+// Deploys an Azure AI Search service, a blob container in an existing storage account, the role assignments between them and, when an AI Services account and project are named, a Foundry project connection.
+@description('Tags that will be applied to all resources')
+param tags object = {}
+
+@description('Azure Search resource name')
+param resourceName string
+
+@description('Azure Search SKU name')
+param azureSearchSkuName string = 'basic'
+
+@description('Azure storage account resource ID')
+param storageAccountResourceId string
+
+@description('container name')
+param containerName string = 'knowledgebase'
+
+@description('AI Services account name for the project parent')
+param aiServicesAccountName string = ''
+
+@description('AI project name for creating the connection')
+param aiProjectName string = ''
+
+@description('Id of the user or app to assign application roles')
+param principalId string
+
+@description('Principal type of user or app')
+param principalType string
+
+@description('Name for the AI Foundry search connection')
+param connectionName string
+
+@description('Location for all resources')
+param location string = resourceGroup().location
+
+// Existing AI Services account and its project, referenced only when both names are provided, so the project's managed identity can be read below
+resource aiAccount 'Microsoft.CognitiveServices/accounts@2025-04-01-preview' existing = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  name: aiServicesAccountName
+
+  resource aiProject 'projects' existing = {
+    name: aiProjectName
+  }
+}
+
+// Azure Search service with a system-assigned identity; both Entra ID and API-key authentication are enabled (disableLocalAuth: false)
+resource searchService 'Microsoft.Search/searchServices@2024-06-01-preview' = {
+  name: resourceName
+  location: location
+  tags: tags
+  sku: {
+    name: azureSearchSkuName
+  }
+  identity: {
+    type: 'SystemAssigned'
+  }
+  properties: {
+    replicaCount: 1
+    partitionCount: 1
+    hostingMode: 'default'
+    authOptions: {
+      aadOrApiKey: {
+        aadAuthFailureMode: 'http401WithBearerChallenge'
+      }
+    }
+    disableLocalAuth: false
+    encryptionWithCmk: {
+      enforcement: 'Unspecified'
+    }
+    publicNetworkAccess: 'enabled'
+  }
+}
+
+// Reference to the existing storage account; only the last segment of storageAccountResourceId (the name) is used. NOTE(review): no scope is given, so the account is looked up in the current resource group - confirm it always lives there.
+resource storageAccount 'Microsoft.Storage/storageAccounts@2023-05-01' existing = {
+  name: last(split(storageAccountResourceId, '/'))
+}
+
+// Reference to the existing default blob service of that storage account
+resource blobService 'Microsoft.Storage/storageAccounts/blobServices@2023-05-01' existing = {
+  parent: storageAccount
+  name: 'default'
+}
+
+// Storage container (created if it doesn't exist), with public access disabled
+resource storageContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2023-05-01' = {
+  parent: blobService
+  name: containerName
+  properties: {
+    publicAccess: 'None'
+  }
+}
+
+// RBAC assignments. Each name is a guid() that also hashes deployment().name.
+
+// Search needs to read from Storage (granted to the search service's system-assigned identity)
+resource searchToStorageRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(storageAccount.id, searchService.id, 'Storage Blob Data Reader', uniqueString(deployment().name))
+  scope: storageAccount
+  properties: {
+    // Built-in role, referenced by its subscription-level role definition ID
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1') // Storage Blob Data Reader
+    principalId: searchService.identity.principalId
+    principalType: 'ServicePrincipal'
+  }
+}
+
+// Search needs OpenAI access (AI Services account). NOTE(review): no `scope` is set, so this assignment is made at this file's target scope (the resource group) rather than on the AI Services account - confirm that is intended.
+resource searchToAIServicesRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (!empty(aiServicesAccountName)) {
+  name: guid(aiServicesAccountName, searchService.id, 'Cognitive Services OpenAI User', uniqueString(deployment().name))
+  properties: {
+    // Built-in role, referenced by its subscription-level role definition ID
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd') // Cognitive Services OpenAI User
+    principalId: searchService.identity.principalId
+    principalType: 'ServicePrincipal'
+  }
+}
+
+// AI Project needs Search access - Service Contributor (granted to the project's managed identity)
+resource aiServicesToSearchServiceRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  name: guid(searchService.id, aiServicesAccountName, aiProjectName, 'Search Service Contributor', uniqueString(deployment().name))
+  scope: searchService
+  properties: {
+    // Built-in role, referenced by its subscription-level role definition ID
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '7ca78c08-252a-4471-8644-bb5ff32d4ba0') // Search Service Contributor
+    principalId: aiAccount::aiProject.identity.principalId
+    principalType: 'ServicePrincipal'
+  }
+}
+
+// AI Project needs Search access - Index Data Contributor (granted to the project's managed identity)
+resource aiServicesToSearchDataRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  name: guid(searchService.id, aiServicesAccountName, aiProjectName, 'Search Index Data Contributor', uniqueString(deployment().name))
+  scope: searchService
+  properties: {
+    // Built-in role, referenced by its subscription-level role definition ID
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '8ebe5a00-799e-43f5-93ac-243d3dce84a7') // Search Index Data Contributor
+    principalId: aiAccount::aiProject.identity.principalId
+    principalType: 'ServicePrincipal'
+  }
+}
+
+// User permissions - Search Index Data Contributor (granted to the principalId / principalType parameters)
+resource userToSearchRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(searchService.id, principalId, 'Search Index Data Contributor', uniqueString(deployment().name))
+  scope: searchService
+  properties: {
+    // Built-in role, referenced by its subscription-level role definition ID
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '8ebe5a00-799e-43f5-93ac-243d3dce84a7') // Search Index Data Contributor
+    principalId: principalId
+    principalType: principalType
+  }
+}
+
+// // User permissions - Storage Blob Data Contributor (disabled: kept commented out)
+// resource userToStorageRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+//   name: guid(storageAccount.id, principalId, 'Storage Blob Data Contributor', uniqueString(deployment().name))
+//   scope: storageAccount
+//   properties: {
+//     roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe') // Storage Blob Data Contributor
+//     principalId: principalId
+//     principalType: principalType
+//   }
+// }
+
+// // Project needs Search access - Index Data Contributor (disabled: kept commented out)
+// resource projectToSearchRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+//   name: guid(searchService.id, aiProjectName, 'Search Index Data Contributor', uniqueString(deployment().name))
+//   scope: searchService
+//   properties: {
+//     roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '8ebe5a00-799e-43f5-93ac-243d3dce84a7') // Search Index Data Contributor
+//     principalId: aiAccountPrincipalId // Using AI account principal ID as project identity
+//     principalType: 'ServicePrincipal'
+//   }
+// }
+
+// Create the AI Search connection using the centralized connection module. Only deployed when both aiServicesAccountName and aiProjectName are set, and only after the project's Search Index Data Contributor assignment.
+module aiSearchConnection '../ai/connection.bicep' = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  name: 'ai-search-connection-creation'
+  params: {
+    aiServicesAccountName: aiServicesAccountName
+    aiProjectName: aiProjectName
+    connectionConfig: {
+      name: connectionName
+      category: 'CognitiveSearch'
+      target: 'https://${searchService.name}.search.windows.net'
+      authType: 'AAD'
+      isSharedToAll: true
+      metadata: {
+        ApiVersion: '2024-07-01'
+        ResourceId: searchService.id
+        ApiType: 'Azure'
+        type: 'azure_ai_search'
+      }
+    }
+  }
+  dependsOn: [
+    aiServicesToSearchDataRoleAssignment
+  ]
+}
+
+// Outputs. The two connection outputs are '' when the connection module is skipped. NOTE(review): storageAccountPrincipalId reads the identity of the pre-existing storage account - presumably it has a managed identity; confirm.
+output searchServiceName string = searchService.name
+output searchServiceId string = searchService.id
+output searchServicePrincipalId string = searchService.identity.principalId
+output storageAccountName string = storageAccount.name
+output storageAccountId string = storageAccount.id
+output containerName string = storageContainer.name
+output storageAccountPrincipalId string = storageAccount.identity.principalId
+output searchConnectionName string = (!empty(aiServicesAccountName) && !empty(aiProjectName)) ? aiSearchConnection!.outputs.connectionName : ''
+output searchConnectionId string = (!empty(aiServicesAccountName) && !empty(aiProjectName)) ? aiSearchConnection!.outputs.connectionId : ''
+
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/search/bing_custom_grounding.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/search/bing_custom_grounding.bicep
new file mode 100644
index 000000000000..1fddea079e2e
--- /dev/null
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/search/bing_custom_grounding.bicep
@@ -0,0 +1,84 @@
+targetScope = 'resourceGroup'
+
+@description('Tags that will be applied to all resources')
+param tags object = {}
+
+@description('Bing custom grounding resource name')
+param resourceName string
+
+@description('AI Services account name for the project parent')
+param aiServicesAccountName string = ''
+
+@description('AI project name for creating the connection')
+param aiProjectName string = ''
+
+@description('Name for the AI Foundry Bing Custom Search connection')
+param 
connectionName string
+
+// Existing AI Services account and its project, referenced only when both names are provided, so the project's managed identity can be read below
+resource aiAccount 'Microsoft.CognitiveServices/accounts@2025-04-01-preview' existing = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  name: aiServicesAccountName
+
+  resource aiProject 'projects' existing = {
+    name: aiProjectName
+  }
+}
+
+// Bing account of kind 'Bing.CustomGrounding' (G1 SKU, location 'global') that provides the custom grounding capability
+resource bingCustomSearch 'Microsoft.Bing/accounts@2020-06-10' = {
+  name: resourceName
+  location: 'global'
+  tags: tags
+  sku: {
+    name: 'G1'
+  }
+  properties: {
+    statisticsEnabled: false
+  }
+  kind: 'Bing.CustomGrounding'
+}
+
+// Role assignment to allow the AI project to use Bing Search. NOTE(review): roleDefinitionId is built with resourceId() here, while the search and storage modules use subscriptionResourceId() - confirm which form is intended.
+resource bingCustomSearchRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  scope: bingCustomSearch
+  name: guid(subscription().id, resourceGroup().id, 'bing-search-role', aiServicesAccountName, aiProjectName)
+  properties: {
+    principalId: aiAccount::aiProject.identity.principalId
+    principalType: 'ServicePrincipal'
+    roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', 'a97b65f3-24c7-4388-baec-2e87135dc908') // Cognitive Services User
+  }
+}
+
+// Create the Bing Custom Search connection using the centralized connection module (API-key auth, key read with listKeys(); deployed after the role assignment and only when both account and project names are set)
+module aiSearchConnection '../ai/connection.bicep' = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  name: 'bing-custom-search-connection-creation'
+  params: {
+    aiServicesAccountName: aiServicesAccountName
+    aiProjectName: aiProjectName
+    connectionConfig: {
+      name: connectionName
+      category: 'GroundingWithCustomSearch'
+      target: bingCustomSearch.properties.endpoint
+      authType: 'ApiKey'
+      isSharedToAll: true
+      metadata: {
+        Location: 'global'
+        ResourceId: bingCustomSearch.id
+        ApiType: 'Azure'
+        type: 'bing_custom_search'
+      }
+    }
+    credentials: {
+      key: bingCustomSearch.listKeys().key1
+    }
+  }
+  dependsOn: [
+    bingCustomSearchRoleAssignment
+  ]
+}
+
+// Outputs. The connection module is conditional, so its outputs fall back to '' when it is skipped instead of dereferencing a module that was never deployed.
+output bingCustomGroundingName string = bingCustomSearch.name
+output bingCustomGroundingConnectionName string = (!empty(aiServicesAccountName) && !empty(aiProjectName)) ? aiSearchConnection!.outputs.connectionName : ''
+output bingCustomGroundingResourceId string = bingCustomSearch.id
+output bingCustomGroundingConnectionId string = (!empty(aiServicesAccountName) && !empty(aiProjectName)) ? aiSearchConnection!.outputs.connectionId : ''
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/search/bing_grounding.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/search/bing_grounding.bicep
new file mode 100644
index 000000000000..20ea5e9f160a
--- /dev/null
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/search/bing_grounding.bicep
@@ -0,0 +1,83 @@
+targetScope = 'resourceGroup'
+
+@description('Tags that will be applied to all resources')
+param tags object = {}
+
+@description('Bing grounding resource name')
+param resourceName string
+
+@description('AI Services account name for the project parent')
+param aiServicesAccountName string = ''
+
+@description('AI project name for creating the connection')
+param aiProjectName string = ''
+
+@description('Name for the AI Foundry Bing Search connection')
+param connectionName string
+
+// Get reference to the AI Services account and project to access their managed identities
+resource aiAccount 'Microsoft.CognitiveServices/accounts@2025-04-01-preview' existing = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  name: aiServicesAccountName
+
+  resource aiProject 'projects' existing = {
+    name: aiProjectName
+  }
+}
+
+// Bing Search resource for grounding capability
+resource bingSearch 'Microsoft.Bing/accounts@2020-06-10' = {
+  name: resourceName
+  location: 'global'
+  tags: tags
+  sku: {
+    name: 'G1'
+  }
+  properties: {
+    statisticsEnabled: false
+  }
+  kind: 'Bing.Grounding'
+}
+
+// Role assignment to allow AI project to use Bing Search
+resource 
bingSearchRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  scope: bingSearch
+  name: guid(subscription().id, resourceGroup().id, 'bing-search-role', aiServicesAccountName, aiProjectName)
+  properties: {
+    principalId: aiAccount::aiProject.identity.principalId
+    principalType: 'ServicePrincipal'
+    roleDefinitionId: resourceId('Microsoft.Authorization/roleDefinitions', 'a97b65f3-24c7-4388-baec-2e87135dc908') // Cognitive Services User
+  }
+}
+
+// Create the Bing Search connection using the centralized connection module (API-key auth, key read with listKeys(); deployed after the role assignment and only when both account and project names are set)
+module bingSearchConnection '../ai/connection.bicep' = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  name: 'bing-search-connection-creation'
+  params: {
+    aiServicesAccountName: aiServicesAccountName
+    aiProjectName: aiProjectName
+    connectionConfig: {
+      name: connectionName
+      category: 'GroundingWithBingSearch'
+      target: bingSearch.properties.endpoint
+      authType: 'ApiKey'
+      isSharedToAll: true
+      metadata: {
+        Location: 'global'
+        ResourceId: bingSearch.id
+        ApiType: 'Azure'
+        type: 'bing_grounding'
+      }
+    }
+    credentials: {
+      key: bingSearch.listKeys().key1
+    }
+  }
+  dependsOn: [
+    bingSearchRoleAssignment
+  ]
+}
+// Outputs. The connection module is conditional, so its outputs fall back to '' when it is skipped instead of dereferencing a module that was never deployed.
+output bingGroundingName string = bingSearch.name
+output bingGroundingConnectionName string = (!empty(aiServicesAccountName) && !empty(aiProjectName)) ? bingSearchConnection!.outputs.connectionName : ''
+output bingGroundingResourceId string = bingSearch.id
+output bingGroundingConnectionId string = (!empty(aiServicesAccountName) && !empty(aiProjectName)) ? bingSearchConnection!.outputs.connectionId : ''
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/storage/storage.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/storage/storage.bicep
new file mode 100644
index 000000000000..18d9535dcd0b
--- /dev/null
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/core/storage/storage.bicep
@@ -0,0 +1,113 @@
+targetScope = 'resourceGroup'
+
+@description('The location used for all deployed resources')
+param location string = resourceGroup().location
+
+@description('Tags that will be applied to all resources')
+param tags object = {}
+
+@description('Storage account resource name')
+param resourceName string
+
+@description('Id of the user or app to assign application roles')
+param principalId string
+
+@description('Principal type of user or app')
+param principalType string
+
+@description('AI Services account name for the project parent')
+param aiServicesAccountName string = ''
+
+@description('AI project name for creating the connection')
+param aiProjectName string = ''
+
+@description('Name for the AI Foundry storage connection')
+param connectionName string
+
+// Storage account for the AI Services account: StorageV2 on Standard_LRS with a system-assigned identity, HTTPS only, TLS 1.2 minimum and public blob access disabled
+resource storageAccount 'Microsoft.Storage/storageAccounts@2023-05-01' = {
+  name: resourceName
+  location: location
+  tags: tags
+  sku: {
+    name: 'Standard_LRS'
+  }
+  kind: 'StorageV2'
+  identity: {
+    type: 'SystemAssigned'
+  }
+  properties: {
+    supportsHttpsTrafficOnly: true
+    allowBlobPublicAccess: false
+    minimumTlsVersion: 'TLS1_2'
+    accessTier: 'Hot'
+    encryption: {
+      services: {
+        blob: {
+          enabled: true
+        }
+        file: {
+          enabled: true
+        }
+      }
+      keySource: 'Microsoft.Storage'
+    }
+  }
+}
+
+// Existing AI Services account and its project, referenced only when both names are provided, so the project's managed identity can be read below
+resource aiAccount 'Microsoft.CognitiveServices/accounts@2025-04-01-preview' existing = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  name: aiServicesAccountName
+
+  resource aiProject 'projects' existing = {
+    name: aiProjectName
+  }
+}
+
+// Role assignment that lets the AI project's managed identity access the storage account (Storage Blob Data Contributor)
+resource storageRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  name: guid(storageAccount.id, aiAccount.id, 'ai-storage-contributor')
+  scope: storageAccount
+  properties: {
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe') // Storage Blob Data Contributor
+    principalId: aiAccount::aiProject.identity.principalId
+    principalType: 'ServicePrincipal'
+  }
+}
+
+// User permissions - Storage Blob Data Contributor (granted to the principalId / principalType parameters)
+resource userStorageRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(storageAccount.id, principalId, 'Storage Blob Data Contributor')
+  scope: storageAccount
+  properties: {
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe') // Storage Blob Data Contributor
+    principalId: principalId
+    principalType: principalType
+  }
+}
+
+// Create the storage connection using the centralized connection module (Entra ID auth; only when both account and project names are set)
+module storageConnection '../ai/connection.bicep' = if (!empty(aiServicesAccountName) && !empty(aiProjectName)) {
+  name: 'storage-connection-creation'
+  params: {
+    aiServicesAccountName: aiServicesAccountName
+    aiProjectName: aiProjectName
+    connectionConfig: {
+      name: connectionName
+      category: 'AzureStorageAccount'
+      target: storageAccount.properties.primaryEndpoints.blob
+      authType: 'AAD'
+      isSharedToAll: true
+      metadata: {
+        ApiType: 'Azure'
+        ResourceId: storageAccount.id
+        location: storageAccount.location
+      }
+    }
+  }
+}
+// Outputs. storageConnection is conditional, so its output falls back to '' when it is skipped instead of dereferencing a module that was never deployed.
+output storageAccountName string = storageAccount.name
+output storageAccountId string = storageAccount.id
+output storageAccountPrincipalId string = storageAccount.identity.principalId
+output storageConnectionName string = (!empty(aiServicesAccountName) && !empty(aiProjectName)) ? storageConnection!.outputs.connectionName : ''
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/main.bicep b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/main.bicep
new file mode 100644
index 000000000000..df29abd59bf6
--- /dev/null
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/main.bicep
@@ -0,0 +1,239 @@
+targetScope = 'subscription' +// targetScope = 'resourceGroup' + +@minLength(1) +@maxLength(64) +@description('Name of the environment that can be used as part of naming resource convention') +param environmentName string + +@minLength(1) +@maxLength(90) +@description('Name of the resource group to use or create') +param resourceGroupName string = 'rg-${environmentName}' + +// Restricted locations to match list from +// https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/responses?tabs=python-key#region-availability +@minLength(1) +@description('Primary location for all resources') +@allowed([ + 'australiaeast' + 'brazilsouth' + 'canadacentral' + 'canadaeast' + 'eastus' + 'eastus2' + 'francecentral' + 'germanywestcentral' + 'italynorth' + 'japaneast' + 'koreacentral' + 'northcentralus' + 'norwayeast' + 'polandcentral' + 'southafricanorth' + 'southcentralus' + 'southeastasia' + 'southindia' + 'spaincentral' + 'swedencentral' + 'switzerlandnorth' + 'uaenorth' + 'uksouth' + 'westus' + 'westus2' + 'westus3' +]) +param location string + +param aiDeploymentsLocation string + +@description('Id of the user or app to assign application roles') +param principalId string + +@description('Principal type of user or app') +param principalType string + +@description('Optional. Name of an existing AI Services account within the resource group. If not provided, a new one will be created.') +param aiFoundryResourceName string = '' + +@description('Optional. Name of the AI Foundry project. If not provided, a default name will be used.') +param aiFoundryProjectName string = 'ai-project-${environmentName}' + +@description('List of model deployments') +param aiProjectDeploymentsJson string = '[]' + +@description('List of connections') +param aiProjectConnectionsJson string = '[]' + +@secure() +@description('JSON map of connection name to credentials object. 
Example: {"my-conn":{"key":"secret"}}') +param aiProjectConnectionCredentialsJson string = '{}' + +@description('List of resources to create and connect to the AI project') +param aiProjectDependentResourcesJson string = '[]' + +var aiProjectDeployments = json(aiProjectDeploymentsJson) +var aiProjectConnections = json(aiProjectConnectionsJson) +var aiProjectConnectionCreds = json(aiProjectConnectionCredentialsJson) +var aiProjectDependentResources = json(aiProjectDependentResourcesJson) + +@description('Enable hosted agent deployment') +param enableHostedAgents bool + +@description('Enable the capability host for supporting BYO storage of agent conversations. When false and hosted agents are enabled, the capability host is not created.') +param enableCapabilityHost bool + +@description('Enable monitoring for the AI project') +param enableMonitoring bool + +@description('When true, skip Foundry project/role/connection provisioning and reference the existing project read-only. Use when pointing at an existing Foundry project via --project-id.') +param useExistingAiProject bool = false + +@description('Optional. Existing container registry resource ID. If provided, no new ACR will be created and a connection to this ACR will be established.') +param existingContainerRegistryResourceId string = '' + +@description('Optional. Existing container registry endpoint (login server). Required if existingContainerRegistryResourceId is provided.') +param existingContainerRegistryEndpoint string = '' + +@description('Optional. Name of an existing ACR connection on the Foundry project. If provided, no new ACR or connection will be created.') +param existingAcrConnectionName string = '' + +@description('Optional. Existing Application Insights connection string. If provided, a connection will be created but no new App Insights resource.') +param existingApplicationInsightsConnectionString string = '' + +@description('Optional. Existing Application Insights resource ID. 
Used for connection metadata when providing an existing App Insights.') +param existingApplicationInsightsResourceId string = '' + +@description('Optional. Name of an existing Application Insights connection on the Foundry project. If provided, no new App Insights or connection will be created.') +param existingAppInsightsConnectionName string = '' + +// Tags that should be applied to all resources. +// +// Note that 'azd-service-name' tags should be applied separately to service host resources. +// Example usage: +// tags: union(tags, { 'azd-service-name': }) +var tags = { + 'azd-env-name': environmentName +} + +// Check if resource group exists and create it if it doesn't +resource rg 'Microsoft.Resources/resourceGroups@2021-04-01' = { + name: resourceGroupName + location: location + tags: tags +} + +// Build dependent resources array conditionally +// Check if ACR already exists in the user-provided array to avoid duplicates +// Also skip if user provided an existing container registry endpoint or connection name +var hasAcr = contains(map(aiProjectDependentResources, r => r.resource), 'registry') +var shouldCreateAcr = enableHostedAgents && !hasAcr && empty(existingContainerRegistryResourceId) && empty(existingAcrConnectionName) +var dependentResources = shouldCreateAcr ? 
union(aiProjectDependentResources, [ + { + resource: 'registry' + connectionName: 'acr-${uniqueString(subscription().id, resourceGroupName, location)}' + } +]) : aiProjectDependentResources + +// AI Project module — only when creating new resources +module aiProject 'core/ai/ai-project.bicep' = if (!useExistingAiProject) { + scope: rg + name: 'ai-project' + params: { + tags: tags + location: aiDeploymentsLocation + aiFoundryProjectName: aiFoundryProjectName + principalId: principalId + principalType: principalType + existingAiAccountName: aiFoundryResourceName + deployments: aiProjectDeployments + connections: aiProjectConnections + connectionCredentials: aiProjectConnectionCreds + additionalDependentResources: dependentResources + enableMonitoring: enableMonitoring + enableHostedAgents: enableHostedAgents + enableCapabilityHost: enableCapabilityHost + existingContainerRegistryResourceId: existingContainerRegistryResourceId + existingContainerRegistryEndpoint: existingContainerRegistryEndpoint + existingAcrConnectionName: existingAcrConnectionName + existingApplicationInsightsConnectionString: existingApplicationInsightsConnectionString + existingApplicationInsightsResourceId: existingApplicationInsightsResourceId + existingAppInsightsConnectionName: existingAppInsightsConnectionName + } +} + +// Existing project module — read-only reference when reusing an existing Foundry project +module existingAiProject 'core/ai/existing-ai-project.bicep' = if (useExistingAiProject) { + scope: rg + name: 'existing-ai-project' + params: { + aiServicesAccountName: aiFoundryResourceName + aiFoundryProjectName: aiFoundryProjectName + existingAcrConnectionName: existingAcrConnectionName + existingContainerRegistryEndpoint: existingContainerRegistryEndpoint + existingApplicationInsightsConnectionString: existingApplicationInsightsConnectionString + existingApplicationInsightsResourceId: existingApplicationInsightsResourceId + connections: aiProjectConnections + connectionCredentials: 
aiProjectConnectionCreds + } +} + +// ACR for existing project — create when hosted agents need a registry but the existing project has none +var shouldCreateAcrForExistingProject = useExistingAiProject && shouldCreateAcr +var acrConnectionName = 'acr-${uniqueString(subscription().id, resourceGroupName, location)}' + +module acrForExistingProject 'core/host/acr.bicep' = if (shouldCreateAcrForExistingProject) { + scope: rg + name: 'acr-for-existing-project' + params: { + location: location + tags: tags + resourceName: 'cr${uniqueString(subscription().id, resourceGroupName, location)}' + connectionName: acrConnectionName + principalId: principalId + principalType: principalType + aiServicesAccountName: aiFoundryResourceName + aiProjectName: aiFoundryProjectName + } +} + +// Resources +output AZURE_RESOURCE_GROUP string = resourceGroupName +output AZURE_AI_ACCOUNT_ID string = useExistingAiProject ? existingAiProject.outputs.accountId : aiProject.outputs.accountId +output AZURE_AI_PROJECT_ID string = useExistingAiProject ? existingAiProject.outputs.projectId : aiProject.outputs.projectId +output AZURE_AI_FOUNDRY_PROJECT_ID string = useExistingAiProject ? existingAiProject.outputs.projectId : aiProject.outputs.projectId +output AZURE_AI_ACCOUNT_NAME string = useExistingAiProject ? existingAiProject.outputs.aiServicesAccountName : aiProject.outputs.aiServicesAccountName +output AZURE_AI_PROJECT_NAME string = useExistingAiProject ? existingAiProject.outputs.projectName : aiProject.outputs.projectName + +// Endpoints +output AZURE_AI_PROJECT_ENDPOINT string = useExistingAiProject ? existingAiProject.outputs.AZURE_AI_PROJECT_ENDPOINT : aiProject.outputs.AZURE_AI_PROJECT_ENDPOINT +output AZURE_OPENAI_ENDPOINT string = useExistingAiProject ? existingAiProject.outputs.AZURE_OPENAI_ENDPOINT : aiProject.outputs.AZURE_OPENAI_ENDPOINT +output APPLICATIONINSIGHTS_CONNECTION_STRING string = useExistingAiProject ? 
existingAiProject.outputs.APPLICATIONINSIGHTS_CONNECTION_STRING : aiProject.outputs.APPLICATIONINSIGHTS_CONNECTION_STRING +output APPLICATIONINSIGHTS_RESOURCE_ID string = useExistingAiProject ? existingAiProject.outputs.APPLICATIONINSIGHTS_RESOURCE_ID : aiProject.outputs.APPLICATIONINSIGHTS_RESOURCE_ID + +// Dependent Resources and Connections + +// ACR +output AZURE_AI_PROJECT_ACR_CONNECTION_NAME string = shouldCreateAcrForExistingProject ? acrForExistingProject.outputs.containerRegistryConnectionName : (useExistingAiProject ? existingAiProject.outputs.dependentResources.registry.connectionName : aiProject.outputs.dependentResources.registry.connectionName) +output AZURE_CONTAINER_REGISTRY_ENDPOINT string = shouldCreateAcrForExistingProject ? acrForExistingProject.outputs.containerRegistryLoginServer : (useExistingAiProject ? existingAiProject.outputs.dependentResources.registry.loginServer : aiProject.outputs.dependentResources.registry.loginServer) + +// Bing Search +output BING_GROUNDING_CONNECTION_NAME string = useExistingAiProject ? existingAiProject.outputs.dependentResources.bing_grounding.connectionName : aiProject.outputs.dependentResources.bing_grounding.connectionName +output BING_GROUNDING_RESOURCE_NAME string = useExistingAiProject ? existingAiProject.outputs.dependentResources.bing_grounding.name : aiProject.outputs.dependentResources.bing_grounding.name +output BING_GROUNDING_CONNECTION_ID string = useExistingAiProject ? existingAiProject.outputs.dependentResources.bing_grounding.connectionId : aiProject.outputs.dependentResources.bing_grounding.connectionId + +// Bing Custom Search +output BING_CUSTOM_GROUNDING_CONNECTION_NAME string = useExistingAiProject ? existingAiProject.outputs.dependentResources.bing_custom_grounding.connectionName : aiProject.outputs.dependentResources.bing_custom_grounding.connectionName +output BING_CUSTOM_GROUNDING_NAME string = useExistingAiProject ? 
existingAiProject.outputs.dependentResources.bing_custom_grounding.name : aiProject.outputs.dependentResources.bing_custom_grounding.name +output BING_CUSTOM_GROUNDING_CONNECTION_ID string = useExistingAiProject ? existingAiProject.outputs.dependentResources.bing_custom_grounding.connectionId : aiProject.outputs.dependentResources.bing_custom_grounding.connectionId + +// Azure AI Search +output AZURE_AI_SEARCH_CONNECTION_NAME string = useExistingAiProject ? existingAiProject.outputs.dependentResources.search.connectionName : aiProject.outputs.dependentResources.search.connectionName +output AZURE_AI_SEARCH_SERVICE_NAME string = useExistingAiProject ? existingAiProject.outputs.dependentResources.search.serviceName : aiProject.outputs.dependentResources.search.serviceName + +// Azure Storage +output AZURE_STORAGE_CONNECTION_NAME string = useExistingAiProject ? existingAiProject.outputs.dependentResources.storage.connectionName : aiProject.outputs.dependentResources.storage.connectionName +output AZURE_STORAGE_ACCOUNT_NAME string = useExistingAiProject ? existingAiProject.outputs.dependentResources.storage.accountName : aiProject.outputs.dependentResources.storage.accountName + +// Connections +output AI_PROJECT_CONNECTION_IDS_JSON string = useExistingAiProject ? 
string(existingAiProject.outputs.connectionIds) : string(aiProject.outputs.connectionIds) diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/main.parameters.json b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/main.parameters.json new file mode 100644 index 000000000000..dbf643f3f48f --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/infra/main.parameters.json @@ -0,0 +1,72 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "resourceGroupName": { + "value": "${AZURE_RESOURCE_GROUP}" + }, + "environmentName": { + "value": "${AZURE_ENV_NAME}" + }, + "location": { + "value": "${AZURE_LOCATION}" + }, + "aiFoundryResourceName": { + "value": "${AZURE_AI_ACCOUNT_NAME}" + }, + "aiFoundryProjectName": { + "value": "${AZURE_AI_PROJECT_NAME}" + }, + "aiDeploymentsLocation": { + "value": "${AZURE_LOCATION}" + }, + "principalId": { + "value": "${AZURE_PRINCIPAL_ID}" + }, + "principalType": { + "value": "${AZURE_PRINCIPAL_TYPE}" + }, + "aiProjectDeploymentsJson": { + "value": "${AI_PROJECT_DEPLOYMENTS=[]}" + }, + "aiProjectConnectionsJson": { + "value": "${AI_PROJECT_CONNECTIONS=[]}" + }, + "aiProjectConnectionCredentialsJson": { + "value": "${AI_PROJECT_CONNECTION_CREDENTIALS}" + }, + "aiProjectDependentResourcesJson": { + "value": "${AI_PROJECT_DEPENDENT_RESOURCES=[]}" + }, + "enableMonitoring": { + "value": "${ENABLE_MONITORING=true}" + }, + "enableHostedAgents": { + "value": "${ENABLE_HOSTED_AGENTS=false}" + }, + "enableCapabilityHost": { + "value": "${ENABLE_CAPABILITY_HOST=true}" + }, + "useExistingAiProject": { + "value": "${USE_EXISTING_AI_PROJECT=false}" + }, + "existingContainerRegistryResourceId": { + "value": "${AZURE_CONTAINER_REGISTRY_RESOURCE_ID=}" + }, + "existingContainerRegistryEndpoint": { + "value": 
"${AZURE_CONTAINER_REGISTRY_ENDPOINT=}" + }, + "existingAcrConnectionName": { + "value": "${AZURE_AI_PROJECT_ACR_CONNECTION_NAME=}" + }, + "existingApplicationInsightsConnectionString": { + "value": "${APPLICATIONINSIGHTS_CONNECTION_STRING=}" + }, + "existingApplicationInsightsResourceId": { + "value": "${APPLICATIONINSIGHTS_RESOURCE_ID=}" + }, + "existingAppInsightsConnectionName": { + "value": "${APPLICATIONINSIGHTS_CONNECTION_NAME=}" + } + } +} diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/src/durable-research-agent/Dockerfile b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/src/durable-research-agent/Dockerfile new file mode 100644 index 000000000000..46f8499f6222 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/src/durable-research-agent/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Install local wheel packages first (built by build.sh before docker build) +COPY wheels/ /tmp/wheels/ +RUN pip install --no-cache-dir /tmp/wheels/*.whl && rm -rf /tmp/wheels + +# Install remaining dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY app.py agent.py ./ + +EXPOSE 8088 + +# This is a demo image — enables the "crash" sentinel handling. +# A production image would leave this off (default). +ENV DEMO_MODE=1 + +# Platform nanny worker handles restart on crash; we just run the agent. +CMD ["python", "app.py"] diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/src/durable-research-agent/agent.py b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/src/durable-research-agent/agent.py new file mode 100644 index 000000000000..5f68c8f3d3c3 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/src/durable-research-agent/agent.py @@ -0,0 +1,483 @@ +# Copyright (c) Microsoft. 
class FileStreamHandler:
    """Stream handler that persists every item to disk for crash-resilient replay.

    Each stream item is paired with a **durable** ``event_id`` derived from
    its 1-based line number in ``stream.jsonl``.  Items go onto the queue
    as ``(event_id, item)`` tuples so the SSE consumer (live_stream in
    ``app.py``) can advertise the durable id to clients.  This lets a
    client reconnect with ``?last_event_id=N`` and resume at the correct
    point across container restarts and recovery.  If some items the
    client expected are no longer in the queue (already dequeued by a
    prior consumer), only what is still queued is emitted; a small gap is
    acceptable.

    On construction any existing ``stream.jsonl`` for the task is replayed
    into the queue.  Replay is tolerant of a crash that happened in the
    middle of an append: a torn (unparseable) row is skipped but still
    consumes its event id, and the file is newline-terminated so the next
    append starts a fresh row.
    """

    # Key of the end-of-stream record that ``close()`` appends to the file.
    _DONE_KEY = "__done__"

    def __init__(self, task_id: str, *, base_dir: Path | None = None) -> None:
        """Create the handler and replay any rows already on disk.

        Args:
            task_id: Identifier of the durable task; used as the name of the
                per-task directory holding ``stream.jsonl``.
            base_dir: Optional root directory for stream files.  Defaults to
                the module-level ``_STREAM_DIR``.
        """
        self._task_id = task_id
        root = _STREAM_DIR if base_dir is None else base_dir
        # NOTE(review): task_id is joined into the path unsanitized — confirm
        # callers never pass client-controlled values with path separators.
        self._dir = root / task_id
        self._dir.mkdir(parents=True, exist_ok=True)
        self._file = self._dir / "stream.jsonl"
        self._queue: asyncio.Queue[Any] = asyncio.Queue()
        self._closed = False
        self._SENTINEL = object()
        # _next_event_id is the disk-line counter; it is bumped on every
        # written line (preload + put + __done__ sentinel).  The item put
        # onto the queue is (event_id, item), so resume semantics follow
        # the file rather than a per-instance counter.
        self._next_event_id = 0

        if self._file.exists():
            self._replay_from_disk()

    @classmethod
    def _is_done_marker(cls, data: Any) -> bool:
        """Return True only for the dict record written by ``close()``.

        A plain ``in`` test is wrong here: for a ``str`` item it is a
        substring search (dropping any item whose text mentions the key)
        and for a scalar item it raises ``TypeError``.
        """
        return isinstance(data, dict) and cls._DONE_KEY in data

    def _replay_from_disk(self) -> None:
        """Queue every persisted item and advance the event-id counter."""
        raw = self._file.read_text(encoding="utf-8")
        if raw and not raw.endswith("\n"):
            # A crash mid-append left the last row unterminated.  Terminate
            # it so the next append does not fuse onto the torn row.
            with open(self._file, "a", encoding="utf-8") as f:
                f.write("\n")

        for line in raw.splitlines():
            if not line.strip():
                continue
            self._next_event_id += 1
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # Torn row: it still occupies a disk line (so it keeps its
                # id), but there is nothing usable to replay.
                logging.getLogger(__name__).warning(
                    "Skipping unreadable stream row %d for task %r",
                    self._next_event_id, self._task_id,
                )
                continue
            if not self._is_done_marker(data):
                self._queue.put_nowait((self._next_event_id, data))

    async def put(self, item: Any) -> None:
        """Append ``item`` to ``stream.jsonl`` and enqueue ``(event_id, item)``."""
        with open(self._file, "a", encoding="utf-8") as f:
            f.write(json.dumps(item) + "\n")
        self._next_event_id += 1
        await self._queue.put((self._next_event_id, item))

    async def get(self) -> Any:
        """Return the next ``(event_id, item)`` tuple.

        Raises:
            StopAsyncIteration: when the end-of-stream sentinel queued by
                ``close()`` is dequeued.
        """
        item = await self._queue.get()
        if item is self._SENTINEL:
            raise StopAsyncIteration
        return item

    async def close(self) -> None:
        """Persist the ``__done__`` record and signal end-of-stream to the consumer."""
        self._closed = True
        with open(self._file, "a", encoding="utf-8") as f:
            f.write(json.dumps({self._DONE_KEY: True}) + "\n")
        # __done__ also occupies a disk line; bump the counter so later ids
        # keep matching the disk row count.
        self._next_event_id += 1
        await self._queue.put(self._SENTINEL)
@task(
    name="deep_research",
    steerable=True,
    stream_handler_factory=file_stream_factory,
)
async def deep_research(ctx: TaskContext[dict]) -> dict[str, Any]:
    """Long-running deep-research task: crash-resilient, steerable.

    Progress lives in ``ctx.metadata`` under the keys ``topic``,
    ``completed_phases``, ``results``, ``in_progress_phase``,
    ``completed_subcalls`` and ``current_text``.  This function writes the
    phase-level checkpoint after each phase; the subcall-level keys are
    maintained by ``_run_phase``.  On re-entry with the same topic the
    phase loop resumes at ``completed_phases``; a different topic resets
    every key and starts from phase 0.

    Args:
        ctx: Task context.  ``ctx.input["topic"]`` is the research topic.

    Returns:
        On a full run, a dict with ``topic``, ``phases_completed`` and
        ``report`` (the text of the last phase, or ``""`` when there are no
        results).  When ``ctx.cancel`` is observed at a phase boundary, the
        value returned by ``_wind_down`` instead.
    """
    topic: str = ctx.input["topic"]
    stored_topic = ctx.metadata.get("topic")

    if stored_topic != topic:
        # New or changed topic: discard all prior progress before starting.
        ctx.metadata["topic"] = topic
        ctx.metadata["completed_phases"] = 0
        ctx.metadata["results"] = []
        ctx.metadata["in_progress_phase"] = None
        ctx.metadata["completed_subcalls"] = 0
        ctx.metadata["current_text"] = ""
        await ctx.metadata.flush()
        await _emit_run_start(ctx, topic=topic, prior_topic=stored_topic)
    else:
        # Same topic as the stored one: keep the checkpointed progress.
        await _emit_run_start(ctx, topic=topic, prior_topic=None)

    completed: int = ctx.metadata.get("completed_phases", 0)
    results: list = ctx.metadata.get("results", [])

    # The "recovered" event is only emitted when at least one whole phase
    # had been checkpointed before the restart.
    if ctx.entry_mode == "recovered" and completed > 0:
        await ctx.stream(json.dumps({
            "type": "recovered",
            "completed_phases": completed,
            "total_phases": NUM_PHASES,
            "server_time_utc": _now_iso(),
            "server_uptime_sec": _server_uptime_sec(),
        }))

    # Start at the first phase not yet marked complete.
    for phase_idx in range(completed, NUM_PHASES):
        if ctx.cancel.is_set():
            return await _wind_down(ctx, phase_idx, results)

        phase_started_mono = time.monotonic()
        title = _phase_title(phase_idx)

        await ctx.stream(json.dumps({
            "type": "phase_start",
            "phase": phase_idx + 1,
            "total": NUM_PHASES,
            "title": title,
            "server_time_utc": _now_iso(),
            "server_uptime_sec": _server_uptime_sec(),
        }))

        # Only the three most recent phase results are passed as context.
        phase_text = await _run_phase(
            ctx, phase_idx, topic, title, prior_results=results[-3:],
        )
        results.append({"phase": phase_idx + 1, "title": title, "text": phase_text})

        # --- PHASE-COMPLETE CHECKPOINT ---
        # Clear in-progress subcall state once the phase is done.
        ctx.metadata["completed_phases"] = phase_idx + 1
        ctx.metadata["results"] = results
        ctx.metadata["in_progress_phase"] = None
        ctx.metadata["completed_subcalls"] = 0
        ctx.metadata["current_text"] = ""
        await ctx.metadata.flush()

        phase_duration = round(time.monotonic() - phase_started_mono, 1)
        await ctx.stream(json.dumps({
            "type": "phase_end",
            "phase": phase_idx + 1,
            "total": NUM_PHASES,
            "title": title,
            "server_time_utc": _now_iso(),
            "server_uptime_sec": _server_uptime_sec(),
            "duration_sec": phase_duration,
        }))

        # The phase just checkpointed counts as completed in the wind-down.
        if ctx.cancel.is_set():
            return await _wind_down(ctx, phase_idx + 1, results)

        # No cooldown after the final phase or when the cooldown is disabled.
        if phase_idx + 1 < NUM_PHASES and INTER_PHASE_COOLDOWN_SEC > 0:
            await _cooldown(
                ctx, INTER_PHASE_COOLDOWN_SEC,
                stage="inter_phase",
                phase=phase_idx + 2,  # 1-based number of the upcoming phase
                total=NUM_PHASES,
            )
            # The cooldown returns early on cancel, so re-check here.
            if ctx.cancel.is_set():
                return await _wind_down(ctx, phase_idx + 1, results)

    await ctx.stream(json.dumps({
        "type": "run_complete",
        "server_time_utc": _now_iso(),
        "server_uptime_sec": _server_uptime_sec(),
        "phases_completed": NUM_PHASES,
    }))
    return {
        "topic": topic,
        "phases_completed": NUM_PHASES,
        "report": results[-1]["text"] if results else "",
    }
async def _cooldown(
    ctx: TaskContext,
    duration_sec: float,
    *,
    stage: str,
    phase: int,
    total: int,
    subcall: int | None = None,
    of: int | None = None,
) -> None:
    """Announce a pause on the stream, then wait it out unless cancelled.

    A single ``cooldown`` event is streamed before waiting so the client
    has something to render during the pause.  The wait ends after
    ``duration_sec`` seconds, or earlier as soon as ``ctx.cancel`` is set.

    Args:
        ctx: Task context providing ``stream`` and the ``cancel`` event.
        duration_sec: Maximum time to wait, in seconds.
        stage: Label copied into the event (callers pass ``"inter_phase"``
            or ``"intra_phase"``).
        phase: Phase number reported in the event.
        total: Total phase count reported in the event.
        subcall: Optional subcall number; omitted from the event when None.
        of: Optional subcall count; omitted from the event when None.
    """
    marker: dict[str, Any] = {
        "type": "cooldown",
        "duration_sec": duration_sec,
        "stage": stage,
        "phase": phase,
        "total": total,
        "server_time_utc": _now_iso(),
        "server_uptime_sec": _server_uptime_sec(),
    }
    # Optional fields are appended in a fixed order and only when supplied.
    extras = {"subcall": subcall, "of": of}
    marker.update({key: value for key, value in extras.items() if value is not None})

    await ctx.stream(json.dumps(marker))

    try:
        await asyncio.wait_for(ctx.cancel.wait(), timeout=duration_sec)
    except asyncio.TimeoutError:
        # Full cooldown elapsed without a cancel — the normal case.
        return
+ """ + prior_summary = "" + if prior_results: + prior_summary = "\n\nPrior phases (for context):\n" + "\n".join( + f"- {r['title']}: {r['text'][:200]}..." for r in prior_results + ) + + # Resume in-phase state if we crashed mid-phase. The outer loop + # already advanced phase_idx to the right phase via + # completed_phases; here we figure out how many subcalls of *this* + # phase already finished. + in_progress = ctx.metadata.get("in_progress_phase") + if in_progress == phase_idx: + start_sub = int(ctx.metadata.get("completed_subcalls", 0) or 0) + current_text: str = ctx.metadata.get("current_text", "") or "" + else: + start_sub = 0 + current_text = "" + ctx.metadata["in_progress_phase"] = phase_idx + ctx.metadata["completed_subcalls"] = 0 + ctx.metadata["current_text"] = "" + await ctx.metadata.flush() + + for sub_idx in range(start_sub, CALLS_PER_PHASE): + role_name, role_prompt = _SUB_CALL_ROLES[sub_idx] + instructions = ( + f"You are a research analyst working on the topic: '{topic}'.\n" + f"Current phase: '{phase_title}'.\n" + f"Your role in this sub-step: {role_name}.\n\n" + f"{role_prompt}" + ) + if current_text: + user_input = ( + f"Topic: {topic}\nPhase: {phase_title}\n\n" + f"Previous sub-step output:\n{current_text}{prior_summary}" + ) + else: + user_input = f"Topic: {topic}\nPhase: {phase_title}{prior_summary}" + + await ctx.stream(json.dumps({ + "type": "subcall_start", + "role": role_name, + "index": sub_idx + 1, + "of": CALLS_PER_PHASE, + "server_time_utc": _now_iso(), + })) + + sub_text = await _stream_llm( + ctx, instructions=instructions, user_input=user_input, + ) + + await ctx.stream(json.dumps({ + "type": "subcall_end", + "role": role_name, + "index": sub_idx + 1, + "of": CALLS_PER_PHASE, + "server_time_utc": _now_iso(), + })) + + current_text = sub_text + + # --- SUBCALL-LEVEL CHECKPOINT --- + # Persist what we just produced so a mid-phase crash recovers + # at the next subcall, not at subcall 1. 
async def _stream_llm(
    ctx: TaskContext, *, instructions: str, user_input: str,
) -> str:
    """Run one streaming LLM call and forward token deltas via ``ctx.stream()``.

    Each ``response.output_text.delta`` event is forwarded to the client as
    a ``{"type": "token", "content": ...}`` item; all other event types are
    ignored.

    Args:
        ctx: Task context whose ``stream`` receives the token events.
        instructions: Instructions passed to the model for this call.
        user_input: Input text passed to the model for this call.

    Returns:
        The concatenation of every text delta received, in order.
    """
    # Collect deltas in a list and join once: repeated ``str +=`` copies the
    # growing text on every token.
    pieces: list[str] = []
    event_stream = await _openai_client.responses.create(
        model=_model,
        instructions=instructions,
        input=user_input,
        store=False,
        stream=True,
        max_output_tokens=TARGET_OUTPUT_TOKENS,
    )
    async for event in event_stream:
        if event.type == "response.output_text.delta":
            pieces.append(event.delta)
            await ctx.stream(json.dumps({"type": "token", "content": event.delta}))
    return "".join(pieces)
+ Survives crashes and auto-resumes from last checkpoint on restart. +metadata: + tags: + - AI Agent Hosting + - Invocations Protocol + - Durable Tasks + - Crash Resilience + - Python +protocols: + - protocol: invocations + version: 1.0.0 +resources: + cpu: "1" + memory: 2Gi +environment_variables: + - name: AZURE_AI_MODEL_DEPLOYMENT_NAME + value: gpt-4.1-mini + - name: STAGE_DURATION + value: "10" + # Long-running demo: per-phase ≈ 12s LLM + 3×30s intra + 30s inter ≈ 132s, + # × 15 phases ≈ 33 min total — runs ~2x past the platform's 15-min + # sandbox-eviction window so each demo run exercises the durable-task + # primitive's lease keep-alive path end-to-end (the behavior this + # sample exists to showcase). Local agent.py defaults (10/20s, ~15 min) + # apply when running outside the hosted container for fast iteration. + - name: INTRA_PHASE_COOLDOWN_SEC + value: "30" + - name: INTER_PHASE_COOLDOWN_SEC + value: "30" diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/src/durable-research-agent/app.py b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/src/durable-research-agent/app.py new file mode 100644 index 000000000000..a47b4096ede9 --- /dev/null +++ b/sdk/agentserver/azure-ai-agentserver-invocations/samples/durable-agent-demo/src/durable-research-agent/app.py @@ -0,0 +1,276 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""HTTP host for the durable research agent. + +This file is minimal plumbing. The durability + steering logic is in ``agent.py``. 
def _parse_last_event_id(raw: str) -> int:
    """Convert the ``last_event_id`` query value into a skip count.

    Returns 0 for anything that is not a plain non-negative integer, so a
    malformed value replays the stream from the start instead of failing
    the request.
    """
    if not raw.isdigit():
        return 0
    try:
        return int(raw)
    except ValueError:
        # str.isdigit() accepts characters such as "²" that int() rejects,
        # and int() refuses digit strings longer than the interpreter's
        # conversion limit (Python 3.11+).
        return 0


def _resolve_session_id(request: Request) -> str:
    """Return the request's session id, falling back to the host config.

    All three handlers derive ``task_id`` from this value, so they must
    resolve it the same way or GET/CANCEL would miss the run POST started.
    """
    return getattr(request.state, "session_id", None) or app.config.session_id


def _sse(event_id: int, data: str) -> str:
    """Frame *data* as one Server-Sent Event carrying *event_id*."""
    return f"id: {event_id}\ndata: {data}\n\n"


async def _live_events(run, skip_count: int):
    """Yield SSE frames for an in-memory run, then one final ``done`` frame.

    Items are expected as ``(event_id, chunk)`` tuples; the id is sent to
    the client so ``?last_event_id=N`` is meaningful on reconnect. Events
    whose id is ``<= skip_count`` are dropped. If the requested id is below
    what the run still has available, whatever is available is emitted.
    """
    last_id = skip_count
    try:
        async for item in run:
            if isinstance(item, tuple) and len(item) == 2:
                event_id, chunk = item
            else:
                # Defensive: a handler that emits bare chunks gets ids
                # synthesised locally.
                last_id += 1
                event_id, chunk = last_id, item
            if event_id <= skip_count:
                continue
            last_id = event_id
            yield _sse(event_id, chunk)
        result = await run.result()
        yield _sse(
            last_id + 1,
            json.dumps({
                "type": "done",
                "phases_completed": result.output.get("phases_completed", 0),
            }),
        )
    except TaskCancelled:
        yield _sse(last_id + 1, json.dumps({"type": "done", "reason": "cancelled"}))
    except TaskFailed as exc:
        yield _sse(
            last_id + 1,
            json.dumps({"type": "done", "reason": "failed", "error": str(exc)}),
        )


async def _replay_events(stream_file: Path, skip_count: int):
    """Yield SSE frames replayed from a persisted ``stream.jsonl`` file.

    Each non-blank line is decoded once with ``json.loads``. A decoded dict
    containing ``__done__`` ends the replay with a ``replayed`` frame. If
    the file ends without that sentinel, a ``replay_incomplete`` frame is
    emitted instead.
    """
    event_id = 0
    for raw_line in stream_file.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line:
            continue
        event_id += 1
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            # A hard process exit can leave a partially written line. Skip
            # it rather than aborting the whole response mid-stream. The
            # line still consumes an id so later ids keep their position.
            # NOTE(review): assumes ids track line position - confirm
            # against the stream handler.
            logger.warning(
                "Skipping undecodable line %d in %s", event_id, stream_file
            )
            continue
        if isinstance(data, dict) and "__done__" in data:
            yield _sse(event_id, json.dumps({"type": "done", "reason": "replayed"}))
            return
        if event_id <= skip_count:
            continue
        # Strings are already-serialised JSON and are emitted raw to avoid
        # double-encoding. Anything else is re-serialised so the client
        # never receives a Python repr.
        payload = data if isinstance(data, str) else json.dumps(data)
        yield _sse(event_id, payload)
    # File present but no __done__ sentinel - task may still be recovering.
    yield _sse(
        event_id + 1,
        json.dumps({"type": "done", "reason": "replay_incomplete"}),
    )


# --- Invocation handlers ---------------------------------------------------

@app.invoke_handler
async def handle_invoke(request: Request) -> Response:
    """Dispatch a research task (fire-and-forget).

    Input shape: ``{"message": "<research topic>"}``. A missing or empty
    ``message``, an unparseable body, or a JSON body that is not an object
    all yield ``400``.

    Two special behaviors driven by the request body:

    * ``{"message": "crash"}`` (also ``"kill"`` or ``"💥"``), when the
      container has ``DEMO_MODE=1``, schedules ``os._exit(137)`` 300 ms
      after returning ``202``. The gate on ``DEMO_MODE`` stops a stray
      request from killing a production agent.

    * Any other message starts a normal research run. If a run is already
      in progress on this session, ``start`` raises ``TaskConflictError``
      and the response reports ``"steered"`` instead of ``"started"``.

    Returns:
        ``202`` with ``status``, ``invocation_id`` and ``session_id`` on
        dispatch, ``202`` with ``status="crashing"`` for the demo crash
        trigger, or ``400`` when no usable ``message`` was provided.
    """
    body = await request.body()
    try:
        data = json.loads(body) if body else {}
    except ValueError:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for bodies that are not valid text.
        data = {}
    # Valid JSON that is not an object (list, string, number, null) cannot
    # carry a "message" field; treat it like a missing field (-> 400).
    message = data.get("message") if isinstance(data, dict) else None
    topic = str(message or "").strip()
    if not topic:
        return JSONResponse({"error": "Provide a 'message' field"}, status_code=400)

    # Demo-only crash trigger.
    if topic.lower() in ("crash", "kill", "💥") and os.environ.get("DEMO_MODE") == "1":
        logger.critical("CRASH triggered via /invocations message=%r — exiting in 300ms", topic)

        # call_later keeps the callback strongly referenced by the loop.
        # A fire-and-forget task with no saved reference can be
        # garbage-collected before it ever runs.
        asyncio.get_running_loop().call_later(0.3, os._exit, 137)
        return JSONResponse(
            {
                "status": "crashing",
                "message": (
                    "Process will exit. The platform's nanny worker brings the "
                    "container back within ~1 min on its own (no new ingress "
                    "required) and the durable task auto-resumes from its last "
                    "checkpoint."
                ),
            },
            status_code=202,
        )

    invocation_id: str = request.state.invocation_id
    session_id = _resolve_session_id(request)
    # ONE durable task per session so steering finds the active run.
    # invocation_id labels the call; session_id labels the long-lived task.
    task_id = session_id
    logger.info("POST handler: session_id=%r task_id=%r topic=%r", session_id, task_id, topic)

    status = "started"
    try:
        await deep_research.start(
            task_id=task_id,
            input={"topic": topic, "invocation_id": invocation_id},
        )
    except TaskConflictError as exc:
        # A task with this id already exists, so report the request as
        # steering input for the in-progress run.
        status = "steered"
        logger.info("POST handler: queued steering input (current_status=%s)",
                    getattr(exc, "current_status", None))

    return JSONResponse(
        {
            "status": status,
            "invocation_id": invocation_id,
            "session_id": session_id,
        },
        status_code=202,
    )


@app.get_invocation_handler
async def handle_get(request: Request) -> Response:
    """Stream SSE from the active task, or replay from disk if finished.

    Clients can pass ``?last_event_id=N`` to skip events they have already
    seen on a reconnect; a malformed value is treated as 0.

    If ``get_active_run`` returns a run, its events are streamed live.
    Otherwise the persisted
    ``~/.durable-tasks/_streams/<task_id>/stream.jsonl`` file is replayed,
    so a reconnect after completion still shows the full transcript.

    Returns:
        A ``text/event-stream`` response, or ``404`` when there is neither
        an active run nor a persisted stream file for the session.
    """
    invocation_id = request.state.invocation_id
    task_id = _resolve_session_id(request)  # one task per session

    skip_count = _parse_last_event_id(request.query_params.get("last_event_id", ""))
    logger.info("GET handler: invocation_id=%r task_id=%r skip=%d",
                invocation_id, task_id, skip_count)

    run = await deep_research.get_active_run(task_id)
    if run is not None:
        return StreamingResponse(
            _live_events(run, skip_count),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache"},
        )

    # No live run — replay from the persisted stream file.
    stream_file = (
        Path.home() / ".durable-tasks" / "_streams" / task_id / "stream.jsonl"
    )
    if not stream_file.exists():
        return JSONResponse(
            {"status": "not_found",
             "message": "No active or finished task for this session."},
            status_code=404,
        )

    return StreamingResponse(
        _replay_events(stream_file, skip_count),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache"},
    )


@app.cancel_invocation_handler
async def handle_cancel(request: Request) -> Response:
    """Cancel the running research task.

    Returns a JSON body with ``status="cancelled"`` once cancellation has
    been requested, or ``status="not_found"`` when the session has no
    active run.
    """
    invocation_id = request.state.invocation_id
    task_id = _resolve_session_id(request)  # one task per session
    logger.info("CANCEL handler: invocation_id=%r task_id=%r", invocation_id, task_id)

    run = await deep_research.get_active_run(task_id)
    if run is None:
        return JSONResponse({"status": "not_found", "message": "No active task to cancel."})

    await run.cancel()
    return JSONResponse({"status": "cancelled", "message": "Task cancellation requested."})


if __name__ == "__main__":
    app.run()
+azure-ai-projects>=1.0.0b10 +azure-identity>=1.17.0