From ef5e503b3f7d61c522301e86640dd86a8ab10628 Mon Sep 17 00:00:00 2001 From: bgagent Date: Tue, 30 Jun 2026 14:44:38 -0500 Subject: [PATCH 1/8] docs(design): Bedrock cost attribution design (#215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Design for per-user/per-repo Bedrock spend attribution. Key finding: Bedrock is invoked by the Claude Code CLI subprocess, not the agent's boto3, so both tracks (IAM session tags + request metadata) are wired via Claude Code config (awsCredentialExport, ANTHROPIC_CUSTOM_HEADERS) and a new BedrockInvokeRole — not by extending aws_session.py. Refs #215 --- docs/design/BEDROCK_COST_ATTRIBUTION.md | 114 +++++++++++++++++ .../architecture/Bedrock-cost-attribution.md | 118 ++++++++++++++++++ 2 files changed, 232 insertions(+) create mode 100644 docs/design/BEDROCK_COST_ATTRIBUTION.md create mode 100644 docs/src/content/docs/architecture/Bedrock-cost-attribution.md diff --git a/docs/design/BEDROCK_COST_ATTRIBUTION.md b/docs/design/BEDROCK_COST_ATTRIBUTION.md new file mode 100644 index 00000000..929c6d60 --- /dev/null +++ b/docs/design/BEDROCK_COST_ATTRIBUTION.md @@ -0,0 +1,114 @@ +# Bedrock cost attribution + +Design for [#215](https://github.com/aws-samples/sample-autonomous-cloud-coding-agents/issues/215). Adds AWS-native, per-user/per-repo attribution of **Bedrock model-inference spend** on top of the in-app `cost_usd` meter and the #211 per-session tenant-data isolation. + +## TL;DR + +Bedrock is invoked by the **Claude Code CLI subprocess** (`CLAUDE_CODE_USE_BEDROCK=1`), not by the agent's boto3. So neither track can be built by extending `agent/src/aws_session.py` (which scopes DynamoDB/S3 tenant data only). Both levers live in **Claude Code's own configuration**, set by the agent before it spawns the subprocess: + +| Track | Mechanism | Surfaces in | AC | +|---|---|---|---| +| 1. 
IAM session-tag attribution | Claude Code `awsCredentialExport` → helper does `sts:AssumeRole --tags {user_id,repo,task_id}` against a new **BedrockInvokeRole** | CUR 2.0 / Cost Explorer (`iamPrincipal/` prefix), aggregated per usage-type/day | #1, #2 | +| 2. Per-request metadata | `ANTHROPIC_CUSTOM_HEADERS: X-Amzn-Bedrock-Request-Metadata: {...}` env var | Model-invocation logs (`requestMetadata` field), per call | #3 | +| 3. Operator docs | `COST_ATTRIBUTION.md` + cross-links | — | #5 | + +The two tracks are **complementary** (per AWS docs): session tags give aggregated chargeback in billing; request metadata gives per-call forensics in logs. Session tags are *not* written to invocation logs, and request metadata is *not* a cost-allocation tag — you need both. + +## Why the issue's original approach doesn't apply + +The issue proposed extending `aws_session.py` / the `DeferredRefreshableCredentials` pattern to route `InvokeModel` through tagged creds. That pattern governs the agent's **boto3** clients for tenant data. But: + +``` +agent/src/runner.py::_setup_agent_env + → os.environ["CLAUDE_CODE_USE_BEDROCK"] = "1" + → ClaudeSDKClient spawns the `claude` CLI subprocess + → subprocess calls bedrock-runtime InvokeModel using the AWS SDK default + credential chain (today: the ambient compute role) +``` + +The agent never makes the `InvokeModel` call, so it cannot attach creds or headers to it directly. The control point is **how Claude Code resolves credentials and headers**, configured via Claude Code settings/env before `client.connect()`. + +Verified Claude Code behavior (code.claude.com/docs/en/amazon-bedrock, /env-vars): + +- **Credentials:** default AWS SDK chain. Mutating the parent process's `AWS_*` env vars mid-session is **not** re-read. For refresh, Claude Code supports `awsCredentialExport` — a helper command run at session start and re-run ~5 min before the `Expiration` it returns. 
This is exactly what an 8 h task needs to survive the **1 h role-chaining cap**. +- **Request metadata:** Claude Code uses the **Invoke API and does not support Converse**, so the Converse `requestMetadata` field is unreachable. The only lever is `ANTHROPIC_CUSTOM_HEADERS` (static per process). Because ABCA runs **one task per container per Claude Code session**, "static per process" == "per task" — sufficient for `{task_id, user_id, repo}` attribution. No proxy/gateway needed. + +## Track 1 — IAM session-tag attribution + +### New construct: `BedrockInvokeRole` + +A dedicated role the agent assumes *only* to mint tagged credentials for Claude Code's Bedrock calls. Kept separate from `AgentSessionRole` (tenant data) so the trust/grant surfaces stay independent and auditable. + +- **Trust:** same compute roles as `AgentSessionRole` (AgentCore ExecutionRole, ECS task role), with `sts:AssumeRole` + `sts:TagSession`. +- **Grants:** `bedrock:InvokeModel` + `bedrock:InvokeModelWithResponseStream` on the **exact** foundation-model + cross-region inference-profile ARNs already enumerated in `agent.ts` / `ecs-agent-cluster.ts` (Sonnet 4.6, Opus 4, Haiku 4.5). No wildcards — reuses the existing ARN allowlist. +- **`maxSessionDuration`: 1 h** (documents the role-chaining cap; refresh handles longevity). +- Exposes `admitComputeRole()` mirroring `AgentSessionRole`, so ECS wiring is symmetric. + +Once this exists, **the compute role drops `bedrock:InvokeModel`** — model invocation moves entirely onto the tagged BedrockInvokeRole. (The #211 comment "Bedrock intentionally stays on the compute role to avoid 1 h expiry" is resolved by `awsCredentialExport`'s refresh.) 
+ +### Credential helper + Claude Code wiring + +A small helper script (shipped in the agent image) that `awsCredentialExport` invokes: + +``` +assume-role --role-arn $BEDROCK_INVOKE_ROLE_ARN \ + --tags user_id=$USER_ID repo=$REPO task_id=$TASK_ID +→ emits {"Credentials":{AccessKeyId,SecretAccessKey,SessionToken,Expiration}} +``` + +- Reuses the **same STS `assume_role` + tag-truncation logic** already in `aws_session.py` (factor the tag-building + 256-char clamp into a shared helper; don't duplicate). +- `Expiration` is the real STS expiry, so Claude Code re-runs the helper before the 1 h cap. +- `_setup_agent_env` writes Claude Code's `awsCredentialExport` setting (and `BEDROCK_INVOKE_ROLE_ARN` / tag values) **into a trusted, agent-controlled settings location** — *not* the cloned repo's `.claude/settings.json`. + +> **Security note (must not be skipped):** `awsCredentialExport` runs an arbitrary shell command. `setting_sources=["project"]` currently reads the **untrusted cloned target repo's** `.claude/settings.json`. We must inject `awsCredentialExport` via a location the target repo **cannot override** (user-level settings or an explicit `--settings` file the agent owns), and confirm Claude Code's precedence makes project settings unable to redefine it. A repo that could set `awsCredentialExport` would get arbitrary code execution with the compute role. This is the single highest-risk item in the design and gets a dedicated test. + +### Fail-open vs fail-closed + +Unlike #211 tenant isolation (fail **closed** — a scoping failure means cross-tenant exposure), Bedrock attribution is a **billing/observability** control. If the helper can't assume the role, the correct failure mode is to **fall back to the compute role and emit a warning**, not to abort the task — losing chargeback granularity is not a security incident. When `BEDROCK_INVOKE_ROLE_ARN` is unset (local/dev), behavior is identical to today. 
+ +## Track 2 — per-request metadata + +In `_setup_agent_env`, set: + +```python +os.environ["ANTHROPIC_CUSTOM_HEADERS"] = ( + "X-Amzn-Bedrock-Request-Metadata: " + + json.dumps({"task_id": ..., "user_id": ..., "repo": ...}) # 256-char clamp, ≤16 keys +) +``` + +Gated on invocation logging being enabled (it already is — `agent.ts` configures the CloudWatch destination). Surfaces under the `requestMetadata` field in `/aws/bedrock/model-invocation-logs/`. + +> **Open risk to validate before merge:** Bedrock rejects `X-Amzn-Bedrock-Request-Metadata` with `InvalidSignatureException` if the header is omitted from the SigV4 `SignedHeaders`. AWS SDKs that expose metadata as a parameter sign it automatically; a custom header injected via `ANTHROPIC_CUSTOM_HEADERS` may **not** be in Claude Code's signed-headers list. **This must be tested against a live Bedrock endpoint.** If it fails, this track is a documented blocker (AC#3 explicitly allows "or documented blocker if Claude Code cannot pass metadata"), and per-call attribution falls back to correlating invocation-log `identity.arn` + `RoleSessionName` (`abca-<task_id>`) — which Track 1's tagged session already provides. + +## Track 3 — operator documentation + +New `docs/guides/COST_ATTRIBUTION.md`: + +- The three meters (in-app `cost_usd`, CUR session-tag chargeback, invocation-log per-call) and when to use each. +- FinOps checklist: activate `iamPrincipal/{user_id,repo}` cost-allocation tags in Billing; create a CUR 2.0 export **with caller-identity ARN** (existing exports don't backfill); set budgets. +- Note: tags aren't retroactive and take ≤24 h to appear. + +Cross-link from `COST_MODEL.md#cost-attribution` and `DEPLOYMENT_GUIDE.md`. (Roadmap links from the issue are stale — removed in #505.) + +## Out of scope (unchanged from issue) + +Bedrock Projects/Workspaces (`bedrock-mantle`, not the Claude Code path); replacing in-app `cost_usd`; org-level CUR/Budgets setup (operator responsibility). 
Application inference profiles per repo → follow-up #489. + +## Acceptance-criteria mapping + +| AC | Met by | +|---|---| +| #1 Bedrock uses session-tagged creds (AgentCore + ECS); dev unchanged when unset | Track 1: BedrockInvokeRole + `awsCredentialExport`; fall back to compute role when `BEDROCK_INVOKE_ROLE_ARN` unset | +| #2 Session tags documented as billable; operator Billing steps | Track 3 | +| #3 Per-request metadata `{task_id,user_id,repo}` when logging enabled (or documented blocker) | Track 2 + SigV4 validation gate | +| #4 Tests: CDK Bedrock grant on role; cred routing; no #211 regression | New `bedrock-invoke-role.test.ts`; helper unit test; #211 tests untouched (orthogonal path) | +| #5 `COST_ATTRIBUTION.md` + accurate shipped/planned | Track 3 | +| #6 Starlight mirrors synced | `mise //docs:sync` | + +## Test plan + +- **CDK:** assert `BedrockInvokeRole` grants `InvokeModel`/`InvokeModelWithResponseStream` on the model+profile ARN allowlist (no wildcard); assert trust admits both compute roles with `TagSession`; assert compute role **no longer** has `bedrock:InvokeModel`. +- **Security test:** assert the agent injects `awsCredentialExport` in a location the cloned repo cannot override (the highest-risk item above). +- **Agent:** unit-test the credential helper (tag building reuses `aws_session` logic; 256-char clamp; JSON shape with `Expiration`); unit-test `ANTHROPIC_CUSTOM_HEADERS` assembly. +- **Live validation (pre-merge, manual):** confirm `X-Amzn-Bedrock-Request-Metadata` is honored (no `InvalidSignatureException`) and lands in invocation logs; confirm `iamPrincipal/user_id` appears in Cost Explorer after tag activation. 
diff --git a/docs/src/content/docs/architecture/Bedrock-cost-attribution.md b/docs/src/content/docs/architecture/Bedrock-cost-attribution.md new file mode 100644 index 00000000..cd04a593 --- /dev/null +++ b/docs/src/content/docs/architecture/Bedrock-cost-attribution.md @@ -0,0 +1,118 @@ +--- +title: Bedrock cost attribution +--- + +# Bedrock cost attribution + +Design for [#215](https://github.com/aws-samples/sample-autonomous-cloud-coding-agents/issues/215). Adds AWS-native, per-user/per-repo attribution of **Bedrock model-inference spend** on top of the in-app `cost_usd` meter and the #211 per-session tenant-data isolation. + +## TL;DR + +Bedrock is invoked by the **Claude Code CLI subprocess** (`CLAUDE_CODE_USE_BEDROCK=1`), not by the agent's boto3. So neither track can be built by extending `agent/src/aws_session.py` (which scopes DynamoDB/S3 tenant data only). Both levers live in **Claude Code's own configuration**, set by the agent before it spawns the subprocess: + +| Track | Mechanism | Surfaces in | AC | +|---|---|---|---| +| 1. IAM session-tag attribution | Claude Code `awsCredentialExport` → helper does `sts:AssumeRole --tags {user_id,repo,task_id}` against a new **BedrockInvokeRole** | CUR 2.0 / Cost Explorer (`iamPrincipal/` prefix), aggregated per usage-type/day | #1, #2 | +| 2. Per-request metadata | `ANTHROPIC_CUSTOM_HEADERS: X-Amzn-Bedrock-Request-Metadata: {...}` env var | Model-invocation logs (`requestMetadata` field), per call | #3 | +| 3. Operator docs | `COST_ATTRIBUTION.md` + cross-links | — | #5 | + +The two tracks are **complementary** (per AWS docs): session tags give aggregated chargeback in billing; request metadata gives per-call forensics in logs. Session tags are *not* written to invocation logs, and request metadata is *not* a cost-allocation tag — you need both. 
+ +## Why the issue's original approach doesn't apply + +The issue proposed extending `aws_session.py` / the `DeferredRefreshableCredentials` pattern to route `InvokeModel` through tagged creds. That pattern governs the agent's **boto3** clients for tenant data. But: + +``` +agent/src/runner.py::_setup_agent_env + → os.environ["CLAUDE_CODE_USE_BEDROCK"] = "1" + → ClaudeSDKClient spawns the `claude` CLI subprocess + → subprocess calls bedrock-runtime InvokeModel using the AWS SDK default + credential chain (today: the ambient compute role) +``` + +The agent never makes the `InvokeModel` call, so it cannot attach creds or headers to it directly. The control point is **how Claude Code resolves credentials and headers**, configured via Claude Code settings/env before `client.connect()`. + +Verified Claude Code behavior (code.claude.com/docs/en/amazon-bedrock, /env-vars): + +- **Credentials:** default AWS SDK chain. Mutating the parent process's `AWS_*` env vars mid-session is **not** re-read. For refresh, Claude Code supports `awsCredentialExport` — a helper command run at session start and re-run ~5 min before the `Expiration` it returns. This is exactly what an 8 h task needs to survive the **1 h role-chaining cap**. +- **Request metadata:** Claude Code uses the **Invoke API and does not support Converse**, so the Converse `requestMetadata` field is unreachable. The only lever is `ANTHROPIC_CUSTOM_HEADERS` (static per process). Because ABCA runs **one task per container per Claude Code session**, "static per process" == "per task" — sufficient for `{task_id, user_id, repo}` attribution. No proxy/gateway needed. + +## Track 1 — IAM session-tag attribution + +### New construct: `BedrockInvokeRole` + +A dedicated role the agent assumes *only* to mint tagged credentials for Claude Code's Bedrock calls. Kept separate from `AgentSessionRole` (tenant data) so the trust/grant surfaces stay independent and auditable. 
+ +- **Trust:** same compute roles as `AgentSessionRole` (AgentCore ExecutionRole, ECS task role), with `sts:AssumeRole` + `sts:TagSession`. +- **Grants:** `bedrock:InvokeModel` + `bedrock:InvokeModelWithResponseStream` on the **exact** foundation-model + cross-region inference-profile ARNs already enumerated in `agent.ts` / `ecs-agent-cluster.ts` (Sonnet 4.6, Opus 4, Haiku 4.5). No wildcards — reuses the existing ARN allowlist. +- **`maxSessionDuration`: 1 h** (documents the role-chaining cap; refresh handles longevity). +- Exposes `admitComputeRole()` mirroring `AgentSessionRole`, so ECS wiring is symmetric. + +Once this exists, **the compute role drops `bedrock:InvokeModel`** — model invocation moves entirely onto the tagged BedrockInvokeRole. (The #211 comment "Bedrock intentionally stays on the compute role to avoid 1 h expiry" is resolved by `awsCredentialExport`'s refresh.) + +### Credential helper + Claude Code wiring + +A small helper script (shipped in the agent image) that `awsCredentialExport` invokes: + +``` +assume-role --role-arn $BEDROCK_INVOKE_ROLE_ARN \ + --tags user_id=$USER_ID repo=$REPO task_id=$TASK_ID +→ emits {"Credentials":{AccessKeyId,SecretAccessKey,SessionToken,Expiration}} +``` + +- Reuses the **same STS `assume_role` + tag-truncation logic** already in `aws_session.py` (factor the tag-building + 256-char clamp into a shared helper; don't duplicate). +- `Expiration` is the real STS expiry, so Claude Code re-runs the helper before the 1 h cap. +- `_setup_agent_env` writes Claude Code's `awsCredentialExport` setting (and `BEDROCK_INVOKE_ROLE_ARN` / tag values) **into a trusted, agent-controlled settings location** — *not* the cloned repo's `.claude/settings.json`. + +> **Security note (must not be skipped):** `awsCredentialExport` runs an arbitrary shell command. `setting_sources=["project"]` currently reads the **untrusted cloned target repo's** `.claude/settings.json`. 
We must inject `awsCredentialExport` via a location the target repo **cannot override** (user-level settings or an explicit `--settings` file the agent owns), and confirm Claude Code's precedence makes project settings unable to redefine it. A repo that could set `awsCredentialExport` would get arbitrary code execution with the compute role. This is the single highest-risk item in the design and gets a dedicated test. + +### Fail-open vs fail-closed + +Unlike #211 tenant isolation (fail **closed** — a scoping failure means cross-tenant exposure), Bedrock attribution is a **billing/observability** control. If the helper can't assume the role, the correct failure mode is to **fall back to the compute role and emit a warning**, not to abort the task — losing chargeback granularity is not a security incident. When `BEDROCK_INVOKE_ROLE_ARN` is unset (local/dev), behavior is identical to today. + +## Track 2 — per-request metadata + +In `_setup_agent_env`, set: + +```python +os.environ["ANTHROPIC_CUSTOM_HEADERS"] = ( + "X-Amzn-Bedrock-Request-Metadata: " + + json.dumps({"task_id": ..., "user_id": ..., "repo": ...}) # 256-char clamp, ≤16 keys +) +``` + +Gated on invocation logging being enabled (it already is — `agent.ts` configures the CloudWatch destination). Surfaces under the `requestMetadata` field in `/aws/bedrock/model-invocation-logs/`. + +> **Open risk to validate before merge:** Bedrock rejects `X-Amzn-Bedrock-Request-Metadata` with `InvalidSignatureException` if the header is omitted from the SigV4 `SignedHeaders`. AWS SDKs that expose metadata as a parameter sign it automatically; a custom header injected via `ANTHROPIC_CUSTOM_HEADERS` may **not** be in Claude Code's signed-headers list. 
**This must be tested against a live Bedrock endpoint.** If it fails, this track is a documented blocker (AC#3 explicitly allows "or documented blocker if Claude Code cannot pass metadata"), and per-call attribution falls back to correlating invocation-log `identity.arn` + `RoleSessionName` (`abca-<task_id>`) — which Track 1's tagged session already provides. + +## Track 3 — operator documentation + +New `docs/guides/COST_ATTRIBUTION.md`: + +- The three meters (in-app `cost_usd`, CUR session-tag chargeback, invocation-log per-call) and when to use each. +- FinOps checklist: activate `iamPrincipal/{user_id,repo}` cost-allocation tags in Billing; create a CUR 2.0 export **with caller-identity ARN** (existing exports don't backfill); set budgets. +- Note: tags aren't retroactive and take ≤24 h to appear. + +Cross-link from `COST_MODEL.md#cost-attribution` and `DEPLOYMENT_GUIDE.md`. (Roadmap links from the issue are stale — removed in #505.) + +## Out of scope (unchanged from issue) + +Bedrock Projects/Workspaces (`bedrock-mantle`, not the Claude Code path); replacing in-app `cost_usd`; org-level CUR/Budgets setup (operator responsibility). Application inference profiles per repo → follow-up #489. 
+ +## Acceptance-criteria mapping + +| AC | Met by | +|---|---| +| #1 Bedrock uses session-tagged creds (AgentCore + ECS); dev unchanged when unset | Track 1: BedrockInvokeRole + `awsCredentialExport`; fall back to compute role when `BEDROCK_INVOKE_ROLE_ARN` unset | +| #2 Session tags documented as billable; operator Billing steps | Track 3 | +| #3 Per-request metadata `{task_id,user_id,repo}` when logging enabled (or documented blocker) | Track 2 + SigV4 validation gate | +| #4 Tests: CDK Bedrock grant on role; cred routing; no #211 regression | New `bedrock-invoke-role.test.ts`; helper unit test; #211 tests untouched (orthogonal path) | +| #5 `COST_ATTRIBUTION.md` + accurate shipped/planned | Track 3 | +| #6 Starlight mirrors synced | `mise //docs:sync` | + +## Test plan + +- **CDK:** assert `BedrockInvokeRole` grants `InvokeModel`/`InvokeModelWithResponseStream` on the model+profile ARN allowlist (no wildcard); assert trust admits both compute roles with `TagSession`; assert compute role **no longer** has `bedrock:InvokeModel`. +- **Security test:** assert the agent injects `awsCredentialExport` in a location the cloned repo cannot override (the highest-risk item above). +- **Agent:** unit-test the credential helper (tag building reuses `aws_session` logic; 256-char clamp; JSON shape with `Expiration`); unit-test `ANTHROPIC_CUSTOM_HEADERS` assembly. +- **Live validation (pre-merge, manual):** confirm `X-Amzn-Bedrock-Request-Metadata` is honored (no `InvalidSignatureException`) and lands in invocation logs; confirm `iamPrincipal/user_id` appears in Cost Explorer after tag activation. From 6f3e51d290d4c8d6dfb226f591878c640f68610c Mon Sep 17 00:00:00 2001 From: bgagent Date: Tue, 30 Jun 2026 15:31:32 -0500 Subject: [PATCH 2/8] feat(cost): Bedrock cost attribution via session tags + request metadata (#215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attribute Bedrock model-inference spend per user/repo. 
Bedrock is invoked by the Claude Code subprocess (CLAUDE_CODE_USE_BEDROCK=1), so attribution is wired through Claude Code's config, not the agent's boto3. Track 1 — IAM session-tag chargeback (CUR 2.0 / Cost Explorer): - Grant bedrock:InvokeModel* on the existing AgentSessionRole (reuse, not a new role) via grantInvoke, mirroring the compute-role grant exactly so cross-region profiles never AccessDenied. Compute role keeps its grant. - bedrock_creds_helper.py assumes the SessionRole with {user_id,repo,task_id} STS tags and emits creds JSON for Claude Code's awsCredentialExport, which refreshes before the 1h role-chaining cap. Fails OPEN to ambient creds (billing control, not isolation). awsCredentialExport lives in root-owned /etc/claude-code/managed-settings.json so the untrusted repo can't override it (RCE boundary). Track 2 — per-call forensics (model-invocation logs): - Set X-Amzn-Bedrock-Request-Metadata via ANTHROPIC_CUSTOM_HEADERS on the subprocess env (one container = one task, so static-per-process is per-task; process-env so the repo can't alter it). SigV4 signed-headers behavior to be validated live (AC#3 documented-blocker path). Track 3 — operator guide COST_ATTRIBUTION.md + cross-links, plus a prominent warning that in-app cost_usd is a client-side SDK estimate (authoritative source is AWS Cost Explorer / CUR 2.0), mirroring the Claude Agent SDK cost-tracking caveat. Align claude-agent-sdk 0.2.110 (bundles CLI 2.1.191) with the npm CLI pin. Tests: CDK Bedrock grant present/absent; helper assume + fail-open paths; runner file+header wiring. #211 tenant-isolation path untouched. 
Refs #215 --- agent/Dockerfile | 15 +- agent/managed-settings.json | 3 + agent/pyproject.toml | 2 +- agent/src/aws_session.py | 18 +++ agent/src/bedrock_creds_helper.py | 147 ++++++++++++++++++ agent/src/runner.py | 46 ++++++ agent/tests/test_bedrock_creds_helper.py | 120 ++++++++++++++ agent/tests/test_runner.py | 46 +++++- agent/uv.lock | 16 +- cdk/src/constructs/agent-session-role.ts | 46 +++++- cdk/src/stacks/agent.ts | 11 ++ .../constructs/agent-session-role.test.ts | 47 ++++++ docs/design/BEDROCK_COST_ATTRIBUTION.md | 72 +++++---- docs/design/COST_MODEL.md | 4 +- docs/guides/COST_ATTRIBUTION.md | 54 +++++++ docs/guides/DEPLOYMENT_GUIDE.md | 1 + docs/scripts/sync-starlight.mjs | 6 + .../architecture/Bedrock-cost-attribution.md | 72 +++++---- .../content/docs/architecture/Cost-model.md | 4 +- .../docs/getting-started/Cost-attribution.md | 58 +++++++ .../docs/getting-started/Deployment-guide.md | 1 + 21 files changed, 704 insertions(+), 85 deletions(-) create mode 100644 agent/managed-settings.json create mode 100644 agent/src/bedrock_creds_helper.py create mode 100644 agent/tests/test_bedrock_creds_helper.py create mode 100644 docs/guides/COST_ATTRIBUTION.md create mode 100644 docs/src/content/docs/getting-started/Cost-attribution.md diff --git a/agent/Dockerfile b/agent/Dockerfile index 271c1ddc..82c5f5e1 100644 --- a/agent/Dockerfile +++ b/agent/Dockerfile @@ -46,8 +46,14 @@ RUN curl -fsSL https://deb.nodesource.com/setup_24.x | bash - && \ # Install Claude Code CLI (the Python SDK requires this binary) # Then update known vulnerable transitive packages where fixed versions exist. +# Pinned 2.1.191 to match the CLI bundled by claude-agent-sdk 0.2.110 (see +# agent/pyproject.toml) — the SDK and the on-PATH CLI must agree on the control +# protocol. This version also has the awsCredentialExport behavior #215 needs: +# returned creds are cached until 5 min before the JSON's `Expiration`, so an +# 8 h task re-assumes the 1 h-capped SessionRole before expiry. 
Older builds +# only refreshed hourly on a timer, racing the role-chaining cap. RUN npm install -g npm@latest && \ - npm install -g @anthropic-ai/claude-code@2.1.142 && \ + npm install -g @anthropic-ai/claude-code@2.1.191 && \ CLAUDE_NPM_ROOT="$(npm root -g)/@anthropic-ai/claude-code" && \ npm --prefix "${CLAUDE_NPM_ROOT}" update tar minimatch glob cross-spawn picomatch @@ -81,6 +87,13 @@ COPY contracts/ /app/contracts/ # ``WorkflowValidationError: workflow '...' not found at /app/workflows/...``. COPY agent/workflows/ /app/workflows/ COPY agent/prepare-commit-msg.sh /app/ +# Claude Code managed settings (#215). The highest-precedence settings layer — +# loaded regardless of setting_sources and unoverridable by the untrusted cloned +# repo's project .claude/settings.json. Carries awsCredentialExport so Bedrock +# calls use session-tagged, refreshable credentials for cost attribution. +# Placing awsCredentialExport (an arbitrary command) anywhere the target repo +# can influence would be RCE with the compute role, so it lives ONLY here. +COPY agent/managed-settings.json /etc/claude-code/managed-settings.json # Create non-root user (Claude Code CLI refuses bypassPermissions as root) RUN useradd -m -s /bin/bash agent && \ diff --git a/agent/managed-settings.json b/agent/managed-settings.json new file mode 100644 index 00000000..86dcaa69 --- /dev/null +++ b/agent/managed-settings.json @@ -0,0 +1,3 @@ +{ + "awsCredentialExport": "/app/.venv/bin/python /app/src/bedrock_creds_helper.py" +} diff --git a/agent/pyproject.toml b/agent/pyproject.toml index fc3de8d0..d21f340c 100644 --- a/agent/pyproject.toml +++ b/agent/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ # would degrade gracefully — but for now we keep the dep to # preserve the clean code path. 
"bedrock-agentcore==1.9.1", #https://pypi.org/project/bedrock-agentcore/ - "claude-agent-sdk==0.2.82", #https://github.com/anthropics/claude-agent-sdk-python/releases/tag/v0.2.82 + "claude-agent-sdk==0.2.110", #https://github.com/anthropics/claude-agent-sdk-python/releases/tag/v0.2.110 (bundles claude CLI 2.1.191; kept in lockstep with the npm CLI pin in the Dockerfile, #215) "requests==2.34.2", #https://pypi.org/project/requests/ "fastapi==0.136.1", #https://pypi.org/project/fastapi/ "uvicorn==0.47.0", #https://pypi.org/project/uvicorn/ diff --git a/agent/src/aws_session.py b/agent/src/aws_session.py index 2c6a906c..51c02249 100644 --- a/agent/src/aws_session.py +++ b/agent/src/aws_session.py @@ -78,6 +78,19 @@ class SessionScopingError(RuntimeError): """ +def build_session_tags(user_id: str, repo: str, task_id: str) -> list[dict[str, str]]: + """Build the AssumeRole ``Tags`` list from tenant identifiers. + + Only non-empty values are included. Values are truncated to the IAM limit + so an over-long repo slug can never make ``AssumeRole`` fail. Shared by the + in-process tenant-data session (:func:`_session_tags`) and the out-of-process + Bedrock credential helper (``bedrock_creds_helper.py``) so both mint the + same ``{user_id, repo, task_id}`` tags from one definition. + """ + pairs = (("user_id", user_id), ("repo", repo), ("task_id", task_id)) + return [{"Key": key, "Value": value[:_MAX_TAG_VALUE_LEN]} for key, value in pairs if value] + + def configure_session(user_id: str, repo: str, task_id: str) -> None: """Record session-tag values in private module state for later use. @@ -115,6 +128,11 @@ def _session_tags() -> list[dict[str, str]]: return [{"Key": key, "Value": value[:_MAX_TAG_VALUE_LEN]} for key, value in _tags.items()] +# Public alias of the IAM tag-value length cap, for the Bedrock credential +# helper which builds tags from CLI args rather than module state. 
+MAX_TAG_VALUE_LEN = _MAX_TAG_VALUE_LEN + + def _build_scoped_session(role_arn: str) -> Any: """Build a boto3 Session backed by refreshable assumed-role credentials. diff --git a/agent/src/bedrock_creds_helper.py b/agent/src/bedrock_creds_helper.py new file mode 100644 index 00000000..035c947e --- /dev/null +++ b/agent/src/bedrock_creds_helper.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +"""Credential helper for Claude Code's Bedrock calls (#215, cost attribution). + +Claude Code (``CLAUDE_CODE_USE_BEDROCK=1``) makes every ``InvokeModel`` call — +not the agent's boto3 — so the per-task tenant-data SessionRole in +``aws_session.py`` cannot tag those calls. Instead Claude Code's +``awsCredentialExport`` setting (in the image's managed-settings layer) runs +this script, captures its JSON stdout, and signs Bedrock requests with the +returned credentials. With a real ``Expiration`` it re-runs ~5 min before +expiry, so an 8 h task survives the 1 h role-chaining cap. + +Goal: assume the per-task SessionRole with ``{user_id, repo, task_id}`` STS +session tags so Bedrock spend is attributable per user/repo in AWS Cost +Explorer / CUR 2.0 (``iamPrincipal/*`` dimensions, after the operator activates +the cost-allocation tags). The same role already carries the tenant-data grants; +Track-1 only adds ``bedrock:InvokeModel*`` to it (see ``agent-session-role.ts``). + +**Fails OPEN.** Bedrock attribution is a billing/observability control, not a +tenant-isolation one (contrast ``aws_session.py``, which fails closed). If the +attribution config is absent or the assume-role fails, this helper emits the +**ambient** compute-role credentials so Bedrock keeps working untagged — losing +chargeback granularity is not a security incident, and the compute role retains +``InvokeModel`` precisely so this fallback works. 
+ +The role ARN and tag values are read from a 0600 JSON file the agent writes at +startup (``write_attribution_file``), not from the environment — so the tenant +identifiers are not inherited by the untrusted repo subprocesses the agent +spawns, matching the discipline in ``aws_session.py``. + +Output shape (consumed by Claude Code's awsCredentialExport): + + {"Credentials": {"AccessKeyId": "...", "SecretAccessKey": "...", + "SessionToken": "...", "Expiration": ""}} +""" + +from __future__ import annotations + +import json +import os +import sys +from typing import Any + +# Fixed path the agent writes (0600) and this helper reads. A fixed path is +# required because the managed-settings ``awsCredentialExport`` command is +# static (baked into the image) and cannot carry per-task arguments. +ATTRIBUTION_FILE_ENV = "BEDROCK_ATTRIBUTION_FILE" +DEFAULT_ATTRIBUTION_FILE = "/home/agent/.bedrock-attribution.json" + +# Role chaining caps the assumed session at 1 hour; request the max the cap +# allows. Claude Code refreshes ~5 min before the returned Expiration. +_CHAINED_SESSION_DURATION_S = 3600 + + +def attribution_file_path() -> str: + return os.environ.get(ATTRIBUTION_FILE_ENV, "").strip() or DEFAULT_ATTRIBUTION_FILE + + +def write_attribution_file( + role_arn: str, tags: list[dict[str, str]], path: str | None = None +) -> str: + """Persist the SessionRole ARN + STS tags for the helper to read. + + Written 0600 and owned by the agent user. Returns the path written. Called + by the agent at startup (see ``runner._setup_agent_env``) only when a + SessionRole is configured; absence is the fail-open signal. + """ + target = path or attribution_file_path() + payload = json.dumps({"role_arn": role_arn, "tags": tags}) + # Create with 0600 from the start (os.open + O_CREAT honors mode, modulo + # umask) so the secret-adjacent file is never briefly world-readable. 
+ fd = os.open(target, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) + with os.fdopen(fd, "w") as fh: + fh.write(payload) + return target + + +def _emit(creds: dict[str, str]) -> None: + json.dump({"Credentials": creds}, sys.stdout) + + +def _frozen_to_creds(frozen: Any, expiry_iso: str | None) -> dict[str, str]: + out = { + "AccessKeyId": frozen.access_key, + "SecretAccessKey": frozen.secret_key, + "SessionToken": frozen.token or "", + } + if expiry_iso: + out["Expiration"] = expiry_iso + return out + + +def _ambient_credentials() -> dict[str, str]: + """Frozen ambient (compute-role) credentials — the fail-open fallback.""" + import botocore.session + + creds = botocore.session.get_session().get_credentials() + if creds is None: + # No resolvable credentials at all. Emit an empty object; Claude Code + # then falls back to its own default-chain resolution. + return {} + return _frozen_to_creds(creds.get_frozen_credentials(), None) + + +def resolve_credentials() -> dict[str, str]: + """Return tagged assumed-role creds, or ambient creds on any failure.""" + path = attribution_file_path() + try: + with open(path) as fh: + cfg = json.load(fh) + role_arn = cfg["role_arn"] + tags = cfg.get("tags", []) + except (OSError, ValueError, KeyError, TypeError): + # Attribution not configured (local/dev), unreadable, or not a JSON object → fail open.
+ return _ambient_credentials() + + try: + import boto3 + + region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION") + task_id = next((t["Value"] for t in tags if t.get("Key") == "task_id"), "") + session_name = (f"abca-bedrock-{task_id}" if task_id else "abca-bedrock")[:64] + resp = boto3.client("sts", region_name=region).assume_role( + RoleArn=role_arn, + RoleSessionName=session_name, + DurationSeconds=_CHAINED_SESSION_DURATION_S, + Tags=tags, + ) + c = resp["Credentials"] + return { + "AccessKeyId": c["AccessKeyId"], + "SecretAccessKey": c["SecretAccessKey"], + "SessionToken": c["SessionToken"], + "Expiration": c["Expiration"].isoformat(), + } + except Exception: + # Assume failed (role not yet provisioned, transient STS error, …). + # Fail open so Bedrock keeps working on the compute role. + return _ambient_credentials() + + +def main() -> int: + _emit(resolve_credentials()) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/agent/src/runner.py b/agent/src/runner.py index 8e6cf3db..c63558bf 100644 --- a/agent/src/runner.py +++ b/agent/src/runner.py @@ -59,6 +59,50 @@ def _parse_token_usage(raw_usage: Any) -> TokenUsage: return TokenUsage(**values) +def _setup_bedrock_cost_attribution(config: TaskConfig) -> None: + """Wire Bedrock cost attribution for the Claude Code subprocess (#215). + + Claude Code makes the ``InvokeModel`` calls, so attribution is configured + through *its* credential + header channels, not the agent's boto3: + + 1. **Per-user/repo chargeback (CUR 2.0 / Cost Explorer).** Write the + SessionRole ARN + ``{user_id, repo, task_id}`` STS tags to a 0600 file + that ``bedrock_creds_helper.py`` reads. Claude Code's managed-settings + ``awsCredentialExport`` runs that helper and signs Bedrock requests with + the tagged assumed-role credentials. Skipped when ``AGENT_SESSION_ROLE_ARN`` + is unset (local/dev) — the helper then fails open to ambient creds. + + 2.
**Per-call forensics (model-invocation logs).** Set + ``X-Amzn-Bedrock-Request-Metadata`` via ``ANTHROPIC_CUSTOM_HEADERS`` on the + process env. One container = one task = one Claude Code session, so a + static-per-process header is effectively per-task. Set via the process + env (not project settings) so the untrusted cloned repo cannot alter it. + """ + import json + + from aws_session import MAX_TAG_VALUE_LEN, build_session_tags + + role_arn = os.environ.get("AGENT_SESSION_ROLE_ARN", "").strip() + tags = build_session_tags(config.user_id, config.repo_url, config.task_id) + if role_arn and tags: + try: + from bedrock_creds_helper import write_attribution_file + + write_attribution_file(role_arn, tags) + except OSError as exc: + # Fail open: attribution is observability, not isolation. Bedrock + # still works on the compute role; we just lose tagged chargeback. + log("WARN", f"Bedrock attribution file not written ({exc}); spend will be untagged") + + # Per-request metadata mirrors the STS tag values. Bedrock limits keys/values + # to 256 chars and records them under ``requestMetadata`` in invocation logs. + metadata = {t["Key"]: t["Value"][:MAX_TAG_VALUE_LEN] for t in tags} + if metadata: + os.environ["ANTHROPIC_CUSTOM_HEADERS"] = ( + f"X-Amzn-Bedrock-Request-Metadata: {json.dumps(metadata, separators=(',', ':'))}" + ) + + def _setup_agent_env(config: TaskConfig) -> tuple[str | None, str | None]: """Configure process environment for the Claude Code CLI subprocess. 
@@ -72,6 +116,8 @@ def _setup_agent_env(config: TaskConfig) -> tuple[str | None, str | None]: os.environ["ANTHROPIC_MODEL"] = config.anthropic_model os.environ["GITHUB_TOKEN"] = config.github_token os.environ["GH_TOKEN"] = config.github_token + + _setup_bedrock_cost_attribution(config) # DO NOT set ANTHROPIC_LOG — any logging level causes the CLI to write to # stderr, which fills the OS pipe buffer (64 KB) and deadlocks the # single-threaded Node.js CLI process (blocked stderr write prevents stdout diff --git a/agent/tests/test_bedrock_creds_helper.py b/agent/tests/test_bedrock_creds_helper.py new file mode 100644 index 00000000..f79ce0d5 --- /dev/null +++ b/agent/tests/test_bedrock_creds_helper.py @@ -0,0 +1,120 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +"""Unit tests for the Bedrock credential helper (#215, cost attribution). + +The helper feeds Claude Code's ``awsCredentialExport``: it assumes the per-task +SessionRole with ``{user_id, repo, task_id}`` STS tags so Bedrock spend is +attributable, and **fails open** to ambient credentials when attribution is not +configured or the assume fails — losing chargeback granularity is not a security +incident, unlike the fail-closed tenant-data path in ``aws_session``. 
+""" + +from __future__ import annotations + +import datetime +import json +import os +import stat +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +import bedrock_creds_helper as helper +from aws_session import build_session_tags + + +@pytest.fixture +def attr_file(tmp_path, monkeypatch): + path = str(tmp_path / "attr.json") + monkeypatch.setenv(helper.ATTRIBUTION_FILE_ENV, path) + return path + + +def test_write_attribution_file_is_0600(attr_file): + tags = build_session_tags("u1", "owner/repo", "task123") + written = helper.write_attribution_file("arn:aws:iam::1:role/SR", tags, attr_file) + assert written == attr_file + mode = stat.S_IMODE(os.stat(attr_file).st_mode) + assert mode == 0o600, f"attribution file must be 0600, got {oct(mode)}" + with open(attr_file) as fh: + saved = json.load(fh) + assert saved["role_arn"] == "arn:aws:iam::1:role/SR" + assert {"Key": "task_id", "Value": "task123"} in saved["tags"] + + +def test_resolve_assumes_role_with_session_tags(attr_file): + tags = build_session_tags("u1", "owner/repo", "task123") + helper.write_attribution_file("arn:aws:iam::1:role/SR", tags, attr_file) + + expiry = datetime.datetime(2026, 1, 1, tzinfo=datetime.UTC) + sts = MagicMock() + sts.assume_role.return_value = { + "Credentials": { + "AccessKeyId": "AK", + "SecretAccessKey": "SK", + "SessionToken": "TK", + "Expiration": expiry, + } + } + with patch("boto3.client", return_value=sts): + creds = helper.resolve_credentials() + + # The assume carried exactly the tenant tags, and a tagged session name. 
+ _, kwargs = sts.assume_role.call_args + assert kwargs["Tags"] == tags + assert kwargs["RoleArn"] == "arn:aws:iam::1:role/SR" + assert kwargs["RoleSessionName"].startswith("abca-bedrock-task123") + assert creds == { + "AccessKeyId": "AK", + "SecretAccessKey": "SK", + "SessionToken": "TK", + "Expiration": expiry.isoformat(), + } + + +def test_resolve_fails_open_when_no_attribution_file(attr_file): + # File never written → fall back to ambient creds, never raise. + frozen = SimpleNamespace(access_key="AMB", secret_key="S", token="T") + ambient = MagicMock() + ambient.get_credentials.return_value.get_frozen_credentials.return_value = frozen + with patch("botocore.session.get_session", return_value=ambient): + creds = helper.resolve_credentials() + assert creds["AccessKeyId"] == "AMB" + assert "Expiration" not in creds # ambient creds are returned unbounded + + +def test_resolve_fails_open_when_assume_role_raises(attr_file): + helper.write_attribution_file( + "arn:aws:iam::1:role/SR", build_session_tags("u", "r", "t"), attr_file + ) + frozen = SimpleNamespace(access_key="AMB", secret_key="S", token="T") + ambient = MagicMock() + ambient.get_credentials.return_value.get_frozen_credentials.return_value = frozen + + sts = MagicMock() + sts.assume_role.side_effect = RuntimeError("AccessDenied") + with ( + patch("boto3.client", return_value=sts), + patch("botocore.session.get_session", return_value=ambient), + ): + creds = helper.resolve_credentials() + assert creds["AccessKeyId"] == "AMB" + + +def test_resolve_emits_empty_when_no_credentials_at_all(attr_file): + ambient = MagicMock() + ambient.get_credentials.return_value = None + with patch("botocore.session.get_session", return_value=ambient): + creds = helper.resolve_credentials() + # Empty object → Claude Code falls back to its own default-chain resolution. 
+ assert creds == {} + + +def test_main_emits_credentials_envelope(attr_file, capsys): + with patch.object(helper, "resolve_credentials", return_value={"AccessKeyId": "X"}): + rc = helper.main() + assert rc == 0 + out = json.loads(capsys.readouterr().out) + assert out == {"Credentials": {"AccessKeyId": "X"}} diff --git a/agent/tests/test_runner.py b/agent/tests/test_runner.py index 8c770d7a..50f0a70a 100644 --- a/agent/tests/test_runner.py +++ b/agent/tests/test_runner.py @@ -13,7 +13,12 @@ from unittest.mock import MagicMock, patch from models import TaskConfig -from runner import _FULL_TOOL_SURFACE, _initialize_policy_engine_and_hooks, _resolve_allowed_tools +from runner import ( + _FULL_TOOL_SURFACE, + _initialize_policy_engine_and_hooks, + _resolve_allowed_tools, + _setup_bedrock_cost_attribution, +) def _config(**overrides: Any) -> TaskConfig: @@ -295,3 +300,42 @@ def test_read_leaning_default_lane_keeps_its_restricted_list(self): assert _resolve_allowed_tools(config) == restricted assert "Bash" not in _resolve_allowed_tools(config) assert "Write" not in _resolve_allowed_tools(config) + + +class TestBedrockCostAttribution: + """#215: wire Claude Code's Bedrock attribution channels (creds + header).""" + + def test_writes_attribution_file_and_sets_metadata_header_when_role_set(self, monkeypatch): + monkeypatch.setenv("AGENT_SESSION_ROLE_ARN", "arn:aws:iam::1:role/SR") + monkeypatch.delenv("ANTHROPIC_CUSTOM_HEADERS", raising=False) + config = _config(user_id="alice", repo_url="owner/repo", task_id="t-9") + + with patch("bedrock_creds_helper.write_attribution_file") as mock_write: + _setup_bedrock_cost_attribution(config) + + role_arn, tags = mock_write.call_args.args + assert role_arn == "arn:aws:iam::1:role/SR" + assert {"Key": "user_id", "Value": "alice"} in tags + + header = __import__("os").environ["ANTHROPIC_CUSTOM_HEADERS"] + name, _, value = header.partition(": ") + assert name == "X-Amzn-Bedrock-Request-Metadata" + import json as _json + + assert 
_json.loads(value) == { + "user_id": "alice", + "repo": "owner/repo", + "task_id": "t-9", + } + + def test_no_attribution_file_when_role_unset_but_header_still_set(self, monkeypatch): + # Local/dev: no SessionRole → no tagged creds (helper fails open), but the + # invocation-log metadata header is still useful and harmless. + monkeypatch.delenv("AGENT_SESSION_ROLE_ARN", raising=False) + config = _config(user_id="bob", repo_url="o/r", task_id="t-1") + with patch("bedrock_creds_helper.write_attribution_file") as mock_write: + _setup_bedrock_cost_attribution(config) + mock_write.assert_not_called() + assert "X-Amzn-Bedrock-Request-Metadata" in __import__("os").environ.get( + "ANTHROPIC_CUSTOM_HEADERS", "" + ) diff --git a/agent/uv.lock b/agent/uv.lock index e65577c6..04010e22 100644 --- a/agent/uv.lock +++ b/agent/uv.lock @@ -166,7 +166,7 @@ requires-dist = [ { name = "bedrock-agentcore", specifier = "==1.9.1" }, { name = "boto3", specifier = "==1.43.9" }, { name = "cedarpy", specifier = "==4.8.4" }, - { name = "claude-agent-sdk", specifier = "==0.2.82" }, + { name = "claude-agent-sdk", specifier = "==0.2.110" }, { name = "fastapi", specifier = "==0.136.1" }, { name = "jsonschema", specifier = "==4.26.0" }, { name = "mcp", specifier = "==1.27.1" }, @@ -373,20 +373,20 @@ wheels = [ [[package]] name = "claude-agent-sdk" -version = "0.2.82" +version = "0.2.110" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "mcp" }, { name = "sniffio" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/57/3d/f75aaecf476c2b2a903dbba6042171b6683eb91c1f97f3ad894784cec270/claude_agent_sdk-0.2.82.tar.gz", hash = "sha256:3e907b7d2bf52a5917d96a3ce336b8aa5546ea31e29ce826a7f346622cf7f4bf", size = 252053, upload-time = "2026-05-15T03:45:34.251Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bb/98/8fdab35ed9e1a36bc7afab4d390cc5002094a4950996c079da9aa4541cc4/claude_agent_sdk-0.2.110.tar.gz", hash = 
"sha256:538b548bac07a22f65686abab063a902ac76ba35989d0f073c942f96248e9fa3", size = 255632, upload-time = "2026-06-24T22:11:52.342Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/19/bc/27cf3aec2a24f2ed1f60277de795496b808a761d2a7a3fd34602a2fec37d/claude_agent_sdk-0.2.82-py3-none-macosx_11_0_arm64.whl", hash = "sha256:24ad8ccbcee9afe206ae5d621a9e40a5022ca3eb8c2c672b36916d3e70746e42", size = 61473506, upload-time = "2026-05-15T03:45:38.745Z" }, - { url = "https://files.pythonhosted.org/packages/96/91/95a83f018dbc8c113233eb542bccf17c1a3f5f689448700daf950602bf5e/claude_agent_sdk-0.2.82-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:13e54d5163d9d4f899c4e2a3f14df597f4e050d5afa104618ccf7bb37b372ad1", size = 63541975, upload-time = "2026-05-15T03:45:46.005Z" }, - { url = "https://files.pythonhosted.org/packages/be/07/9356fe0e30f988bade6b116ecc602b4a9ae4df34fa055305187a835e36e0/claude_agent_sdk-0.2.82-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:3b0a0e3f0927737f1fc91ee4549185172243a4e8f135d4c1e4f1f1eba91373e1", size = 71212904, upload-time = "2026-05-15T03:45:51.121Z" }, - { url = "https://files.pythonhosted.org/packages/b1/d9/e2920b4b6c75cf79ec87ebfb4cc4447c78a4f26317cb3fed77e79fcc804e/claude_agent_sdk-0.2.82-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:b05873f9df01c5894930b87f6ca9315f0d97f1563bc2e4dc0fafe0d4a1e31997", size = 71381948, upload-time = "2026-05-15T03:45:56.153Z" }, - { url = "https://files.pythonhosted.org/packages/89/80/c3ec5a89c735a96d35fe12b6262517169b396ff366149a3b9f4387f797c1/claude_agent_sdk-0.2.82-py3-none-win_amd64.whl", hash = "sha256:71e85e4f50d04cd95e687898092f03648e74e1cd2537583de93370d2da1c0586", size = 71990462, upload-time = "2026-05-15T03:46:01.646Z" }, + { url = "https://files.pythonhosted.org/packages/aa/93/29d4fdaa13e69034faf8d3503df915b07c820e2c08e3d6a7515149cde5bb/claude_agent_sdk-0.2.110-py3-none-macosx_11_0_arm64.whl", hash = "sha256:fed0e0f4804d9f9cff80ab7d1b44142ebd1046cdd29ca74caef4c92c35fff8d8", size 
= 64924533, upload-time = "2026-06-24T22:11:55.612Z" }, + { url = "https://files.pythonhosted.org/packages/aa/03/b40bb673cd93cdc3928262c1be75fde34a7bed4bf2c2c20e04218e2005ea/claude_agent_sdk-0.2.110-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:62b23869d46cef6f6ff1d00ceaa5e846e2f1d297478421c835efb8fe99369d4f", size = 69704449, upload-time = "2026-06-24T22:11:59.149Z" }, + { url = "https://files.pythonhosted.org/packages/f9/18/ab67cb5ce641333385bed55ed8e9665c00f7d30d1f6ab12f8463ddb7695f/claude_agent_sdk-0.2.110-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:324e49553c6303d6b267217dc2912652b97af2bc96503efd12095ae915b46b83", size = 74879555, upload-time = "2026-06-24T22:12:03.25Z" }, + { url = "https://files.pythonhosted.org/packages/91/88/3627d7d14310cfec66977551263e219365244a906fc7ca1209fb0c3a6cec/claude_agent_sdk-0.2.110-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:56371dd7a2c66c0bd497dc0b3cab4193a228b196f600676393d69c0ecee37cfb", size = 75924237, upload-time = "2026-06-24T22:12:07.183Z" }, + { url = "https://files.pythonhosted.org/packages/49/79/c9066c5c387d42c19a4b675ec1ff5219f8920cfda8ff8b527119fd69b774/claude_agent_sdk-0.2.110-py3-none-win_amd64.whl", hash = "sha256:4235d4de6d685a189c12612095ab192b759280ede1f3aed0c3e784d52c3555f9", size = 75448209, upload-time = "2026-06-24T22:12:11.283Z" }, ] [[package]] diff --git a/cdk/src/constructs/agent-session-role.ts b/cdk/src/constructs/agent-session-role.ts index 95df0840..1602b734 100644 --- a/cdk/src/constructs/agent-session-role.ts +++ b/cdk/src/constructs/agent-session-role.ts @@ -17,6 +17,7 @@ * SOFTWARE. */ +import * as bedrock from '@aws-cdk/aws-bedrock-alpha'; import { Duration } from 'aws-cdk-lib'; import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; import * as iam from 'aws-cdk-lib/aws-iam'; @@ -64,6 +65,23 @@ export interface AgentSessionRoleProps { * prefix. 
*/ readonly attachmentsBucket: s3.IBucket; + + /** + * Bedrock models / cross-region inference profiles the agent may invoke + * (#215, cost attribution). When provided, each is `grantInvoke`-ed to the + * SessionRole — the **same** grant the compute role receives, so the + * permission set (including the all-regions foundation-model ARNs a + * cross-region profile fans out to) stays in lockstep and a cross-region + * route can never AccessDenied. Model inference run by the Claude Code + * subprocess is then attributed per `{user_id, repo}` in CUR 2.0 / Cost + * Explorer via the session tags this role already carries. + * + * The compute role keeps its own Bedrock grant: attribution is a billing + * control that fails open (the credential helper falls back to compute-role + * creds if the assume-role fails), so model invocation never depends on this. + * Omit (e.g. isolated construct tests) to skip the Bedrock grant. + */ + readonly invokableModels?: bedrock.IBedrockInvokable[]; } /** @@ -85,9 +103,12 @@ export interface AgentSessionRoleProps { * code. Backend-agnostic: the same role serves agents booted under either the * AgentCore Runtime execution role or the ECS Fargate task role. * - * Bedrock model invocation and CloudWatch Logs intentionally remain on the - * compute role (shared, non-tenant access; and keeping `InvokeModel` off the - * 1-hour-capped chained session avoids breaking long tasks). + * CloudWatch Logs remains on the compute role (shared, non-tenant access). The + * compute role *also* keeps `InvokeModel`; this role adds a parallel, session- + * tagged Bedrock grant (#215) used by the Claude Code subprocess for cost + * attribution. Long-task safety on the 1-hour-capped chained session is handled + * by Claude Code's `awsCredentialExport` refresh, and the helper falls back to + * the compute role if assume fails — so model invocation never breaks. 
*/ export class AgentSessionRole extends Construct { /** Actions sufficient for the agent's DynamoDB access. Excludes Scan. */ @@ -191,6 +212,20 @@ export class AgentSessionRole extends Construct { }), ); + // --- Bedrock model invocation: tagged for cost attribution (#215) --- + // Reuse grantInvoke so this role's Bedrock permissions exactly mirror the + // compute role's (cross-region profiles fan out to the foundation model in + // every routed region — replicating that by hand would risk an AccessDenied + // on a cross-region route). Claude Code assumes this role (via its + // awsCredentialExport helper) so InvokeModel rides the session's + // {user_id, repo, task_id} tags, surfacing per-user/repo Bedrock spend in + // CUR 2.0 / Cost Explorer. No PrincipalTag condition: the tags are for + // billing attribution, not access scoping, so a condition would add no + // isolation and only risk breakage. + for (const invokable of props.invokableModels ?? []) { + invokable.grantInvoke(this.role); + } + // The object-level prefix conditions above already constrain access to the // session's own tenant prefix; the remaining wildcard is the per-object // suffix (task_id/attachment_id/filename), which is the intended scope. @@ -205,7 +240,10 @@ export class AgentSessionRole extends Construct { + 'attachments/${aws:PrincipalTag/user_id}/*, ' + 'artifacts/${aws:PrincipalTag/task_id}/*) and the DynamoDB item ' + 'set gated by a dynamodb:LeadingKeys = ${aws:PrincipalTag/task_id} ' - + 'condition — narrower than the compute role this replaces.', + + 'condition — narrower than the compute role this replaces. 
Bedrock ' + + 'InvokeModel resources are the explicit model + inference-profile ' + + 'ARNs from grantInvoke (cross-region profiles fan out to per-region ' + + 'foundation-model ARNs), matching the compute role grant (#215).', }, ], true, diff --git a/cdk/src/stacks/agent.ts b/cdk/src/stacks/agent.ts index 604f794d..85a4d5e1 100644 --- a/cdk/src/stacks/agent.ts +++ b/cdk/src/stacks/agent.ts @@ -458,6 +458,17 @@ export class AgentStack extends Stack { ], traceArtifactsBucket: traceArtifactsBucket.bucket, attachmentsBucket: attachmentsBucket.bucket, + // #215: session-tagged Bedrock grant for cost attribution — the same + // invokables grantInvoke-ed to the compute role above, so the grants + // stay in lockstep. + invokableModels: [ + model, + inferenceProfile, + model3, + inferenceProfile3, + model2, + inferenceProfile2, + ], }); sessionRoleArnHolder = agentSessionRole.role.roleArn; diff --git a/cdk/test/constructs/agent-session-role.test.ts b/cdk/test/constructs/agent-session-role.test.ts index 1f990a63..676b34f6 100644 --- a/cdk/test/constructs/agent-session-role.test.ts +++ b/cdk/test/constructs/agent-session-role.test.ts @@ -17,6 +17,7 @@ * SOFTWARE. 
*/ +import * as bedrock from '@aws-cdk/aws-bedrock-alpha'; import { App, Stack } from 'aws-cdk-lib'; import { Template, Match } from 'aws-cdk-lib/assertions'; import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; @@ -193,6 +194,52 @@ describe('AgentSessionRole construct', () => { expect(JSON.stringify(trustDoc)).toContain('ComputeRole'); }); + test('grants bedrock:InvokeModel on the supplied invokables when invokableModels is set (#215)', () => { + const app = new App(); + const stack = new Stack(app, 'BedrockStack'); + const computeRole = new iam.Role(stack, 'ComputeRole', { + assumedBy: new iam.ServicePrincipal('bedrock-agentcore.amazonaws.com'), + }); + const table = new dynamodb.Table(stack, 'T', { + partitionKey: { name: 'task_id', type: dynamodb.AttributeType.STRING }, + }); + const model = new bedrock.BedrockFoundationModel('anthropic.claude-sonnet-4-6', { + supportsCrossRegion: true, + }); + new AgentSessionRole(stack, 'SR', { + assumingRoles: [computeRole], + taskScopedTables: [table], + traceArtifactsBucket: new s3.Bucket(stack, 'TB'), + attachmentsBucket: new s3.Bucket(stack, 'AB'), + invokableModels: [model], + }); + + const stackTemplate = Template.fromStack(stack); + const sessionPolicy = Object.entries( + stackTemplate.findResources('AWS::IAM::Policy'), + ).find(([id]) => id.includes('SR'))![1]; + const statements = sessionPolicy.Properties.PolicyDocument.Statement; + // grantInvoke emits the wildcard-suffixed action bedrock:InvokeModel*. + const bedrockStatement = statements.find((s: { Action: string | string[] }) => { + const actions = Array.isArray(s.Action) ? s.Action : [s.Action]; + return actions.some((a: string) => a.startsWith('bedrock:InvokeModel')); + }); + expect(bedrockStatement).toBeDefined(); + // The model ARN must be present, scoped (no Resource:'*'). 
+ expect(JSON.stringify(bedrockStatement.Resource)).toContain('anthropic.claude-sonnet-4-6'); + expect(bedrockStatement.Resource).not.toBe('*'); + }); + + test('omitting invokableModels grants no bedrock action (isolated tests)', () => { + const { template: t } = createStack(); + const policies = t.findResources('AWS::IAM::Policy'); + const sessionPolicy = Object.entries(policies).find(([id]) => + id.includes('AgentSessionRole'), + )![1]; + const serialized = JSON.stringify(sessionPolicy.Properties.PolicyDocument.Statement); + expect(serialized).not.toContain('bedrock:InvokeModel'); + }); + test('admitComputeRole wires both trust and grant for an additional compute role', () => { const app = new App(); const stack = new Stack(app, 'MultiPrincipalStack'); diff --git a/docs/design/BEDROCK_COST_ATTRIBUTION.md b/docs/design/BEDROCK_COST_ATTRIBUTION.md index 929c6d60..66480092 100644 --- a/docs/design/BEDROCK_COST_ATTRIBUTION.md +++ b/docs/design/BEDROCK_COST_ATTRIBUTION.md @@ -8,12 +8,14 @@ Bedrock is invoked by the **Claude Code CLI subprocess** (`CLAUDE_CODE_USE_BEDRO | Track | Mechanism | Surfaces in | AC | |---|---|---|---| -| 1. IAM session-tag attribution | Claude Code `awsCredentialExport` → helper does `sts:AssumeRole --tags {user_id,repo,task_id}` against a new **BedrockInvokeRole** | CUR 2.0 / Cost Explorer (`iamPrincipal/` prefix), aggregated per usage-type/day | #1, #2 | -| 2. Per-request metadata | `ANTHROPIC_CUSTOM_HEADERS: X-Amzn-Bedrock-Request-Metadata: {...}` env var | Model-invocation logs (`requestMetadata` field), per call | #3 | +| 1. IAM session-tag attribution | Claude Code `awsCredentialExport` → `bedrock_creds_helper.py` does `sts:AssumeRole --tags {user_id,repo,task_id}` against the existing **`AgentSessionRole`** (now also granted `bedrock:InvokeModel*`) | CUR 2.0 / Cost Explorer (`iamPrincipal/` prefix), aggregated per usage-type/day | #1, #2 | +| 2. 
Per-request metadata | `ANTHROPIC_CUSTOM_HEADERS: X-Amzn-Bedrock-Request-Metadata: {...}` on the subprocess env | Model-invocation logs (`requestMetadata` field), per call | #3 | | 3. Operator docs | `COST_ATTRIBUTION.md` + cross-links | — | #5 | The two tracks are **complementary** (per AWS docs): session tags give aggregated chargeback in billing; request metadata gives per-call forensics in logs. Session tags are *not* written to invocation logs, and request metadata is *not* a cost-allocation tag — you need both. +> **`cost_usd` is a client-side estimate, not billing.** The in-app `cost_usd` is the SDK's `total_cost_usd` (`runner.py`), computed from a build-time price table; it drifts from the real bill on pricing changes, unrecognized models, cache rates, and AWS discounts. It is for per-task guardrails only — the authoritative source is AWS Cost Explorer / CUR 2.0 (Track 1). This is the same caveat the [Claude Agent SDK cost-tracking docs](https://code.claude.com/docs/en/agent-sdk/cost-tracking) raise, adapted for Bedrock (authoritative source is the AWS bill, not the Claude Console). Both this design and the operator guide surface it. + ## Why the issue's original approach doesn't apply The issue proposed extending `aws_session.py` / the `DeferredRefreshableCredentials` pattern to route `InvokeModel` through tagged creds. That pattern governs the agent's **boto3** clients for tenant data. But: @@ -26,60 +28,63 @@ agent/src/runner.py::_setup_agent_env credential chain (today: the ambient compute role) ``` -The agent never makes the `InvokeModel` call, so it cannot attach creds or headers to it directly. The control point is **how Claude Code resolves credentials and headers**, configured via Claude Code settings/env before `client.connect()`. +The agent never makes the `InvokeModel` call, so it cannot attach creds or headers to it directly. The control point is **how Claude Code resolves credentials and headers**, configured before `client.connect()`. 
-Verified Claude Code behavior (code.claude.com/docs/en/amazon-bedrock, /env-vars): +Verified Claude Code behavior (code.claude.com/docs/en/amazon-bedrock, /env-vars, /settings): -- **Credentials:** default AWS SDK chain. Mutating the parent process's `AWS_*` env vars mid-session is **not** re-read. For refresh, Claude Code supports `awsCredentialExport` — a helper command run at session start and re-run ~5 min before the `Expiration` it returns. This is exactly what an 8 h task needs to survive the **1 h role-chaining cap**. -- **Request metadata:** Claude Code uses the **Invoke API and does not support Converse**, so the Converse `requestMetadata` field is unreachable. The only lever is `ANTHROPIC_CUSTOM_HEADERS` (static per process). Because ABCA runs **one task per container per Claude Code session**, "static per process" == "per task" — sufficient for `{task_id, user_id, repo}` attribution. No proxy/gateway needed. +- **Credentials:** default AWS SDK chain. Mutating the parent process's `AWS_*` env vars mid-session is **not** re-read. For refresh, Claude Code supports `awsCredentialExport` — a settings-only key (no env/flag equivalent) naming a helper command run at session start and re-run ~5 min before the `Expiration` the helper returns (≥ CLI 2.1.176). This beats the **1 h role-chaining cap** on an 8 h task. +- **Request metadata:** Claude Code uses the **Invoke API and does not support Converse**, so the Converse `requestMetadata` field is unreachable. The only lever is `ANTHROPIC_CUSTOM_HEADERS` (static per process), which **is read from the process environment** and process-env wins over any settings `env` block. Because ABCA runs **one task per container per Claude Code session**, "static per process" == "per task" — sufficient. No proxy/gateway needed. 
+- **Settings precedence (security-critical):** under `setting_sources=["project"]` Claude Code loads **only the cloned repo's `.claude/settings.json`** (user settings are dropped) — but the **managed-settings layer is loaded in all cases and outranks everything**, so the untrusted repo cannot override it. ## Track 1 — IAM session-tag attribution -### New construct: `BedrockInvokeRole` +### Reuse `AgentSessionRole` (no new role) + +`AgentSessionRole` is *already* assumed by the compute roles with `{user_id, repo, task_id}` STS session tags, and `AGENT_SESSION_ROLE_ARN` is already injected into the container. A second "BedrockInvokeRole" would duplicate that entire trust/grant surface for an identical principal. Instead we add a single grant to it: -A dedicated role the agent assumes *only* to mint tagged credentials for Claude Code's Bedrock calls. Kept separate from `AgentSessionRole` (tenant data) so the trust/grant surfaces stay independent and auditable. +- New optional prop `invokableModels: IBedrockInvokable[]`. For each, the construct calls `invokable.grantInvoke(this.role)` — **the same grant the compute role receives**. Reusing `grantInvoke` (rather than hand-building ARNs) is load-bearing: a cross-region inference profile fans out to the foundation-model ARN in *every routed region*; replicating that by hand would risk an `AccessDenied` on a cross-region route. No `aws:PrincipalTag` condition — the tags are for billing attribution, not access scoping. +- `agent.ts` passes the six existing invokables (Sonnet 4.6 / Opus 4 / Haiku 4.5 models + their cross-region profiles). The ECS path reuses the same `AgentSessionRole` instance, so it is covered automatically. -- **Trust:** same compute roles as `AgentSessionRole` (AgentCore ExecutionRole, ECS task role), with `sts:AssumeRole` + `sts:TagSession`. 
-- **Grants:** `bedrock:InvokeModel` + `bedrock:InvokeModelWithResponseStream` on the **exact** foundation-model + cross-region inference-profile ARNs already enumerated in `agent.ts` / `ecs-agent-cluster.ts` (Sonnet 4.6, Opus 4, Haiku 4.5). No wildcards — reuses the existing ARN allowlist. -- **`maxSessionDuration`: 1 h** (documents the role-chaining cap; refresh handles longevity). -- Exposes `admitComputeRole()` mirroring `AgentSessionRole`, so ECS wiring is symmetric. +### The compute role KEEPS its Bedrock grant -Once this exists, **the compute role drops `bedrock:InvokeModel`** — model invocation moves entirely onto the tagged BedrockInvokeRole. (The #211 comment "Bedrock intentionally stays on the compute role to avoid 1 h expiry" is resolved by `awsCredentialExport`'s refresh.) +The #211 comment "Bedrock intentionally stays on the compute role to avoid 1 h expiry" is *resolved* by `awsCredentialExport`'s pre-expiry refresh — but we still leave `InvokeModel` on the compute role, because Track 1 **fails open** (below) and the compute-role grant is exactly the fallback path. The SessionRole grant is parallel, not a replacement. ### Credential helper + Claude Code wiring -A small helper script (shipped in the agent image) that `awsCredentialExport` invokes: +`agent/src/bedrock_creds_helper.py` (invoked by `awsCredentialExport`): -``` -assume-role --role-arn $BEDROCK_INVOKE_ROLE_ARN \ - --tags user_id=$USER_ID repo=$REPO task_id=$TASK_ID -→ emits {"Credentials":{AccessKeyId,SecretAccessKey,SessionToken,Expiration}} -``` +1. Reads a 0600 JSON file (`/home/agent/.bedrock-attribution.json`) the agent writes at startup, carrying the SessionRole ARN + STS tags. Read from a file, not the environment, so tenant identifiers don't leak into the untrusted repo subprocesses the agent spawns (matching `aws_session.py` discipline). +2. `sts:AssumeRole` with those tags and emits `{"Credentials":{...,"Expiration":}}`. The real `Expiration` drives Claude Code's pre-cap refresh. 
+3. Tag building reuses `aws_session.build_session_tags` (one definition of the `{user_id,repo,task_id}` tags + 256-char clamp). -- Reuses the **same STS `assume_role` + tag-truncation logic** already in `aws_session.py` (factor the tag-building + 256-char clamp into a shared helper; don't duplicate). -- `Expiration` is the real STS expiry, so Claude Code re-runs the helper before the 1 h cap. -- `_setup_agent_env` writes Claude Code's `awsCredentialExport` setting (and `BEDROCK_INVOKE_ROLE_ARN` / tag values) **into a trusted, agent-controlled settings location** — *not* the cloned repo's `.claude/settings.json`. +`runner._setup_bedrock_cost_attribution` writes the attribution file when `AGENT_SESSION_ROLE_ARN` is set, and always sets the metadata header (Track 2). -> **Security note (must not be skipped):** `awsCredentialExport` runs an arbitrary shell command. `setting_sources=["project"]` currently reads the **untrusted cloned target repo's** `.claude/settings.json`. We must inject `awsCredentialExport` via a location the target repo **cannot override** (user-level settings or an explicit `--settings` file the agent owns), and confirm Claude Code's precedence makes project settings unable to redefine it. A repo that could set `awsCredentialExport` would get arbitrary code execution with the compute role. This is the single highest-risk item in the design and gets a dedicated test. +### Where `awsCredentialExport` lives (RCE boundary) -### Fail-open vs fail-closed +`awsCredentialExport` runs an arbitrary command. It is baked into the **managed-settings layer** at `/etc/claude-code/managed-settings.json` (root-owned, copied in the Dockerfile before `USER agent`). This is the only repo-proof location: it loads regardless of `setting_sources=["project"]` and outranks the cloned repo's project `.claude/settings.json`, so a malicious repo cannot define or override it. Putting it anywhere the target repo can influence would be RCE with the compute role. 
-Unlike #211 tenant isolation (fail **closed** — a scoping failure means cross-tenant exposure), Bedrock attribution is a **billing/observability** control. If the helper can't assume the role, the correct failure mode is to **fall back to the compute role and emit a warning**, not to abort the task — losing chargeback granularity is not a security incident. When `BEDROCK_INVOKE_ROLE_ARN` is unset (local/dev), behavior is identical to today. +### Fail-open (not fail-closed) + +Unlike #211 tenant isolation (fail **closed** — a scoping failure means cross-tenant exposure), Bedrock attribution is a **billing/observability** control. If the attribution file is absent or the assume fails, the helper emits the **ambient compute-role credentials** so Bedrock keeps working untagged — losing chargeback granularity is not a security incident. When `AGENT_SESSION_ROLE_ARN` is unset (local/dev), the helper fails open and behavior matches today. ## Track 2 — per-request metadata -In `_setup_agent_env`, set: +In `_setup_bedrock_cost_attribution`, set on the process env: ```python os.environ["ANTHROPIC_CUSTOM_HEADERS"] = ( "X-Amzn-Bedrock-Request-Metadata: " - + json.dumps({"task_id": ..., "user_id": ..., "repo": ...}) # 256-char clamp, ≤16 keys + + json.dumps({"user_id": ..., "repo": ..., "task_id": ...}) # 256-char clamp, ≤16 keys ) ``` -Gated on invocation logging being enabled (it already is — `agent.ts` configures the CloudWatch destination). Surfaces under the `requestMetadata` field in `/aws/bedrock/model-invocation-logs/`. +Set via the process env (not project settings) so the untrusted repo can't alter it. Surfaces under `requestMetadata` in `/aws/bedrock/model-invocation-logs/` (logging already enabled in `agent.ts`). + +> **Open risk to validate against a live endpoint:** Bedrock rejects `X-Amzn-Bedrock-Request-Metadata` with `InvalidSignatureException` if the header is omitted from the SigV4 `SignedHeaders`. Whether Claude Code signs custom headers is unverified. 
AC#3 explicitly permits "or documented blocker if Claude Code cannot pass metadata." If it fails, per-call attribution falls back to invocation-log `identity.arn` + `RoleSessionName` (`abca-bedrock-`) that Track 1's tagged session already provides. + +## Version alignment -> **Open risk to validate before merge:** Bedrock rejects `X-Amzn-Bedrock-Request-Metadata` with `InvalidSignatureException` if the header is omitted from the SigV4 `SignedHeaders`. AWS SDKs that expose metadata as a parameter sign it automatically; a custom header injected via `ANTHROPIC_CUSTOM_HEADERS` may **not** be in Claude Code's signed-headers list. **This must be tested against a live Bedrock endpoint.** If it fails, this track is a documented blocker (AC#3 explicitly allows "or documented blocker if Claude Code cannot pass metadata"), and per-call attribution falls back to correlating invocation-log `identity.arn` + `RoleSessionName` (`abca-`) — which Track 1's tagged session already provides. +The agent runs Claude Code two ways that must agree on the control protocol: the `claude-agent-sdk` Python wheel **bundles** a CLI, and the Dockerfile also installs the CLI via npm. Both are pinned in lockstep — `claude-agent-sdk==0.2.110` (bundles CLI 2.1.191) and npm `@anthropic-ai/claude-code@2.1.191`. 2.1.191 also satisfies the ≥2.1.176 `awsCredentialExport`-with-`Expiration` requirement. 
## Track 3 — operator documentation @@ -99,16 +104,15 @@ Bedrock Projects/Workspaces (`bedrock-mantle`, not the Claude Code path); replac | AC | Met by | |---|---| -| #1 Bedrock uses session-tagged creds (AgentCore + ECS); dev unchanged when unset | Track 1: BedrockInvokeRole + `awsCredentialExport`; fall back to compute role when `BEDROCK_INVOKE_ROLE_ARN` unset | +| #1 Bedrock uses session-tagged creds (AgentCore + ECS); dev unchanged when unset | Track 1: `AgentSessionRole` Bedrock grant + `awsCredentialExport`; helper fails open to compute role when `AGENT_SESSION_ROLE_ARN` unset | | #2 Session tags documented as billable; operator Billing steps | Track 3 | | #3 Per-request metadata `{task_id,user_id,repo}` when logging enabled (or documented blocker) | Track 2 + SigV4 validation gate | -| #4 Tests: CDK Bedrock grant on role; cred routing; no #211 regression | New `bedrock-invoke-role.test.ts`; helper unit test; #211 tests untouched (orthogonal path) | +| #4 Tests: CDK Bedrock grant on role; cred routing; no #211 regression | `agent-session-role.test.ts` (Bedrock grant present/absent); `test_bedrock_creds_helper.py` (assume + fail-open); `test_runner.py` (file + header wiring); #211 tests untouched | | #5 `COST_ATTRIBUTION.md` + accurate shipped/planned | Track 3 | | #6 Starlight mirrors synced | `mise //docs:sync` | ## Test plan -- **CDK:** assert `BedrockInvokeRole` grants `InvokeModel`/`InvokeModelWithResponseStream` on the model+profile ARN allowlist (no wildcard); assert trust admits both compute roles with `TagSession`; assert compute role **no longer** has `bedrock:InvokeModel`. -- **Security test:** assert the agent injects `awsCredentialExport` in a location the cloned repo cannot override (the highest-risk item above). -- **Agent:** unit-test the credential helper (tag building reuses `aws_session` logic; 256-char clamp; JSON shape with `Expiration`); unit-test `ANTHROPIC_CUSTOM_HEADERS` assembly. 
+- **CDK:** assert `AgentSessionRole` grants `bedrock:InvokeModel*` on the model/profile ARNs (no `Resource:'*'`) when `invokableModels` is set, and grants none when omitted. (#211 trust/grant/tenant-scope tests unchanged.) +- **Agent:** `bedrock_creds_helper` — assume-role carries the tenant tags + tagged session name; **fails open** to ambient creds when the attribution file is missing or when the assume call raises; emits `{}` when no creds resolve at all; 0600 file mode. `runner._setup_bedrock_cost_attribution` — writes the file when the role ARN is set, skips it when unset, always sets the metadata header. - **Live validation (pre-merge, manual):** confirm `X-Amzn-Bedrock-Request-Metadata` is honored (no `InvalidSignatureException`) and lands in invocation logs; confirm `iamPrincipal/user_id` appears in Cost Explorer after tag activation. diff --git a/docs/design/COST_MODEL.md b/docs/design/COST_MODEL.md index cea17d4e..997f7237 100644 --- a/docs/design/COST_MODEL.md +++ b/docs/design/COST_MODEL.md @@ -88,11 +88,13 @@ These estimates assume Claude Sonnet with prompt caching enabled and average tas For multi-user deployments, cost should be attributable to individual users and repositories: -- **Per-task:** Token usage and compute duration are captured in task metadata (`agent.cost_usd`, `agent.turns` - see [OBSERVABILITY.md](./OBSERVABILITY.md)). +- **Per-task:** Token usage and compute duration are captured in task metadata (`agent.cost_usd`, `agent.turns` - see [OBSERVABILITY.md](./OBSERVABILITY.md)). Note: `agent.cost_usd` is the Claude Agent SDK's **client-side estimate** (a build-time price table), not authoritative billing — use it for guardrails, and AWS Cost Explorer / CUR 2.0 for the real bill (see [COST_ATTRIBUTION.md](../guides/COST_ATTRIBUTION.md)). - **Per-user:** Aggregate task costs by `user_id`. - **Per-repo:** Aggregate task costs by `repo`. - **Dashboard:** Cost attribution dashboards should be built from the same task-level metrics. 
+For **AWS-native** chargeback of Bedrock spend (Cost Explorer / CUR 2.0 by `user_id` / `repo`, plus per-call invocation-log forensics) — beyond the in-app `cost_usd` meter above — see the operator guide [COST_ATTRIBUTION.md](../guides/COST_ATTRIBUTION.md) and the platform design [BEDROCK_COST_ATTRIBUTION.md](./BEDROCK_COST_ATTRIBUTION.md). + ## Cost guardrails (current) | Guardrail | Mechanism | Default | diff --git a/docs/guides/COST_ATTRIBUTION.md b/docs/guides/COST_ATTRIBUTION.md new file mode 100644 index 00000000..e27dd061 --- /dev/null +++ b/docs/guides/COST_ATTRIBUTION.md @@ -0,0 +1,54 @@ +# Cost attribution (operator guide) + +How to attribute **Amazon Bedrock model-inference spend** to individual users and repositories in a multi-user ABCA deployment. This is the operator-facing companion to the platform design in [BEDROCK_COST_ATTRIBUTION.md](../design/BEDROCK_COST_ATTRIBUTION.md) and the cost model in [COST_MODEL.md](../design/COST_MODEL.md#cost-attribution). + +> [!WARNING] +> **The in-app `cost_usd` is a client-side estimate, not authoritative billing data.** It is the Claude Agent SDK's `total_cost_usd` (`agent/src/runner.py`), computed locally from a price table bundled into the SDK at build time. It can drift from your actual AWS bill when Bedrock pricing changes, the SDK version does not recognize a model, prompt-cache read/write rates apply, or AWS discounts/commitments/free-tier apply that the client cannot model. Use it for per-task budget guardrails and approximate insight — **do not bill end users or trigger financial decisions from it.** For authoritative cost, use **AWS Cost Explorer / CUR 2.0** (the session-tag chargeback meter below), which reflects your actual invoice. (ABCA runs on Bedrock, so the authoritative source is your AWS bill — not the Claude Console.) + +## Three meters, three questions + +ABCA gives you three independent views of cost. They answer different questions; use them together. 
+ +| Meter | Granularity | Source of truth for | Where | +|---|---|---|---| +| **In-app `cost_usd`** | Per task | Per-task budget guardrails (`max_budget_usd`) | Task metadata / control panel | +| **CUR session-tag chargeback** | Per user / per repo, aggregated per usage-type per day | AWS-native FinOps chargeback | Cost Explorer / CUR 2.0 | +| **Invocation-log metadata** | Per Bedrock call | Per-call forensics, reconciliation | `/aws/bedrock/model-invocation-logs/` | + +Why all three: the in-app meter is an estimate the platform computes; it does not reflect AWS discounts/commitments. IAM session tags flow to your **bill** but only as aggregated billing data (they are *not* written to invocation logs). Request metadata gives **per-call** detail in logs but is *not* a cost-allocation tag and never appears in Cost Explorer. Per [AWS docs](https://docs.aws.amazon.com/bedrock/latest/userguide/cost-mgmt-iam-principal-tracking.html), session tags and request metadata are complementary mechanisms. + +## What the platform does automatically + +Once deployed, each agent task makes its Bedrock calls under **session-tagged, refreshable credentials** carrying `{user_id, repo, task_id}`, and stamps the same values as **request metadata** on every call. You do **not** need to change any code. What remains is **operator setup in the AWS Billing console** — AWS does not surface tag-based cost data until you activate it. + +## FinOps checklist + +These steps are a one-time operator responsibility (CDK does not automate org-level billing — see [Out of scope](../design/BEDROCK_COST_ATTRIBUTION.md#out-of-scope-unchanged-from-issue)). + +1. **Activate IAM-principal cost-allocation tags.** Billing console → **Cost allocation tags** → filter by type **IAM principal** → activate `user_id` and `repo` (the low-cardinality dimensions; `task_id` is high-cardinality — keep it for logs, not Cost Explorer). 
+ - Tags appear only **after** the first Bedrock call carrying them, and can take **up to 24 h** to show. + - Activation is **not retroactive** — only spend incurred after activation is tagged. +2. **Create a CUR 2.0 export with caller identity.** Billing console → **Data Exports** → create a CUR 2.0 export and select the option to include the **caller-identity ARN**. + - If you already have a CUR 2.0 export, you must create a **new** one — existing exports do not backfill identity data. +3. **Set budgets / alerts** per `user_id` or `repo` tag as needed (AWS Budgets), independent of the in-app `max_budget_usd` per-task guardrail. + +## Querying per-call detail (invocation logs) + +Request metadata lands under the top-level `requestMetadata` field of each log record. Example CloudWatch Logs Insights query (tokens per user + model): + +``` +fields requestMetadata.user_id as user, modelId, + input.inputTokenCount as inTokens, + output.outputTokenCount as outTokens +| stats sum(inTokens) as totalInput, sum(outTokens) as totalOutput, count() as calls + by user, modelId +| sort totalInput desc +``` + +To turn tokens into cost, multiply by the current [Bedrock per-token rates](https://aws.amazon.com/bedrock/pricing/), or join logs to CUR on `requestId` for invoice-accurate reconciliation at the model + usage-type grain. + +## Caveats + +- **Request-metadata header is best-effort.** It depends on Claude Code signing the `X-Amzn-Bedrock-Request-Metadata` header into the SigV4 request; if a Claude Code release does not, the header is rejected and per-call metadata is absent. Per-user/repo chargeback (the session-tag track) is unaffected — it does not rely on the header. See the [validation note](../design/BEDROCK_COST_ATTRIBUTION.md#track-2--per-request-metadata). +- **Attribution fails open.** If the per-task credential helper cannot assume the SessionRole, Bedrock still works under the shared compute role — spend for that task is simply untagged, not blocked. 
+- **No PII in tags/metadata.** `user_id` and `repo` are recorded in your bill and logs; do not map them to anything sensitive. diff --git a/docs/guides/DEPLOYMENT_GUIDE.md b/docs/guides/DEPLOYMENT_GUIDE.md index 435e5583..a32b0922 100644 --- a/docs/guides/DEPLOYMENT_GUIDE.md +++ b/docs/guides/DEPLOYMENT_GUIDE.md @@ -229,4 +229,5 @@ For users without AWS CLI access. - [User guide](./USER_GUIDE.md) -- API reference, CLI usage, task management. - [DEPLOYMENT_ROLES.md](../design/DEPLOYMENT_ROLES.md) -- Least-privilege IAM policies for CloudFormation execution. - [COST_MODEL.md](../design/COST_MODEL.md) -- Per-task costs, cost guardrails, cost at scale. +- [COST_ATTRIBUTION.md](./COST_ATTRIBUTION.md) -- Operator FinOps setup for per-user/per-repo Bedrock chargeback (Cost Explorer / CUR 2.0, invocation-log forensics). - [COMPUTE.md](../design/COMPUTE.md) -- Compute backend architecture and trade-offs. diff --git a/docs/scripts/sync-starlight.mjs b/docs/scripts/sync-starlight.mjs index 4326e407..56b082c2 100644 --- a/docs/scripts/sync-starlight.mjs +++ b/docs/scripts/sync-starlight.mjs @@ -253,6 +253,12 @@ mirrorMarkdownFile( path.join('src', 'content', 'docs', 'getting-started', 'Deployment-guide.md'), ); +// --- Cost Attribution Guide: mirror to getting-started/ (operator FinOps setup) --- +mirrorMarkdownFile( + path.join(docsRoot, 'guides', 'COST_ATTRIBUTION.md'), + path.join('src', 'content', 'docs', 'getting-started', 'Cost-attribution.md'), +); + // --- Prompt Guide: mirror to customizing/ --- mirrorMarkdownFile( path.join(docsRoot, 'guides', 'PROMPT_GUIDE.md'), diff --git a/docs/src/content/docs/architecture/Bedrock-cost-attribution.md b/docs/src/content/docs/architecture/Bedrock-cost-attribution.md index cd04a593..51c70972 100644 --- a/docs/src/content/docs/architecture/Bedrock-cost-attribution.md +++ b/docs/src/content/docs/architecture/Bedrock-cost-attribution.md @@ -12,12 +12,14 @@ Bedrock is invoked by the **Claude Code CLI subprocess** 
(`CLAUDE_CODE_USE_BEDRO | Track | Mechanism | Surfaces in | AC | |---|---|---|---| -| 1. IAM session-tag attribution | Claude Code `awsCredentialExport` → helper does `sts:AssumeRole --tags {user_id,repo,task_id}` against a new **BedrockInvokeRole** | CUR 2.0 / Cost Explorer (`iamPrincipal/` prefix), aggregated per usage-type/day | #1, #2 | -| 2. Per-request metadata | `ANTHROPIC_CUSTOM_HEADERS: X-Amzn-Bedrock-Request-Metadata: {...}` env var | Model-invocation logs (`requestMetadata` field), per call | #3 | +| 1. IAM session-tag attribution | Claude Code `awsCredentialExport` → `bedrock_creds_helper.py` does `sts:AssumeRole --tags {user_id,repo,task_id}` against the existing **`AgentSessionRole`** (now also granted `bedrock:InvokeModel*`) | CUR 2.0 / Cost Explorer (`iamPrincipal/` prefix), aggregated per usage-type/day | #1, #2 | +| 2. Per-request metadata | `ANTHROPIC_CUSTOM_HEADERS: X-Amzn-Bedrock-Request-Metadata: {...}` on the subprocess env | Model-invocation logs (`requestMetadata` field), per call | #3 | | 3. Operator docs | `COST_ATTRIBUTION.md` + cross-links | — | #5 | The two tracks are **complementary** (per AWS docs): session tags give aggregated chargeback in billing; request metadata gives per-call forensics in logs. Session tags are *not* written to invocation logs, and request metadata is *not* a cost-allocation tag — you need both. +> **`cost_usd` is a client-side estimate, not billing.** The in-app `cost_usd` is the SDK's `total_cost_usd` (`runner.py`), computed from a build-time price table; it drifts from the real bill on pricing changes, unrecognized models, cache rates, and AWS discounts. It is for per-task guardrails only — the authoritative source is AWS Cost Explorer / CUR 2.0 (Track 1). This is the same caveat the [Claude Agent SDK cost-tracking docs](https://code.claude.com/docs/en/agent-sdk/cost-tracking) raise, adapted for Bedrock (authoritative source is the AWS bill, not the Claude Console). 
Both this design and the operator guide surface it. + ## Why the issue's original approach doesn't apply The issue proposed extending `aws_session.py` / the `DeferredRefreshableCredentials` pattern to route `InvokeModel` through tagged creds. That pattern governs the agent's **boto3** clients for tenant data. But: @@ -30,60 +32,63 @@ agent/src/runner.py::_setup_agent_env credential chain (today: the ambient compute role) ``` -The agent never makes the `InvokeModel` call, so it cannot attach creds or headers to it directly. The control point is **how Claude Code resolves credentials and headers**, configured via Claude Code settings/env before `client.connect()`. +The agent never makes the `InvokeModel` call, so it cannot attach creds or headers to it directly. The control point is **how Claude Code resolves credentials and headers**, configured before `client.connect()`. -Verified Claude Code behavior (code.claude.com/docs/en/amazon-bedrock, /env-vars): +Verified Claude Code behavior (code.claude.com/docs/en/amazon-bedrock, /env-vars, /settings): -- **Credentials:** default AWS SDK chain. Mutating the parent process's `AWS_*` env vars mid-session is **not** re-read. For refresh, Claude Code supports `awsCredentialExport` — a helper command run at session start and re-run ~5 min before the `Expiration` it returns. This is exactly what an 8 h task needs to survive the **1 h role-chaining cap**. -- **Request metadata:** Claude Code uses the **Invoke API and does not support Converse**, so the Converse `requestMetadata` field is unreachable. The only lever is `ANTHROPIC_CUSTOM_HEADERS` (static per process). Because ABCA runs **one task per container per Claude Code session**, "static per process" == "per task" — sufficient for `{task_id, user_id, repo}` attribution. No proxy/gateway needed. +- **Credentials:** default AWS SDK chain. Mutating the parent process's `AWS_*` env vars mid-session is **not** re-read. 
For refresh, Claude Code supports `awsCredentialExport` — a settings-only key (no env/flag equivalent) naming a helper command run at session start and re-run ~5 min before the `Expiration` the helper returns (≥ CLI 2.1.176). This beats the **1 h role-chaining cap** on an 8 h task. +- **Request metadata:** Claude Code uses the **Invoke API and does not support Converse**, so the Converse `requestMetadata` field is unreachable. The only lever is `ANTHROPIC_CUSTOM_HEADERS` (static per process), which **is read from the process environment** and process-env wins over any settings `env` block. Because ABCA runs **one task per container per Claude Code session**, "static per process" == "per task" — sufficient. No proxy/gateway needed. +- **Settings precedence (security-critical):** under `setting_sources=["project"]` Claude Code loads **only the cloned repo's `.claude/settings.json`** (user settings are dropped) — but the **managed-settings layer is loaded in all cases and outranks everything**, so the untrusted repo cannot override it. ## Track 1 — IAM session-tag attribution -### New construct: `BedrockInvokeRole` +### Reuse `AgentSessionRole` (no new role) + +`AgentSessionRole` is *already* assumed by the compute roles with `{user_id, repo, task_id}` STS session tags, and `AGENT_SESSION_ROLE_ARN` is already injected into the container. A second "BedrockInvokeRole" would duplicate that entire trust/grant surface for an identical principal. Instead we add a single grant to it: -A dedicated role the agent assumes *only* to mint tagged credentials for Claude Code's Bedrock calls. Kept separate from `AgentSessionRole` (tenant data) so the trust/grant surfaces stay independent and auditable. +- New optional prop `invokableModels: IBedrockInvokable[]`. For each, the construct calls `invokable.grantInvoke(this.role)` — **the same grant the compute role receives**. 
Reusing `grantInvoke` (rather than hand-building ARNs) is load-bearing: a cross-region inference profile fans out to the foundation-model ARN in *every routed region*; replicating that by hand would risk an `AccessDenied` on a cross-region route. No `aws:PrincipalTag` condition — the tags are for billing attribution, not access scoping. +- `agent.ts` passes the six existing invokables (Sonnet 4.6 / Opus 4 / Haiku 4.5 models + their cross-region profiles). The ECS path reuses the same `AgentSessionRole` instance, so it is covered automatically. -- **Trust:** same compute roles as `AgentSessionRole` (AgentCore ExecutionRole, ECS task role), with `sts:AssumeRole` + `sts:TagSession`. -- **Grants:** `bedrock:InvokeModel` + `bedrock:InvokeModelWithResponseStream` on the **exact** foundation-model + cross-region inference-profile ARNs already enumerated in `agent.ts` / `ecs-agent-cluster.ts` (Sonnet 4.6, Opus 4, Haiku 4.5). No wildcards — reuses the existing ARN allowlist. -- **`maxSessionDuration`: 1 h** (documents the role-chaining cap; refresh handles longevity). -- Exposes `admitComputeRole()` mirroring `AgentSessionRole`, so ECS wiring is symmetric. +### The compute role KEEPS its Bedrock grant -Once this exists, **the compute role drops `bedrock:InvokeModel`** — model invocation moves entirely onto the tagged BedrockInvokeRole. (The #211 comment "Bedrock intentionally stays on the compute role to avoid 1 h expiry" is resolved by `awsCredentialExport`'s refresh.) +The #211 comment "Bedrock intentionally stays on the compute role to avoid 1 h expiry" is *resolved* by `awsCredentialExport`'s pre-expiry refresh — but we still leave `InvokeModel` on the compute role, because Track 1 **fails open** (below) and the compute-role grant is exactly the fallback path. The SessionRole grant is parallel, not a replacement. 
### Credential helper + Claude Code wiring -A small helper script (shipped in the agent image) that `awsCredentialExport` invokes: +`agent/src/bedrock_creds_helper.py` (invoked by `awsCredentialExport`): -``` -assume-role --role-arn $BEDROCK_INVOKE_ROLE_ARN \ - --tags user_id=$USER_ID repo=$REPO task_id=$TASK_ID -→ emits {"Credentials":{AccessKeyId,SecretAccessKey,SessionToken,Expiration}} -``` +1. Reads a 0600 JSON file (`/home/agent/.bedrock-attribution.json`) the agent writes at startup, carrying the SessionRole ARN + STS tags. Read from a file, not the environment, so tenant identifiers don't leak into the untrusted repo subprocesses the agent spawns (matching `aws_session.py` discipline). +2. Calls `sts:AssumeRole` with those tags and emits `{"Credentials":{...,"Expiration":"<ISO-8601>"}}`. The real `Expiration` drives Claude Code's pre-cap refresh. +3. Tag building reuses `aws_session.build_session_tags` (one definition of the `{user_id,repo,task_id}` tags + 256-char clamp). -- Reuses the **same STS `assume_role` + tag-truncation logic** already in `aws_session.py` (factor the tag-building + 256-char clamp into a shared helper; don't duplicate). -- `Expiration` is the real STS expiry, so Claude Code re-runs the helper before the 1 h cap. -- `_setup_agent_env` writes Claude Code's `awsCredentialExport` setting (and `BEDROCK_INVOKE_ROLE_ARN` / tag values) **into a trusted, agent-controlled settings location** — *not* the cloned repo's `.claude/settings.json`. +`runner._setup_bedrock_cost_attribution` writes the attribution file when `AGENT_SESSION_ROLE_ARN` is set, and always sets the metadata header (Track 2). -> **Security note (must not be skipped):** `awsCredentialExport` runs an arbitrary shell command. `setting_sources=["project"]` currently reads the **untrusted cloned target repo's** `.claude/settings.json`. 
We must inject `awsCredentialExport` via a location the target repo **cannot override** (user-level settings or an explicit `--settings` file the agent owns), and confirm Claude Code's precedence makes project settings unable to redefine it. A repo that could set `awsCredentialExport` would get arbitrary code execution with the compute role. This is the single highest-risk item in the design and gets a dedicated test. +### Where `awsCredentialExport` lives (RCE boundary) -### Fail-open vs fail-closed +`awsCredentialExport` runs an arbitrary command. It is baked into the **managed-settings layer** at `/etc/claude-code/managed-settings.json` (root-owned, copied in the Dockerfile before `USER agent`). This is the only repo-proof location: it loads regardless of `setting_sources=["project"]` and outranks the cloned repo's project `.claude/settings.json`, so a malicious repo cannot define or override it. Putting it anywhere the target repo can influence would be RCE with the compute role. -Unlike #211 tenant isolation (fail **closed** — a scoping failure means cross-tenant exposure), Bedrock attribution is a **billing/observability** control. If the helper can't assume the role, the correct failure mode is to **fall back to the compute role and emit a warning**, not to abort the task — losing chargeback granularity is not a security incident. When `BEDROCK_INVOKE_ROLE_ARN` is unset (local/dev), behavior is identical to today. +### Fail-open (not fail-closed) + +Unlike #211 tenant isolation (fail **closed** — a scoping failure means cross-tenant exposure), Bedrock attribution is a **billing/observability** control. If the attribution file is absent or the assume fails, the helper emits the **ambient compute-role credentials** so Bedrock keeps working untagged — losing chargeback granularity is not a security incident. When `AGENT_SESSION_ROLE_ARN` is unset (local/dev), the helper fails open and behavior matches today. 
## Track 2 — per-request metadata -In `_setup_agent_env`, set: +In `_setup_bedrock_cost_attribution`, set on the process env: ```python os.environ["ANTHROPIC_CUSTOM_HEADERS"] = ( "X-Amzn-Bedrock-Request-Metadata: " - + json.dumps({"task_id": ..., "user_id": ..., "repo": ...}) # 256-char clamp, ≤16 keys + + json.dumps({"user_id": ..., "repo": ..., "task_id": ...}) # 256-char clamp, ≤16 keys ) ``` -Gated on invocation logging being enabled (it already is — `agent.ts` configures the CloudWatch destination). Surfaces under the `requestMetadata` field in `/aws/bedrock/model-invocation-logs/`. +Set via the process env (not project settings) so the untrusted repo can't alter it. Surfaces under `requestMetadata` in `/aws/bedrock/model-invocation-logs/` (logging already enabled in `agent.ts`). + +> **Open risk to validate against a live endpoint:** Bedrock rejects `X-Amzn-Bedrock-Request-Metadata` with `InvalidSignatureException` if the header is omitted from the SigV4 `SignedHeaders`. Whether Claude Code signs custom headers is unverified. AC#3 explicitly permits "or documented blocker if Claude Code cannot pass metadata." If it fails, per-call attribution falls back to invocation-log `identity.arn` + `RoleSessionName` (`abca-bedrock-`) that Track 1's tagged session already provides. + +## Version alignment -> **Open risk to validate before merge:** Bedrock rejects `X-Amzn-Bedrock-Request-Metadata` with `InvalidSignatureException` if the header is omitted from the SigV4 `SignedHeaders`. AWS SDKs that expose metadata as a parameter sign it automatically; a custom header injected via `ANTHROPIC_CUSTOM_HEADERS` may **not** be in Claude Code's signed-headers list. 
**This must be tested against a live Bedrock endpoint.** If it fails, this track is a documented blocker (AC#3 explicitly allows "or documented blocker if Claude Code cannot pass metadata"), and per-call attribution falls back to correlating invocation-log `identity.arn` + `RoleSessionName` (`abca-`) — which Track 1's tagged session already provides. +The agent runs Claude Code two ways that must agree on the control protocol: the `claude-agent-sdk` Python wheel **bundles** a CLI, and the Dockerfile also installs the CLI via npm. Both are pinned in lockstep — `claude-agent-sdk==0.2.110` (bundles CLI 2.1.191) and npm `@anthropic-ai/claude-code@2.1.191`. 2.1.191 also satisfies the ≥2.1.176 `awsCredentialExport`-with-`Expiration` requirement. ## Track 3 — operator documentation @@ -103,16 +108,15 @@ Bedrock Projects/Workspaces (`bedrock-mantle`, not the Claude Code path); replac | AC | Met by | |---|---| -| #1 Bedrock uses session-tagged creds (AgentCore + ECS); dev unchanged when unset | Track 1: BedrockInvokeRole + `awsCredentialExport`; fall back to compute role when `BEDROCK_INVOKE_ROLE_ARN` unset | +| #1 Bedrock uses session-tagged creds (AgentCore + ECS); dev unchanged when unset | Track 1: `AgentSessionRole` Bedrock grant + `awsCredentialExport`; helper fails open to compute role when `AGENT_SESSION_ROLE_ARN` unset | | #2 Session tags documented as billable; operator Billing steps | Track 3 | | #3 Per-request metadata `{task_id,user_id,repo}` when logging enabled (or documented blocker) | Track 2 + SigV4 validation gate | -| #4 Tests: CDK Bedrock grant on role; cred routing; no #211 regression | New `bedrock-invoke-role.test.ts`; helper unit test; #211 tests untouched (orthogonal path) | +| #4 Tests: CDK Bedrock grant on role; cred routing; no #211 regression | `agent-session-role.test.ts` (Bedrock grant present/absent); `test_bedrock_creds_helper.py` (assume + fail-open); `test_runner.py` (file + header wiring); #211 tests untouched | | #5 
`COST_ATTRIBUTION.md` + accurate shipped/planned | Track 3 | | #6 Starlight mirrors synced | `mise //docs:sync` | ## Test plan -- **CDK:** assert `BedrockInvokeRole` grants `InvokeModel`/`InvokeModelWithResponseStream` on the model+profile ARN allowlist (no wildcard); assert trust admits both compute roles with `TagSession`; assert compute role **no longer** has `bedrock:InvokeModel`. -- **Security test:** assert the agent injects `awsCredentialExport` in a location the cloned repo cannot override (the highest-risk item above). -- **Agent:** unit-test the credential helper (tag building reuses `aws_session` logic; 256-char clamp; JSON shape with `Expiration`); unit-test `ANTHROPIC_CUSTOM_HEADERS` assembly. +- **CDK:** assert `AgentSessionRole` grants `bedrock:InvokeModel*` on the model/profile ARNs (no `Resource:'*'`) when `invokableModels` is set, and grants none when omitted. (#211 trust/grant/tenant-scope tests unchanged.) +- **Agent:** `bedrock_creds_helper` — assume-role carries the tenant tags + tagged session name; **fails open** to ambient creds when the attribution file is missing, when assume raises, and emits `{}` when no creds resolve at all; 0600 file mode. `runner._setup_bedrock_cost_attribution` — writes the file when the role ARN is set, skips it when unset, always sets the metadata header. - **Live validation (pre-merge, manual):** confirm `X-Amzn-Bedrock-Request-Metadata` is honored (no `InvalidSignatureException`) and lands in invocation logs; confirm `iamPrincipal/user_id` appears in Cost Explorer after tag activation. 
diff --git a/docs/src/content/docs/architecture/Cost-model.md b/docs/src/content/docs/architecture/Cost-model.md index e683e92e..d9606fb2 100644 --- a/docs/src/content/docs/architecture/Cost-model.md +++ b/docs/src/content/docs/architecture/Cost-model.md @@ -92,11 +92,13 @@ These estimates assume Claude Sonnet with prompt caching enabled and average tas For multi-user deployments, cost should be attributable to individual users and repositories: -- **Per-task:** Token usage and compute duration are captured in task metadata (`agent.cost_usd`, `agent.turns` - see [OBSERVABILITY.md](/sample-autonomous-cloud-coding-agents/architecture/observability)). +- **Per-task:** Token usage and compute duration are captured in task metadata (`agent.cost_usd`, `agent.turns` - see [OBSERVABILITY.md](/sample-autonomous-cloud-coding-agents/architecture/observability)). Note: `agent.cost_usd` is the Claude Agent SDK's **client-side estimate** (a build-time price table), not authoritative billing — use it for guardrails, and AWS Cost Explorer / CUR 2.0 for the real bill (see [COST_ATTRIBUTION.md](/sample-autonomous-cloud-coding-agents/getting-started/cost-attribution)). - **Per-user:** Aggregate task costs by `user_id`. - **Per-repo:** Aggregate task costs by `repo`. - **Dashboard:** Cost attribution dashboards should be built from the same task-level metrics. +For **AWS-native** chargeback of Bedrock spend (Cost Explorer / CUR 2.0 by `user_id` / `repo`, plus per-call invocation-log forensics) — beyond the in-app `cost_usd` meter above — see the operator guide [COST_ATTRIBUTION.md](/sample-autonomous-cloud-coding-agents/getting-started/cost-attribution) and the platform design [BEDROCK_COST_ATTRIBUTION.md](/sample-autonomous-cloud-coding-agents/architecture/bedrock-cost-attribution).
+ ## Cost guardrails (current) | Guardrail | Mechanism | Default | diff --git a/docs/src/content/docs/getting-started/Cost-attribution.md b/docs/src/content/docs/getting-started/Cost-attribution.md new file mode 100644 index 00000000..896b0293 --- /dev/null +++ b/docs/src/content/docs/getting-started/Cost-attribution.md @@ -0,0 +1,58 @@ +--- +title: Cost attribution +--- + +# Cost attribution (operator guide) + +How to attribute **Amazon Bedrock model-inference spend** to individual users and repositories in a multi-user ABCA deployment. This is the operator-facing companion to the platform design in [BEDROCK_COST_ATTRIBUTION.md](/sample-autonomous-cloud-coding-agents/architecture/bedrock-cost-attribution) and the cost model in [COST_MODEL.md](/sample-autonomous-cloud-coding-agents/architecture/cost-model#cost-attribution). + +> [!WARNING] +> **The in-app `cost_usd` is a client-side estimate, not authoritative billing data.** It is the Claude Agent SDK's `total_cost_usd` (`agent/src/runner.py`), computed locally from a price table bundled into the SDK at build time. It can drift from your actual AWS bill when Bedrock pricing changes, the SDK version does not recognize a model, prompt-cache read/write rates apply, or AWS discounts/commitments/free-tier apply that the client cannot model. Use it for per-task budget guardrails and approximate insight — **do not bill end users or trigger financial decisions from it.** For authoritative cost, use **AWS Cost Explorer / CUR 2.0** (the session-tag chargeback meter below), which reflects your actual invoice. (ABCA runs on Bedrock, so the authoritative source is your AWS bill — not the Claude Console.) + +## Three meters, three questions + +ABCA gives you three independent views of cost. They answer different questions; use them together. 
+ +| Meter | Granularity | Source of truth for | Where | +|---|---|---|---| +| **In-app `cost_usd`** | Per task | Per-task budget guardrails (`max_budget_usd`) | Task metadata / control panel | +| **CUR session-tag chargeback** | Per user / per repo, aggregated per usage-type per day | AWS-native FinOps chargeback | Cost Explorer / CUR 2.0 | +| **Invocation-log metadata** | Per Bedrock call | Per-call forensics, reconciliation | `/aws/bedrock/model-invocation-logs/` | + +Why all three: the in-app meter is an estimate the platform computes; it does not reflect AWS discounts/commitments. IAM session tags flow to your **bill** but only as aggregated billing data (they are *not* written to invocation logs). Request metadata gives **per-call** detail in logs but is *not* a cost-allocation tag and never appears in Cost Explorer. Per [AWS docs](https://docs.aws.amazon.com/bedrock/latest/userguide/cost-mgmt-iam-principal-tracking.html), session tags and request metadata are complementary mechanisms. + +## What the platform does automatically + +Once deployed, each agent task makes its Bedrock calls under **session-tagged, refreshable credentials** carrying `{user_id, repo, task_id}`, and stamps the same values as **request metadata** on every call. You do **not** need to change any code. What remains is **operator setup in the AWS Billing console** — AWS does not surface tag-based cost data until you activate it. + +## FinOps checklist + +These steps are a one-time operator responsibility (CDK does not automate org-level billing — see [Out of scope](/sample-autonomous-cloud-coding-agents/architecture/bedrock-cost-attribution#out-of-scope-unchanged-from-issue)). + +1. **Activate IAM-principal cost-allocation tags.** Billing console → **Cost allocation tags** → filter by type **IAM principal** → activate `user_id` and `repo` (the low-cardinality dimensions; `task_id` is high-cardinality — keep it for logs, not Cost Explorer). 
+ - Tags appear only **after** the first Bedrock call carrying them, and can take **up to 24 h** to show. + - Activation is **not retroactive** — only spend incurred after activation is tagged. +2. **Create a CUR 2.0 export with caller identity.** Billing console → **Data Exports** → create a CUR 2.0 export and select the option to include the **caller-identity ARN**. + - If you already have a CUR 2.0 export, you must create a **new** one — existing exports do not backfill identity data. +3. **Set budgets / alerts** per `user_id` or `repo` tag as needed (AWS Budgets), independent of the in-app `max_budget_usd` per-task guardrail. + +## Querying per-call detail (invocation logs) + +Request metadata lands under the top-level `requestMetadata` field of each log record. Example CloudWatch Logs Insights query (tokens per user + model): + +``` +fields requestMetadata.user_id as user, modelId, + input.inputTokenCount as inTokens, + output.outputTokenCount as outTokens +| stats sum(inTokens) as totalInput, sum(outTokens) as totalOutput, count() as calls + by user, modelId +| sort totalInput desc +``` + +To turn tokens into cost, multiply by the current [Bedrock per-token rates](https://aws.amazon.com/bedrock/pricing/), or join logs to CUR on `requestId` for invoice-accurate reconciliation at the model + usage-type grain. + +## Caveats + +- **Request-metadata header is best-effort.** It depends on Claude Code signing the `X-Amzn-Bedrock-Request-Metadata` header into the SigV4 request; if a Claude Code release does not, the header is rejected and per-call metadata is absent. Per-user/repo chargeback (the session-tag track) is unaffected — it does not rely on the header. See the [validation note](/sample-autonomous-cloud-coding-agents/architecture/bedrock-cost-attribution#track-2--per-request-metadata). 
+- **Attribution fails open.** If the per-task credential helper cannot assume the SessionRole, Bedrock still works under the shared compute role — spend for that task is simply untagged, not blocked. +- **No PII in tags/metadata.** `user_id` and `repo` are recorded in your bill and logs; do not map them to anything sensitive. diff --git a/docs/src/content/docs/getting-started/Deployment-guide.md b/docs/src/content/docs/getting-started/Deployment-guide.md index f31e890e..01f6e2ce 100644 --- a/docs/src/content/docs/getting-started/Deployment-guide.md +++ b/docs/src/content/docs/getting-started/Deployment-guide.md @@ -233,4 +233,5 @@ For users without AWS CLI access. - [User guide](/sample-autonomous-cloud-coding-agents/using/overview) -- API reference, CLI usage, task management. - [DEPLOYMENT_ROLES.md](/sample-autonomous-cloud-coding-agents/architecture/deployment-roles) -- Least-privilege IAM policies for CloudFormation execution. - [COST_MODEL.md](/sample-autonomous-cloud-coding-agents/architecture/cost-model) -- Per-task costs, cost guardrails, cost at scale. +- [COST_ATTRIBUTION.md](/sample-autonomous-cloud-coding-agents/getting-started/cost-attribution) -- Operator FinOps setup for per-user/per-repo Bedrock chargeback (Cost Explorer / CUR 2.0, invocation-log forensics). - [COMPUTE.md](/sample-autonomous-cloud-coding-agents/architecture/compute) -- Compute backend architecture and trade-offs. From 40712c10d48a16473066f0b258a0a7f1f593b6f1 Mon Sep 17 00:00:00 2001 From: bgagent Date: Tue, 30 Jun 2026 15:38:54 -0500 Subject: [PATCH 3/8] refactor(cdk): collect invokable Bedrock models in a loop (pre-empt #434) PR #434 replaces the six named model/profile bindings in agent.ts with a loop over a single source-of-truth id list. Our #215 SessionRole grant referenced those bindings by name, so the merge would break compilation.
Adopt #434's loop+collection shape now: build each foundation model + its cross-region profile in a loop, grant the runtime, and collect into one list passed to AgentSessionRole.invokableModels. Behavior is byte-for-byte identical in synth; the eventual #434 merge becomes a one-line swap of the local id array for resolveBedrockModelIds(this.node). Refs #215, #434 --- cdk/src/stacks/agent.ts | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/cdk/src/stacks/agent.ts b/cdk/src/stacks/agent.ts index 85a4d5e1..9d2824bc 100644 --- a/cdk/src/stacks/agent.ts +++ b/cdk/src/stacks/agent.ts @@ -421,12 +421,15 @@ export class AgentStack extends Stack { applicationLogGroup.grantWrite(runtime); agentMemory.grantReadWrite(runtime); - // Grant the runtime invoke on each configured foundation model + its - // US cross-Region inference profile. The model set is a single source of - // truth (constructs/bedrock-models.ts), shared with the ECS task role, and - // overridable via the `bedrockModels` CDK context — add a model by config, - // no construct edits. Scoping stays per-model (no Resource:'*'); account- - // level Bedrock access remains the outer gate. + // Grant the runtime invoke on each configured foundation model + its US + // cross-Region inference profile. The model set is a single source of truth + // (constructs/bedrock-models.ts, #434), shared with the ECS task role and + // overridable via the `bedrockModels` CDK context. Each invokable is also + // collected so the same set is granted to the SessionRole below (#215 cost + // attribution) — the two grants derive from one list and can't drift. + // Scoping stays per-model (no Resource:'*'); account-level Bedrock access + // remains the outer gate. 
+ const invokableBedrockModels: bedrock.IBedrockInvokable[] = []; for (const modelId of resolveBedrockModelIds(this.node)) { const foundationModel = new bedrock.BedrockFoundationModel(modelId, { supportsAgents: true, @@ -438,6 +441,7 @@ export class AgentStack extends Stack { }); foundationModel.grantInvoke(runtime); crossRegionProfile.grantInvoke(runtime); + invokableBedrockModels.push(foundationModel, crossRegionProfile); } // --- Per-task SessionRole (#209) --- @@ -459,16 +463,9 @@ export class AgentStack extends Stack { traceArtifactsBucket: traceArtifactsBucket.bucket, attachmentsBucket: attachmentsBucket.bucket, // #215: session-tagged Bedrock grant for cost attribution — the same - // invokables grantInvoke-ed to the compute role above, so the grants - // stay in lockstep. - invokableModels: [ - model, - inferenceProfile, - model3, - inferenceProfile3, - model2, - inferenceProfile2, - ], + // invokables grantInvoke-ed to the runtime above, so the grants stay in + // lockstep. + invokableModels: invokableBedrockModels, }); sessionRoleArnHolder = agentSessionRole.role.roleArn; From 82c1ad0e77dcf3437760138f74dba8295e35ad1d Mon Sep 17 00:00:00 2001 From: bgagent Date: Tue, 30 Jun 2026 15:54:42 -0500 Subject: [PATCH 4/8] fix(agent): make Bedrock creds-helper fail-open paths observable (#215 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Silent-failure review flagged that bedrock_creds_helper.py degraded silently: a persistent assume-role denial would drop chargeback for weeks with no signal pointing back to this code — the 'invisible degradation' AI004 forbids even when the fallback itself is intended. - Add _warn() (stderr only — stdout is the credential channel Claude Code parses, so shell.log/fd1 is unusable here). - Log every fail-open path; distinguish severities: absent file (benign) vs present-but-unreadable (write bug), and expected ClientError/BotoCoreError assume failure vs UNEXPECTED errors. 
- Narrow the assume catch to (ClientError, BotoCoreError); catch ImportError on boto3 separately (packaging defect, not AccessDenied). All still fail open. Behavior unchanged (still fail-open to ambient creds); degradations are now visible and correlatable. Tests cover each distinguished path + its diagnostic. Refs #215 --- agent/src/bedrock_creds_helper.py | 72 ++++++++++++++++++++---- agent/tests/test_bedrock_creds_helper.py | 53 ++++++++++++++--- 2 files changed, 108 insertions(+), 17 deletions(-) diff --git a/agent/src/bedrock_creds_helper.py b/agent/src/bedrock_creds_helper.py index 035c947e..6f3ede39 100644 --- a/agent/src/bedrock_creds_helper.py +++ b/agent/src/bedrock_creds_helper.py @@ -74,6 +74,20 @@ def write_attribution_file( return target +def _warn(message: str) -> None: + """Emit a diagnostic to stderr. + + This process's **stdout is the credential channel** — Claude Code parses it + as the ``awsCredentialExport`` JSON result — so diagnostics MUST go to + stderr or they would corrupt the credential envelope. (This is also why + ``shell.log``, which writes to fd 1, is unusable here.) Every fail-open path + logs through here so a silent, weeks-long loss of cost attribution is + instead a visible, correlatable signal — the fallback stays open, but it is + never invisible. + """ + print(f"[bedrock-creds] {message}", file=sys.stderr) + + def _emit(creds: dict[str, str]) -> None: json.dump({"Credentials": creds}, sys.stdout) @@ -95,8 +109,14 @@ def _ambient_credentials() -> dict[str, str]: creds = botocore.session.get_session().get_credentials() if creds is None: - # No resolvable credentials at all. Emit an empty object; Claude Code - # then falls back to its own default-chain resolution. + # No resolvable credentials at all — the deepest degradation. Emit an + # empty object; Claude Code then falls back to its own default-chain + # resolution. Surface it: if that fallback also fails, this stderr line + # is the only breadcrumb. 
+ _warn( + "no resolvable AWS credentials; emitting empty envelope, " + "Claude Code will use its default chain" + ) return {} return _frozen_to_creds(creds.get_frozen_credentials(), None) @@ -109,16 +129,34 @@ def resolve_credentials() -> dict[str, str]: cfg = json.load(fh) role_arn = cfg["role_arn"] tags = cfg.get("tags", []) - except (OSError, ValueError, KeyError): - # Attribution not configured (local/dev) or unreadable → fail open. + except FileNotFoundError: + # Attribution not configured (local/dev, or pre-provisioning). Expected + # and benign — debug-level signal only. + _warn("attribution file absent; not configured — using ambient creds") + return _ambient_credentials() + except (OSError, ValueError, KeyError) as exc: + # File present but unreadable/malformed/schema-drifted. This is NOT the + # benign "not configured" case — it points at a write_attribution_file + # bug or a partial write, so it warrants a louder signal. + _warn( + f"attribution file present but unreadable ({type(exc).__name__}: {exc}); " + "using ambient creds" + ) return _ambient_credentials() try: import boto3 + from botocore.exceptions import BotoCoreError, ClientError + except ImportError as exc: + # boto3 missing/broken in the image is a packaging defect, not the + # expected assume-role failure — name it explicitly so it can't hide. 
+ _warn(f"boto3 unavailable ({exc}); using ambient creds — fix the image") + return _ambient_credentials() - region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION") - task_id = next((t["Value"] for t in tags if t.get("Key") == "task_id"), "") - session_name = f"abca-bedrock-{task_id}"[:64] or "abca-bedrock" + region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION") + task_id = next((t["Value"] for t in tags if t.get("Key") == "task_id"), "") + session_name = f"abca-bedrock-{task_id}"[:64] or "abca-bedrock" + try: resp = boto3.client("sts", region_name=region).assume_role( RoleArn=role_arn, RoleSessionName=session_name, @@ -132,9 +170,23 @@ def resolve_credentials() -> dict[str, str]: "SessionToken": c["SessionToken"], "Expiration": c["Expiration"].isoformat(), } - except Exception: - # Assume failed (role not yet provisioned, transient STS error, …). - # Fail open so Bedrock keeps working on the compute role. + except (ClientError, BotoCoreError) as exc: + # Expected assume failure: role not yet provisioned, AccessDenied, + # transient STS error. Fail open so Bedrock keeps working on the + # compute role; spend for this task is untagged. + _warn( + f"assume_role failed ({type(exc).__name__}: {exc}); using ambient creds " + "— Bedrock spend will be UNTAGGED" + ) + return _ambient_credentials() + except Exception as exc: + # Anything else (unexpected STS response shape, a logic bug here) is NOT + # the expected fallback. Still fail open — this is a billing control, not + # isolation — but flag it distinctly so it isn't mistaken for AccessDenied. 
+ _warn( + f"UNEXPECTED error minting tagged creds ({type(exc).__name__}: {exc}); " + "using ambient creds" + ) return _ambient_credentials() diff --git a/agent/tests/test_bedrock_creds_helper.py b/agent/tests/test_bedrock_creds_helper.py index f79ce0d5..426aad8d 100644 --- a/agent/tests/test_bedrock_creds_helper.py +++ b/agent/tests/test_bedrock_creds_helper.py @@ -85,31 +85,70 @@ def test_resolve_fails_open_when_no_attribution_file(attr_file): assert "Expiration" not in creds # ambient creds are returned unbounded -def test_resolve_fails_open_when_assume_role_raises(attr_file): +def _ambient(access_key="AMB"): + frozen = SimpleNamespace(access_key=access_key, secret_key="S", token="T") + ambient = MagicMock() + ambient.get_credentials.return_value.get_frozen_credentials.return_value = frozen + return ambient + + +def test_resolve_fails_open_on_expected_assume_error_and_warns(attr_file, capsys): + from botocore.exceptions import ClientError + helper.write_attribution_file( "arn:aws:iam::1:role/SR", build_session_tags("u", "r", "t"), attr_file ) - frozen = SimpleNamespace(access_key="AMB", secret_key="S", token="T") - ambient = MagicMock() - ambient.get_credentials.return_value.get_frozen_credentials.return_value = frozen + sts = MagicMock() + sts.assume_role.side_effect = ClientError( + {"Error": {"Code": "AccessDenied", "Message": "denied"}}, "AssumeRole" + ) + with ( + patch("boto3.client", return_value=sts), + patch("botocore.session.get_session", return_value=_ambient()), + ): + creds = helper.resolve_credentials() + assert creds["AccessKeyId"] == "AMB" + # Fail-open must be observable, and flagged as the expected (not unexpected) case. + err = capsys.readouterr().err + assert "assume_role failed" in err and "UNTAGGED" in err + assert "UNEXPECTED" not in err + +def test_resolve_flags_unexpected_error_distinctly(attr_file, capsys): + # A non-boto error (e.g. 
a logic bug, bad STS response shape) must still + # fail open but be labeled UNEXPECTED so it isn't mistaken for AccessDenied. + helper.write_attribution_file( + "arn:aws:iam::1:role/SR", build_session_tags("u", "r", "t"), attr_file + ) sts = MagicMock() - sts.assume_role.side_effect = RuntimeError("AccessDenied") + sts.assume_role.side_effect = RuntimeError("boom") with ( patch("boto3.client", return_value=sts), - patch("botocore.session.get_session", return_value=ambient), + patch("botocore.session.get_session", return_value=_ambient()), ): creds = helper.resolve_credentials() assert creds["AccessKeyId"] == "AMB" + assert "UNEXPECTED" in capsys.readouterr().err + + +def test_resolve_distinguishes_corrupt_config_from_absent(attr_file, capsys): + # File present but malformed → louder signal than a plain absent file. + with open(attr_file, "w") as fh: + fh.write("{not json") + with patch("botocore.session.get_session", return_value=_ambient()): + creds = helper.resolve_credentials() + assert creds["AccessKeyId"] == "AMB" + assert "present but unreadable" in capsys.readouterr().err -def test_resolve_emits_empty_when_no_credentials_at_all(attr_file): +def test_resolve_emits_empty_when_no_credentials_at_all(attr_file, capsys): ambient = MagicMock() ambient.get_credentials.return_value = None with patch("botocore.session.get_session", return_value=ambient): creds = helper.resolve_credentials() # Empty object → Claude Code falls back to its own default-chain resolution. assert creds == {} + assert "no resolvable AWS credentials" in capsys.readouterr().err def test_main_emits_credentials_envelope(attr_file, capsys): From 37974233d3226db8197e2cb63402e031d73dabef Mon Sep 17 00:00:00 2001 From: bgagent Date: Tue, 30 Jun 2026 16:03:10 -0500 Subject: [PATCH 5/8] docs(215): note the deliberate ANTHROPIC_CUSTOM_HEADERS env exception Security review (LOW/accepted): unlike tenant-data tags, the request-metadata header lives on os.environ because Claude Code reads it from there. 
Document why that's safe (self-referential non-secret values; json.dumps escaping blocks header injection) in both the code and the design doc, so it reads as intent rather than an oversight against the 'tenant ids out of os.environ' discipline. Refs #215 --- agent/src/runner.py | 8 ++++++++ docs/design/BEDROCK_COST_ATTRIBUTION.md | 2 ++ .../content/docs/architecture/Bedrock-cost-attribution.md | 2 ++ 3 files changed, 12 insertions(+) diff --git a/agent/src/runner.py b/agent/src/runner.py index c63558bf..13e8bf27 100644 --- a/agent/src/runner.py +++ b/agent/src/runner.py @@ -96,6 +96,14 @@ def _setup_bedrock_cost_attribution(config: TaskConfig) -> None: # Per-request metadata mirrors the STS tag values. Bedrock limits keys/values # to 256 chars and records them under ``requestMetadata`` in invocation logs. + # + # Unlike the tenant-data tags (kept out of os.environ so untrusted repo + # subprocesses don't inherit them), this header MUST go on os.environ — + # Claude Code reads ANTHROPIC_CUSTOM_HEADERS from the process env. The + # exposure is acceptable: the values are the task's OWN {user_id, repo, + # task_id} (self-referential, non-secret), so a spawned subprocess learns + # only who it is already running for. json.dumps escapes newlines/quotes, so + # a crafted repo slug cannot inject an extra (newline-separated) header. metadata = {t["Key"]: t["Value"][:MAX_TAG_VALUE_LEN] for t in tags} if metadata: os.environ["ANTHROPIC_CUSTOM_HEADERS"] = ( diff --git a/docs/design/BEDROCK_COST_ATTRIBUTION.md b/docs/design/BEDROCK_COST_ATTRIBUTION.md index 66480092..5553d46f 100644 --- a/docs/design/BEDROCK_COST_ATTRIBUTION.md +++ b/docs/design/BEDROCK_COST_ATTRIBUTION.md @@ -80,6 +80,8 @@ os.environ["ANTHROPIC_CUSTOM_HEADERS"] = ( Set via the process env (not project settings) so the untrusted repo can't alter it. Surfaces under `requestMetadata` in `/aws/bedrock/model-invocation-logs/` (logging already enabled in `agent.ts`). 
+> **Note — a deliberate exception to the "tenant ids out of `os.environ`" rule.** The tenant-data path keeps `{user_id, repo, task_id}` out of `os.environ` so spawned (untrusted) repo subprocesses don't inherit them. This header *must* live on `os.environ` because Claude Code reads `ANTHROPIC_CUSTOM_HEADERS` from the process env. The exposure is acceptable: the values are the task's *own* identifiers (self-referential, non-secret) — a subprocess learns only who it is already running for. `json.dumps` escaping prevents a crafted slug from injecting an extra (newline-separated) header. + > **Open risk to validate against a live endpoint:** Bedrock rejects `X-Amzn-Bedrock-Request-Metadata` with `InvalidSignatureException` if the header is omitted from the SigV4 `SignedHeaders`. Whether Claude Code signs custom headers is unverified. AC#3 explicitly permits "or documented blocker if Claude Code cannot pass metadata." If it fails, per-call attribution falls back to invocation-log `identity.arn` + `RoleSessionName` (`abca-bedrock-`) that Track 1's tagged session already provides. ## Version alignment diff --git a/docs/src/content/docs/architecture/Bedrock-cost-attribution.md b/docs/src/content/docs/architecture/Bedrock-cost-attribution.md index 51c70972..355ac4cd 100644 --- a/docs/src/content/docs/architecture/Bedrock-cost-attribution.md +++ b/docs/src/content/docs/architecture/Bedrock-cost-attribution.md @@ -84,6 +84,8 @@ os.environ["ANTHROPIC_CUSTOM_HEADERS"] = ( Set via the process env (not project settings) so the untrusted repo can't alter it. Surfaces under `requestMetadata` in `/aws/bedrock/model-invocation-logs/` (logging already enabled in `agent.ts`). +> **Note — a deliberate exception to the "tenant ids out of `os.environ`" rule.** The tenant-data path keeps `{user_id, repo, task_id}` out of `os.environ` so spawned (untrusted) repo subprocesses don't inherit them. 
This header *must* live on `os.environ` because Claude Code reads `ANTHROPIC_CUSTOM_HEADERS` from the process env. The exposure is acceptable: the values are the task's *own* identifiers (self-referential, non-secret) — a subprocess learns only who it is already running for. `json.dumps` escaping prevents a crafted slug from injecting an extra (newline-separated) header. + > **Open risk to validate against a live endpoint:** Bedrock rejects `X-Amzn-Bedrock-Request-Metadata` with `InvalidSignatureException` if the header is omitted from the SigV4 `SignedHeaders`. Whether Claude Code signs custom headers is unverified. AC#3 explicitly permits "or documented blocker if Claude Code cannot pass metadata." If it fails, per-call attribution falls back to invocation-log `identity.arn` + `RoleSessionName` (`abca-bedrock-`) that Track 1's tagged session already provides. ## Version alignment From dd1cca280e6170d8eedc3196f2605b841a1d9757 Mon Sep 17 00:00:00 2001 From: bgagent Date: Tue, 30 Jun 2026 16:28:34 -0500 Subject: [PATCH 6/8] docs(215): correct cost-allocation tag activation steps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The IAM-principal tag keys can't be pre-activated — they only appear in the Billing console after the platform makes tagged Bedrock calls. Fix the ordering (deploy → run task → wait ≤24h → activate), point to Billing → Cost allocation tags (not Tag Editor / Resource Groups, which lists resource types), and note the capability may not be enabled in every account/region yet. 
Refs #215 --- docs/guides/COST_ATTRIBUTION.md | 11 ++++++++--- .../content/docs/getting-started/Cost-attribution.md | 11 ++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/guides/COST_ATTRIBUTION.md b/docs/guides/COST_ATTRIBUTION.md index e27dd061..667c77c3 100644 --- a/docs/guides/COST_ATTRIBUTION.md +++ b/docs/guides/COST_ATTRIBUTION.md @@ -19,15 +19,20 @@ Why all three: the in-app meter is an estimate the platform computes; it does no ## What the platform does automatically -Once deployed, each agent task makes its Bedrock calls under **session-tagged, refreshable credentials** carrying `{user_id, repo, task_id}`, and stamps the same values as **request metadata** on every call. You do **not** need to change any code. What remains is **operator setup in the AWS Billing console** — AWS does not surface tag-based cost data until you activate it. +Once deployed, each agent task makes its Bedrock calls under **session-tagged, refreshable credentials** carrying `{user_id, repo, task_id}`, and stamps the same values as **request metadata** on every call. You do **not** need to change any code. What remains is **operator setup in the AWS Billing console** — AWS does not surface tag-based cost data until you activate it, and (see the ordering note below) you can only activate *after* the platform has run tagged tasks. ## FinOps checklist These steps are a one-time operator responsibility (CDK does not automate org-level billing — see [Out of scope](../design/BEDROCK_COST_ATTRIBUTION.md#out-of-scope-unchanged-from-issue)). -1. **Activate IAM-principal cost-allocation tags.** Billing console → **Cost allocation tags** → filter by type **IAM principal** → activate `user_id` and `repo` (the low-cardinality dimensions; `task_id` is high-cardinality — keep it for logs, not Cost Explorer). - - Tags appear only **after** the first Bedrock call carrying them, and can take **up to 24 h** to show. 
+> **Ordering matters — the tags can't be pre-activated.** IAM-principal cost-allocation tag *keys* (`user_id`, `repo`) do not exist in the Billing console until the deployed platform has actually made tagged Bedrock calls. So the sequence is: **deploy → run at least one task → wait up to 24 h → then activate** (step 1). You cannot activate them before the first tagged call exists. +> +> **Use the Billing console, not Tag Editor / Resource Groups.** Cost-allocation tags live at **Billing and Cost Management → Cost allocation tags** (left nav). The *Tag Editor* (Resource Groups) is a different tool — it lists taggable *resource types* (`AWS::IAM::InstanceProfile`, etc.) and is **not** where you activate these. + +1. **Activate IAM-principal cost-allocation tags.** Billing and Cost Management console → **Cost allocation tags** (left nav) → the **User-defined cost allocation tags** tab → the `user_id` and `repo` keys appear with tag type **IAM principal** → select them → **Activate**. (`task_id` is high-cardinality — keep it for logs, not Cost Explorer.) + - Keys appear only **after** the first Bedrock call carrying them, and can take **up to 24 h** to show. - Activation is **not retroactive** — only spend incurred after activation is tagged. + - IAM-principal cost-allocation tags are a recent Bedrock capability. If the keys never appear a day after running tagged tasks, your account/region may not have it enabled yet — the invocation-log path (below) attributes per call regardless. 2. **Create a CUR 2.0 export with caller identity.** Billing console → **Data Exports** → create a CUR 2.0 export and select the option to include the **caller-identity ARN**. - If you already have a CUR 2.0 export, you must create a **new** one — existing exports do not backfill identity data. 3. **Set budgets / alerts** per `user_id` or `repo` tag as needed (AWS Budgets), independent of the in-app `max_budget_usd` per-task guardrail. 
diff --git a/docs/src/content/docs/getting-started/Cost-attribution.md b/docs/src/content/docs/getting-started/Cost-attribution.md index 896b0293..60820907 100644 --- a/docs/src/content/docs/getting-started/Cost-attribution.md +++ b/docs/src/content/docs/getting-started/Cost-attribution.md @@ -23,15 +23,20 @@ Why all three: the in-app meter is an estimate the platform computes; it does no ## What the platform does automatically -Once deployed, each agent task makes its Bedrock calls under **session-tagged, refreshable credentials** carrying `{user_id, repo, task_id}`, and stamps the same values as **request metadata** on every call. You do **not** need to change any code. What remains is **operator setup in the AWS Billing console** — AWS does not surface tag-based cost data until you activate it. +Once deployed, each agent task makes its Bedrock calls under **session-tagged, refreshable credentials** carrying `{user_id, repo, task_id}`, and stamps the same values as **request metadata** on every call. You do **not** need to change any code. What remains is **operator setup in the AWS Billing console** — AWS does not surface tag-based cost data until you activate it, and (see the ordering note below) you can only activate *after* the platform has run tagged tasks. ## FinOps checklist These steps are a one-time operator responsibility (CDK does not automate org-level billing — see [Out of scope](/sample-autonomous-cloud-coding-agents/architecture/bedrock-cost-attribution#out-of-scope-unchanged-from-issue)). -1. **Activate IAM-principal cost-allocation tags.** Billing console → **Cost allocation tags** → filter by type **IAM principal** → activate `user_id` and `repo` (the low-cardinality dimensions; `task_id` is high-cardinality — keep it for logs, not Cost Explorer). - - Tags appear only **after** the first Bedrock call carrying them, and can take **up to 24 h** to show. 
+> **Ordering matters — the tags can't be pre-activated.** IAM-principal cost-allocation tag *keys* (`user_id`, `repo`) do not exist in the Billing console until the deployed platform has actually made tagged Bedrock calls. So the sequence is: **deploy → run at least one task → wait up to 24 h → then activate** (step 1). You cannot activate them before the first tagged call exists. +> +> **Use the Billing console, not Tag Editor / Resource Groups.** Cost-allocation tags live at **Billing and Cost Management → Cost allocation tags** (left nav). The *Tag Editor* (Resource Groups) is a different tool — it lists taggable *resource types* (`AWS::IAM::InstanceProfile`, etc.) and is **not** where you activate these. + +1. **Activate IAM-principal cost-allocation tags.** Billing and Cost Management console → **Cost allocation tags** (left nav) → the **User-defined cost allocation tags** tab → the `user_id` and `repo` keys appear with tag type **IAM principal** → select them → **Activate**. (`task_id` is high-cardinality — keep it for logs, not Cost Explorer.) + - Keys appear only **after** the first Bedrock call carrying them, and can take **up to 24 h** to show. - Activation is **not retroactive** — only spend incurred after activation is tagged. + - IAM-principal cost-allocation tags are a recent Bedrock capability. If the keys never appear a day after running tagged tasks, your account/region may not have it enabled yet — the invocation-log path (below) attributes per call regardless. 2. **Create a CUR 2.0 export with caller identity.** Billing console → **Data Exports** → create a CUR 2.0 export and select the option to include the **caller-identity ARN**. - If you already have a CUR 2.0 export, you must create a **new** one — existing exports do not backfill identity data. 3. **Set budgets / alerts** per `user_id` or `repo` tag as needed (AWS Budgets), independent of the in-app `max_budget_usd` per-task guardrail. 
From 5b5e13fb27b470d5c49f79c6760034b9cc935a78 Mon Sep 17 00:00:00 2001 From: bgagent Date: Tue, 30 Jun 2026 16:56:12 -0500 Subject: [PATCH 7/8] fix(cdk): enable Bedrock model-invocation logging on deploy (#215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ModelInvocationLogging custom resource sent largeDataDeliveryS3Config with an empty bucketName. Bedrock rejects that client-side (ValidationException, 'min length: 3'), and ignoreErrorCodesMatching: '.*' swallowed it while onUpdate never re-fired (static props) — so a fresh deploy silently left model-invocation logging DISABLED, and Bedrock recorded no requestMetadata (#215 Track 2 produced nothing to query). Found during live verification of task 01KWD7S.... - Omit largeDataDeliveryS3Config entirely (optional; only for S3 large-data delivery, which this stack doesn't use). The 'required by API schema' comment was wrong. - Narrow ignoreErrorCodesMatching from '.*' to transient service errors only (Throttling/ServiceUnavailable/InternalServer) so a client-side misconfiguration fails the deploy loudly instead of disabling logging silently. - Tests: assert the CR never sends largeDataDeliveryS3Config and never uses a catch-all error ignore. - Docs: COST_ATTRIBUTION.md now tells operators to verify logging is on in the agent's Region (get-model-invocation-logging-configuration) and how to re-enable it, since metadata is only recorded when logging is active. Verified live: with logging on, invocation logs show requestMetadata.{user_id, repo,task_id} and the abca-bedrock- session ARN — Tracks 1 and 2 both confirmed working end-to-end. 
Refs #215 --- cdk/src/stacks/agent.ts | 19 ++++++++---- cdk/test/stacks/agent.test.ts | 30 +++++++++++++++++++ docs/guides/COST_ATTRIBUTION.md | 13 ++++++++ .../docs/getting-started/Cost-attribution.md | 13 ++++++++ 4 files changed, 70 insertions(+), 5 deletions(-) diff --git a/cdk/src/stacks/agent.ts b/cdk/src/stacks/agent.ts index 9d2824bc..9cc0b02c 100644 --- a/cdk/src/stacks/agent.ts +++ b/cdk/src/stacks/agent.ts @@ -1003,8 +1003,14 @@ export class AgentStack extends Stack { cloudWatchConfig: { logGroupName: invocationLogGroup.logGroupName, roleArn: bedrockLoggingRole.roleArn, - // Required by API schema but unused — text logs go to CloudWatch only. - largeDataDeliveryS3Config: { bucketName: '', keyPrefix: '' }, + // largeDataDeliveryS3Config is OPTIONAL and intentionally omitted: + // it only governs S3 delivery of oversized payloads, which this + // stack does not use (text logs go to CloudWatch). Sending it with + // an empty bucketName fails client-side validation + // ("valid min length: 3") — and because the errors below are + // swallowed and onUpdate never re-fires (static props), that + // failure silently leaves model-invocation logging DISABLED, which + // in turn means Bedrock records no requestMetadata (#215 Track 2). }, textDataDeliveryEnabled: true, imageDataDeliveryEnabled: false, @@ -1012,7 +1018,11 @@ export class AgentStack extends Stack { }, }, physicalResourceId: cr.PhysicalResourceId.of('bedrock-invocation-logging'), - ignoreErrorCodesMatching: '.*', + // Scope the ignore to genuine service-side errors (e.g. a concurrent + // account-level change). Do NOT use '.*' — that also hides client-side + // ValidationExceptions like the empty-bucket bug above, turning a + // deploy-time misconfiguration into silently-absent logging. 
+ ignoreErrorCodesMatching: 'ThrottlingException|ServiceUnavailableException|InternalServerException', }, // onUpdate re-applies the same config to handle drift (e.g., if another // stack or manual action changed the account-level logging config). @@ -1024,7 +1034,6 @@ export class AgentStack extends Stack { cloudWatchConfig: { logGroupName: invocationLogGroup.logGroupName, roleArn: bedrockLoggingRole.roleArn, - largeDataDeliveryS3Config: { bucketName: '', keyPrefix: '' }, }, textDataDeliveryEnabled: true, imageDataDeliveryEnabled: false, @@ -1032,7 +1041,7 @@ export class AgentStack extends Stack { }, }, physicalResourceId: cr.PhysicalResourceId.of('bedrock-invocation-logging'), - ignoreErrorCodesMatching: '.*', + ignoreErrorCodesMatching: 'ThrottlingException|ServiceUnavailableException|InternalServerException', }, // onDelete intentionally omitted — model invocation logging is account-level; // deleting one stack should not disable logging that another stack relies on. diff --git a/cdk/test/stacks/agent.test.ts b/cdk/test/stacks/agent.test.ts index 486b4ad9..00849061 100644 --- a/cdk/test/stacks/agent.test.ts +++ b/cdk/test/stacks/agent.test.ts @@ -370,6 +370,36 @@ describe('AgentStack', () => { expect(loggingConfigs.length).toBe(1); }); + test('model invocation logging does NOT send an empty largeDataDeliveryS3Config', () => { + // Regression guard (#215): sending largeDataDeliveryS3Config with an empty + // bucketName fails client-side validation ("valid min length: 3"), and with + // a catch-all ignoreErrorCodesMatching that failure silently leaves logging + // DISABLED — so Bedrock records no requestMetadata. The field is optional; + // omit it entirely. Assert it never reappears with an empty bucket. 
+ const customs = template.findResources('Custom::AWS'); + const logging = Object.values(customs).find(r => + JSON.stringify(r.Properties?.Create).includes('putModelInvocationLoggingConfiguration'), + ); + expect(logging).toBeDefined(); + for (const phase of ['Create', 'Update'] as const) { + const body = JSON.stringify(logging!.Properties?.[phase] ?? ''); + // The field must be absent from both phases: it is optional and this stack never uses S3 large-data delivery. + expect(body).not.toContain('largeDataDeliveryS3Config'); + } + }); + + test('model invocation logging ignores only transient errors, not client-side validation', () => { + // A catch-all '.*' would also swallow the empty-bucket ValidationException + // above, hiding a deploy-time misconfiguration as silently-absent logging. + const customs = template.findResources('Custom::AWS'); + const logging = Object.values(customs).find(r => + JSON.stringify(r.Properties?.Create).includes('putModelInvocationLoggingConfiguration'), + ); + const create = JSON.stringify(logging!.Properties?.Create ?? ''); + expect(create).not.toContain('".*"'); + expect(create).toContain('ThrottlingException'); + }); + test('enables session storage with persistent filesystem', () => { template.hasResourceProperties('AWS::BedrockAgentCore::Runtime', { FilesystemConfigurations: [ diff --git a/docs/guides/COST_ATTRIBUTION.md b/docs/guides/COST_ATTRIBUTION.md index 667c77c3..a99b0b80 100644 --- a/docs/guides/COST_ATTRIBUTION.md +++ b/docs/guides/COST_ATTRIBUTION.md @@ -39,6 +39,19 @@ These steps are a one-time operator responsibility (CDK does not automate org-le ## Querying per-call detail (invocation logs) +> **Model-invocation logging must be ON in the agent's Region, or there is no `requestMetadata` to query.** Bedrock records request metadata **only** when account-level model-invocation logging is enabled in the Region where the call is made.
The stack provisions this automatically (a custom resource pointing at the `/aws/bedrock/model-invocation-logs/<stack-suffix>` log group), but it is **account- and Region-scoped**, so confirm it after deploy — especially if logging was previously disabled, or the stack Region differs from where you expect calls. +> +> Verify it is on: +> ``` +> aws bedrock get-model-invocation-logging-configuration --region <region> +> ``` +> An empty result means logging is **off** and no metadata is being captured. Re-enable it (pointing at the stack's own log group + `BedrockLoggingRole`): +> ``` +> aws bedrock put-model-invocation-logging-configuration --region <region> \ +> --logging-config '{"cloudWatchConfig":{"logGroupName":"/aws/bedrock/model-invocation-logs/<stack-suffix>","roleArn":"<BedrockLoggingRole-ARN>"},"textDataDeliveryEnabled":true,"imageDataDeliveryEnabled":false,"embeddingDataDeliveryEnabled":false}' +> ``` +> Do **not** include `largeDataDeliveryS3Config` with an empty bucket name — Bedrock rejects it (`min length: 3`) and the call fails. Only calls made *after* logging is enabled are recorded; re-run a task to populate logs. + Request metadata lands under the top-level `requestMetadata` field of each log record. Example CloudWatch Logs Insights query (tokens per user + model): ``` diff --git a/docs/src/content/docs/getting-started/Cost-attribution.md b/docs/src/content/docs/getting-started/Cost-attribution.md index 60820907..d148fbc3 100644 --- a/docs/src/content/docs/getting-started/Cost-attribution.md +++ b/docs/src/content/docs/getting-started/Cost-attribution.md @@ -43,6 +43,19 @@ These steps are a one-time operator responsibility (CDK does not automate org-le ## Querying per-call detail (invocation logs) +> **Model-invocation logging must be ON in the agent's Region, or there is no `requestMetadata` to query.** Bedrock records request metadata **only** when account-level model-invocation logging is enabled in the Region where the call is made.
The stack provisions this automatically (a custom resource pointing at the `/aws/bedrock/model-invocation-logs/<stack-suffix>` log group), but it is **account- and Region-scoped**, so confirm it after deploy — especially if logging was previously disabled, or the stack Region differs from where you expect calls. +> +> Verify it is on: +> ``` +> aws bedrock get-model-invocation-logging-configuration --region <region> +> ``` +> An empty result means logging is **off** and no metadata is being captured. Re-enable it (pointing at the stack's own log group + `BedrockLoggingRole`): +> ``` +> aws bedrock put-model-invocation-logging-configuration --region <region> \ +> --logging-config '{"cloudWatchConfig":{"logGroupName":"/aws/bedrock/model-invocation-logs/<stack-suffix>","roleArn":"<BedrockLoggingRole-ARN>"},"textDataDeliveryEnabled":true,"imageDataDeliveryEnabled":false,"embeddingDataDeliveryEnabled":false}' +> ``` +> Do **not** include `largeDataDeliveryS3Config` with an empty bucket name — Bedrock rejects it (`min length: 3`) and the call fails. Only calls made *after* logging is enabled are recorded; re-run a task to populate logs. + Request metadata lands under the top-level `requestMetadata` field of each log record. Example CloudWatch Logs Insights query (tokens per user + model): ``` From 87abbda6b94f8c4f0ab16471726bdf838f1ecf30 Mon Sep 17 00:00:00 2001 From: bgagent Date: Tue, 30 Jun 2026 17:12:12 -0500 Subject: [PATCH 8/8] fix(cdk): grant iam:PassRole for Bedrock invocation-logging custom resource (#215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the empty-bucket validation error fixed, PutModelInvocationLoggingConfiguration now actually reaches Bedrock at deploy — and fails because the custom resource's Lambda role lacks iam:PassRole on BedrockLoggingRole (the role it hands to the Bedrock service to write the log group). This was masked by the earlier client-side ValidationException that ignoreErrorCodesMatching: '.*' swallowed.
Add iam:PassRole scoped to the BedrockLoggingRole ARN (not a wildcard). Test asserts the grant is present. Refs #215 --- cdk/src/stacks/agent.ts | 10 ++++++++++ cdk/test/stacks/agent.test.ts | 17 +++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/cdk/src/stacks/agent.ts b/cdk/src/stacks/agent.ts index 9cc0b02c..08de9223 100644 --- a/cdk/src/stacks/agent.ts +++ b/cdk/src/stacks/agent.ts @@ -1053,6 +1053,16 @@ export class AgentStack extends Stack { ], resources: ['*'], }), + // PutModelInvocationLoggingConfiguration hands bedrockLoggingRole to the + // Bedrock service (so Bedrock can write to the log group), which requires + // the caller to hold iam:PassRole on that role. Scoped to the one role — + // not a wildcard. (Previously masked by the empty-bucket validation error + // that ignoreErrorCodesMatching: '.*' swallowed; now that the call + // actually reaches Bedrock, this is required.) + new iam.PolicyStatement({ + actions: ['iam:PassRole'], + resources: [bedrockLoggingRole.roleArn], + }), ]), }); diff --git a/cdk/test/stacks/agent.test.ts b/cdk/test/stacks/agent.test.ts index 00849061..9671be12 100644 --- a/cdk/test/stacks/agent.test.ts +++ b/cdk/test/stacks/agent.test.ts @@ -400,6 +400,23 @@ describe('AgentStack', () => { expect(create).toContain('ThrottlingException'); }); + test('model invocation logging custom resource can iam:PassRole the logging role', () => { + // PutModelInvocationLoggingConfiguration passes BedrockLoggingRole to the + // Bedrock service, so the custom resource's role needs iam:PassRole on it. + // Without this the API call fails at deploy (was previously masked by the + // empty-bucket validation error). Assert the policy grants PassRole. 
+ template.hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: Match.arrayWith([ + Match.objectLike({ + Action: 'iam:PassRole', + Effect: 'Allow', + }), + ]), + }, + }); + }); + test('enables session storage with persistent filesystem', () => { template.hasResourceProperties('AWS::BedrockAgentCore::Runtime', { FilesystemConfigurations: [