diff --git a/agent/Dockerfile b/agent/Dockerfile index 271c1ddc..82c5f5e1 100644 --- a/agent/Dockerfile +++ b/agent/Dockerfile @@ -46,8 +46,14 @@ RUN curl -fsSL https://deb.nodesource.com/setup_24.x | bash - && \ # Install Claude Code CLI (the Python SDK requires this binary) # Then update known vulnerable transitive packages where fixed versions exist. +# Pinned 2.1.191 to match the CLI bundled by claude-agent-sdk 0.2.110 (see +# agent/pyproject.toml) — the SDK and the on-PATH CLI must agree on the control +# protocol. This version also has the awsCredentialExport behavior #215 needs: +# returned creds are cached until 5 min before the JSON's `Expiration`, so an +# 8 h task re-assumes the 1 h-capped SessionRole before expiry. Older builds +# only refreshed hourly on a timer, racing the role-chaining cap. RUN npm install -g npm@latest && \ - npm install -g @anthropic-ai/claude-code@2.1.142 && \ + npm install -g @anthropic-ai/claude-code@2.1.191 && \ CLAUDE_NPM_ROOT="$(npm root -g)/@anthropic-ai/claude-code" && \ npm --prefix "${CLAUDE_NPM_ROOT}" update tar minimatch glob cross-spawn picomatch @@ -81,6 +87,13 @@ COPY contracts/ /app/contracts/ # ``WorkflowValidationError: workflow '...' not found at /app/workflows/...``. COPY agent/workflows/ /app/workflows/ COPY agent/prepare-commit-msg.sh /app/ +# Claude Code managed settings (#215). The highest-precedence settings layer — +# loaded regardless of setting_sources and unoverridable by the untrusted cloned +# repo's project .claude/settings.json. Carries awsCredentialExport so Bedrock +# calls use session-tagged, refreshable credentials for cost attribution. +# Placing awsCredentialExport (an arbitrary command) anywhere the target repo +# can influence would be RCE with the compute role, so it lives ONLY here. 
def build_session_tags(user_id: str, repo: str, task_id: str) -> list[dict[str, str]]:
    """Turn the tenant identifiers into an AssumeRole ``Tags`` list.

    Emits one ``{"Key": ..., "Value": ...}`` entry per identifier, in the fixed
    order ``user_id``, ``repo``, ``task_id``. Identifiers that are empty (falsy)
    are left out entirely, and every value is cut to ``_MAX_TAG_VALUE_LEN``
    characters so an over-long repo slug cannot exceed the tag-value length cap.

    This is the single definition of the ``{user_id, repo, task_id}`` tag set:
    the in-process tenant-data session (:func:`_session_tags`) and the
    out-of-process Bedrock credential helper (``bedrock_creds_helper.py``) are
    both meant to mint their tags from it.
    """
    session_tags: list[dict[str, str]] = []
    identifiers = {"user_id": user_id, "repo": repo, "task_id": task_id}
    for tag_key, raw_value in identifiers.items():
        if not raw_value:
            # Unset identifier: emit no tag rather than an empty-valued one.
            continue
        session_tags.append({"Key": tag_key, "Value": raw_value[:_MAX_TAG_VALUE_LEN]})
    return session_tags
+ +Goal: assume the per-task SessionRole with ``{user_id, repo, task_id}`` STS +session tags so Bedrock spend is attributable per user/repo in AWS Cost +Explorer / CUR 2.0 (``iamPrincipal/*`` dimensions, after the operator activates +the cost-allocation tags). The same role already carries the tenant-data grants; +Track-1 only adds ``bedrock:InvokeModel*`` to it (see ``agent-session-role.ts``). + +**Fails OPEN.** Bedrock attribution is a billing/observability control, not a +tenant-isolation one (contrast ``aws_session.py``, which fails closed). If the +attribution config is absent or the assume-role fails, this helper emits the +**ambient** compute-role credentials so Bedrock keeps working untagged — losing +chargeback granularity is not a security incident, and the compute role retains +``InvokeModel`` precisely so this fallback works. + +The role ARN and tag values are read from a 0600 JSON file the agent writes at +startup (``write_attribution_file``), not from the environment — so the tenant +identifiers are not inherited by the untrusted repo subprocesses the agent +spawns, matching the discipline in ``aws_session.py``. + +Output shape (consumed by Claude Code's awsCredentialExport): + + {"Credentials": {"AccessKeyId": "...", "SecretAccessKey": "...", + "SessionToken": "...", "Expiration": ""}} +""" + +from __future__ import annotations + +import json +import os +import sys +from typing import Any + +# Fixed path the agent writes (0600) and this helper reads. A fixed path is +# required because the managed-settings ``awsCredentialExport`` command is +# static (baked into the image) and cannot carry per-task arguments. +ATTRIBUTION_FILE_ENV = "BEDROCK_ATTRIBUTION_FILE" +DEFAULT_ATTRIBUTION_FILE = "/home/agent/.bedrock-attribution.json" + +# Role chaining caps the assumed session at 1 hour; request the max the cap +# allows. Claude Code refreshes ~5 min before the returned Expiration. 
+_CHAINED_SESSION_DURATION_S = 3600 + + +def attribution_file_path() -> str: + return os.environ.get(ATTRIBUTION_FILE_ENV, "").strip() or DEFAULT_ATTRIBUTION_FILE + + +def write_attribution_file( + role_arn: str, tags: list[dict[str, str]], path: str | None = None +) -> str: + """Persist the SessionRole ARN + STS tags for the helper to read. + + Written 0600 and owned by the agent user. Returns the path written. Called + by the agent at startup (see ``runner._setup_agent_env``) only when a + SessionRole is configured; absence is the fail-open signal. + """ + target = path or attribution_file_path() + payload = json.dumps({"role_arn": role_arn, "tags": tags}) + # Create with 0600 from the start (os.open + O_CREAT honors mode, modulo + # umask) so the secret-adjacent file is never briefly world-readable. + fd = os.open(target, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600) + with os.fdopen(fd, "w") as fh: + fh.write(payload) + return target + + +def _warn(message: str) -> None: + """Emit a diagnostic to stderr. + + This process's **stdout is the credential channel** — Claude Code parses it + as the ``awsCredentialExport`` JSON result — so diagnostics MUST go to + stderr or they would corrupt the credential envelope. (This is also why + ``shell.log``, which writes to fd 1, is unusable here.) Every fail-open path + logs through here so a silent, weeks-long loss of cost attribution is + instead a visible, correlatable signal — the fallback stays open, but it is + never invisible. 
def resolve_credentials() -> dict[str, str]:
    """Return tagged assumed-role creds, or ambient creds on any failure.

    Reads the attribution file (role ARN + STS tags), assumes that role with
    the tags attached, and returns the credential fields that ``_emit`` wraps
    for Claude Code. Every failure handled here is reported through ``_warn``
    (stderr) and answered with ``_ambient_credentials()``; a missing,
    unreadable, or wrongly-shaped attribution file and a failed assume-role
    all fail open rather than raise.

    Returns:
        ``AccessKeyId`` / ``SecretAccessKey`` / ``SessionToken`` /
        ``Expiration`` (ISO 8601 string) of the assumed role on success;
        otherwise whatever ``_ambient_credentials()`` returns.
    """
    import re

    path = attribution_file_path()
    try:
        with open(path, encoding="utf-8") as fh:
            cfg = json.load(fh)
        role_arn = cfg["role_arn"]
        tags = cfg.get("tags", [])
        # Extract task_id inside the guarded region: valid JSON of the wrong
        # shape (top level not an object, tags not a list of objects, a tag
        # without "Value") must fail open like any other malformed file
        # instead of crashing the helper with an uncaught exception.
        task_id = next((t["Value"] for t in tags if t.get("Key") == "task_id"), "")
    except FileNotFoundError:
        # Attribution not configured (local/dev, or pre-provisioning). Expected
        # and benign — debug-level signal only.
        _warn("attribution file absent; not configured — using ambient creds")
        return _ambient_credentials()
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as exc:
        # File present but unreadable/malformed/schema-drifted. This is NOT the
        # benign "not configured" case — it points at a write_attribution_file
        # bug or a partial write, so it warrants a louder signal.
        _warn(
            f"attribution file present but unreadable ({type(exc).__name__}: {exc}); "
            "using ambient creds"
        )
        return _ambient_credentials()

    try:
        import boto3
        from botocore.exceptions import BotoCoreError, ClientError
    except ImportError as exc:
        # boto3 missing/broken in the image is a packaging defect, not the
        # expected assume-role failure — name it explicitly so it can't hide.
        _warn(f"boto3 unavailable ({exc}); using ambient creds — fix the image")
        return _ambient_credentials()

    region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
    # RoleSessionName accepts only [\w+=,.@-] and at most 64 characters. Map
    # anything else to "-" so an unusual task_id cannot make every assume fail
    # (which would silently untag the whole task), and use the bare prefix
    # when there is no task_id instead of a dangling "abca-bedrock-".
    safe_task_id = re.sub(r"[^\w+=,.@-]", "-", str(task_id), flags=re.ASCII)
    session_name = (f"abca-bedrock-{safe_task_id}" if safe_task_id else "abca-bedrock")[:64]
    try:
        resp = boto3.client("sts", region_name=region).assume_role(
            RoleArn=role_arn,
            RoleSessionName=session_name,
            DurationSeconds=_CHAINED_SESSION_DURATION_S,
            Tags=tags,
        )
        c = resp["Credentials"]
        return {
            "AccessKeyId": c["AccessKeyId"],
            "SecretAccessKey": c["SecretAccessKey"],
            "SessionToken": c["SessionToken"],
            "Expiration": c["Expiration"].isoformat(),
        }
    except (ClientError, BotoCoreError) as exc:
        # Expected assume failure: role not yet provisioned, AccessDenied,
        # transient STS error. Fail open so Bedrock keeps working on the
        # compute role; spend for this task is untagged.
        _warn(
            f"assume_role failed ({type(exc).__name__}: {exc}); using ambient creds "
            "— Bedrock spend will be UNTAGGED"
        )
        return _ambient_credentials()
    except Exception as exc:
        # Anything else (unexpected STS response shape, a logic bug here) is NOT
        # the expected fallback. Still fail open — this is a billing control, not
        # isolation — but flag it distinctly so it isn't mistaken for AccessDenied.
        _warn(
            f"UNEXPECTED error minting tagged creds ({type(exc).__name__}: {exc}); "
            "using ambient creds"
        )
        return _ambient_credentials()
+ """ + import json + + from aws_session import MAX_TAG_VALUE_LEN, build_session_tags + + role_arn = os.environ.get("AGENT_SESSION_ROLE_ARN", "").strip() + tags = build_session_tags(config.user_id, config.repo_url, config.task_id) + if role_arn and tags: + try: + from bedrock_creds_helper import write_attribution_file + + write_attribution_file(role_arn, tags) + except OSError as exc: + # Fail open: attribution is observability, not isolation. Bedrock + # still works on the compute role; we just lose tagged chargeback. + log("WARN", f"Bedrock attribution file not written ({exc}); spend will be untagged") + + # Per-request metadata mirrors the STS tag values. Bedrock limits keys/values + # to 256 chars and records them under ``requestMetadata`` in invocation logs. + # + # Unlike the tenant-data tags (kept out of os.environ so untrusted repo + # subprocesses don't inherit them), this header MUST go on os.environ — + # Claude Code reads ANTHROPIC_CUSTOM_HEADERS from the process env. The + # exposure is acceptable: the values are the task's OWN {user_id, repo, + # task_id} (self-referential, non-secret), so a spawned subprocess learns + # only who it is already running for. json.dumps escapes newlines/quotes, so + # a crafted repo slug cannot inject an extra (newline-separated) header. + metadata = {t["Key"]: t["Value"][:MAX_TAG_VALUE_LEN] for t in tags} + if metadata: + os.environ["ANTHROPIC_CUSTOM_HEADERS"] = ( + f"X-Amzn-Bedrock-Request-Metadata: {json.dumps(metadata, separators=(',', ':'))}" + ) + + def _setup_agent_env(config: TaskConfig) -> tuple[str | None, str | None]: """Configure process environment for the Claude Code CLI subprocess. 
@@ -72,6 +124,8 @@ def _setup_agent_env(config: TaskConfig) -> tuple[str | None, str | None]: os.environ["ANTHROPIC_MODEL"] = config.anthropic_model os.environ["GITHUB_TOKEN"] = config.github_token os.environ["GH_TOKEN"] = config.github_token + + _setup_bedrock_cost_attribution(config) # DO NOT set ANTHROPIC_LOG — any logging level causes the CLI to write to # stderr, which fills the OS pipe buffer (64 KB) and deadlocks the # single-threaded Node.js CLI process (blocked stderr write prevents stdout diff --git a/agent/tests/test_bedrock_creds_helper.py b/agent/tests/test_bedrock_creds_helper.py new file mode 100644 index 00000000..426aad8d --- /dev/null +++ b/agent/tests/test_bedrock_creds_helper.py @@ -0,0 +1,159 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT-0 + +"""Unit tests for the Bedrock credential helper (#215, cost attribution). + +The helper feeds Claude Code's ``awsCredentialExport``: it assumes the per-task +SessionRole with ``{user_id, repo, task_id}`` STS tags so Bedrock spend is +attributable, and **fails open** to ambient credentials when attribution is not +configured or the assume fails — losing chargeback granularity is not a security +incident, unlike the fail-closed tenant-data path in ``aws_session``. 
+""" + +from __future__ import annotations + +import datetime +import json +import os +import stat +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +import bedrock_creds_helper as helper +from aws_session import build_session_tags + + +@pytest.fixture +def attr_file(tmp_path, monkeypatch): + path = str(tmp_path / "attr.json") + monkeypatch.setenv(helper.ATTRIBUTION_FILE_ENV, path) + return path + + +def test_write_attribution_file_is_0600(attr_file): + tags = build_session_tags("u1", "owner/repo", "task123") + written = helper.write_attribution_file("arn:aws:iam::1:role/SR", tags, attr_file) + assert written == attr_file + mode = stat.S_IMODE(os.stat(attr_file).st_mode) + assert mode == 0o600, f"attribution file must be 0600, got {oct(mode)}" + with open(attr_file) as fh: + saved = json.load(fh) + assert saved["role_arn"] == "arn:aws:iam::1:role/SR" + assert {"Key": "task_id", "Value": "task123"} in saved["tags"] + + +def test_resolve_assumes_role_with_session_tags(attr_file): + tags = build_session_tags("u1", "owner/repo", "task123") + helper.write_attribution_file("arn:aws:iam::1:role/SR", tags, attr_file) + + expiry = datetime.datetime(2026, 1, 1, tzinfo=datetime.UTC) + sts = MagicMock() + sts.assume_role.return_value = { + "Credentials": { + "AccessKeyId": "AK", + "SecretAccessKey": "SK", + "SessionToken": "TK", + "Expiration": expiry, + } + } + with patch("boto3.client", return_value=sts): + creds = helper.resolve_credentials() + + # The assume carried exactly the tenant tags, and a tagged session name. 
def test_resolve_fails_open_when_no_attribution_file(attr_file, capsys):
    """An absent attribution file yields ambient creds and a visible stderr note."""
    # ``attr_file`` only points the helper at a tmp path; the file itself is
    # never written, so the helper takes the "not configured" branch.
    with patch("botocore.session.get_session", return_value=_ambient()):
        creds = helper.resolve_credentials()
    assert creds["AccessKeyId"] == "AMB"
    assert "Expiration" not in creds  # ambient creds are returned unbounded
    # Like the other fail-open paths, this one must be observable — and it must
    # be reported as the benign absent case, not the louder corrupt-file case.
    err = capsys.readouterr().err
    assert "attribution file absent" in err
    assert "present but unreadable" not in err
+ err = capsys.readouterr().err + assert "assume_role failed" in err and "UNTAGGED" in err + assert "UNEXPECTED" not in err + + +def test_resolve_flags_unexpected_error_distinctly(attr_file, capsys): + # A non-boto error (e.g. a logic bug, bad STS response shape) must still + # fail open but be labeled UNEXPECTED so it isn't mistaken for AccessDenied. + helper.write_attribution_file( + "arn:aws:iam::1:role/SR", build_session_tags("u", "r", "t"), attr_file + ) + sts = MagicMock() + sts.assume_role.side_effect = RuntimeError("boom") + with ( + patch("boto3.client", return_value=sts), + patch("botocore.session.get_session", return_value=_ambient()), + ): + creds = helper.resolve_credentials() + assert creds["AccessKeyId"] == "AMB" + assert "UNEXPECTED" in capsys.readouterr().err + + +def test_resolve_distinguishes_corrupt_config_from_absent(attr_file, capsys): + # File present but malformed → louder signal than a plain absent file. + with open(attr_file, "w") as fh: + fh.write("{not json") + with patch("botocore.session.get_session", return_value=_ambient()): + creds = helper.resolve_credentials() + assert creds["AccessKeyId"] == "AMB" + assert "present but unreadable" in capsys.readouterr().err + + +def test_resolve_emits_empty_when_no_credentials_at_all(attr_file, capsys): + ambient = MagicMock() + ambient.get_credentials.return_value = None + with patch("botocore.session.get_session", return_value=ambient): + creds = helper.resolve_credentials() + # Empty object → Claude Code falls back to its own default-chain resolution. 
class TestBedrockCostAttribution:
    """#215: wire Claude Code's Bedrock attribution channels (creds + header)."""

    @staticmethod
    def _unset_custom_headers(monkeypatch) -> None:
        """Start with ``ANTHROPIC_CUSTOM_HEADERS`` unset and restore it afterwards.

        The function under test assigns to ``os.environ`` directly, which
        monkeypatch cannot see. ``delenv(..., raising=False)`` records nothing
        when the variable is already unset, so the header would leak into later
        tests. Setting it first makes monkeypatch remember the pre-test state
        (including "unset") and undo the write on teardown.
        """
        monkeypatch.setenv("ANTHROPIC_CUSTOM_HEADERS", "")
        monkeypatch.delenv("ANTHROPIC_CUSTOM_HEADERS")

    def test_writes_attribution_file_and_sets_metadata_header_when_role_set(self, monkeypatch):
        import json
        import os

        monkeypatch.setenv("AGENT_SESSION_ROLE_ARN", "arn:aws:iam::1:role/SR")
        self._unset_custom_headers(monkeypatch)
        config = _config(user_id="alice", repo_url="owner/repo", task_id="t-9")

        with patch("bedrock_creds_helper.write_attribution_file") as mock_write:
            _setup_bedrock_cost_attribution(config)

        role_arn, tags = mock_write.call_args.args
        assert role_arn == "arn:aws:iam::1:role/SR"
        assert {"Key": "user_id", "Value": "alice"} in tags

        header = os.environ["ANTHROPIC_CUSTOM_HEADERS"]
        name, _, value = header.partition(": ")
        assert name == "X-Amzn-Bedrock-Request-Metadata"
        assert json.loads(value) == {
            "user_id": "alice",
            "repo": "owner/repo",
            "task_id": "t-9",
        }

    def test_no_attribution_file_when_role_unset_but_header_still_set(self, monkeypatch):
        import os

        # Local/dev: no SessionRole → no tagged creds (helper fails open), but the
        # invocation-log metadata header is still useful and harmless.
        monkeypatch.delenv("AGENT_SESSION_ROLE_ARN", raising=False)
        # Clear any header left in the environment so the assertion below can
        # only be satisfied by this call, not by an earlier test's value.
        self._unset_custom_headers(monkeypatch)
        config = _config(user_id="bob", repo_url="o/r", task_id="t-1")
        with patch("bedrock_creds_helper.write_attribution_file") as mock_write:
            _setup_bedrock_cost_attribution(config)
        mock_write.assert_not_called()
        assert "X-Amzn-Bedrock-Request-Metadata" in os.environ.get(
            "ANTHROPIC_CUSTOM_HEADERS", ""
        )
"https://files.pythonhosted.org/packages/bb/98/8fdab35ed9e1a36bc7afab4d390cc5002094a4950996c079da9aa4541cc4/claude_agent_sdk-0.2.110.tar.gz", hash = "sha256:538b548bac07a22f65686abab063a902ac76ba35989d0f073c942f96248e9fa3", size = 255632, upload-time = "2026-06-24T22:11:52.342Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/19/bc/27cf3aec2a24f2ed1f60277de795496b808a761d2a7a3fd34602a2fec37d/claude_agent_sdk-0.2.82-py3-none-macosx_11_0_arm64.whl", hash = "sha256:24ad8ccbcee9afe206ae5d621a9e40a5022ca3eb8c2c672b36916d3e70746e42", size = 61473506, upload-time = "2026-05-15T03:45:38.745Z" }, - { url = "https://files.pythonhosted.org/packages/96/91/95a83f018dbc8c113233eb542bccf17c1a3f5f689448700daf950602bf5e/claude_agent_sdk-0.2.82-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:13e54d5163d9d4f899c4e2a3f14df597f4e050d5afa104618ccf7bb37b372ad1", size = 63541975, upload-time = "2026-05-15T03:45:46.005Z" }, - { url = "https://files.pythonhosted.org/packages/be/07/9356fe0e30f988bade6b116ecc602b4a9ae4df34fa055305187a835e36e0/claude_agent_sdk-0.2.82-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:3b0a0e3f0927737f1fc91ee4549185172243a4e8f135d4c1e4f1f1eba91373e1", size = 71212904, upload-time = "2026-05-15T03:45:51.121Z" }, - { url = "https://files.pythonhosted.org/packages/b1/d9/e2920b4b6c75cf79ec87ebfb4cc4447c78a4f26317cb3fed77e79fcc804e/claude_agent_sdk-0.2.82-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:b05873f9df01c5894930b87f6ca9315f0d97f1563bc2e4dc0fafe0d4a1e31997", size = 71381948, upload-time = "2026-05-15T03:45:56.153Z" }, - { url = "https://files.pythonhosted.org/packages/89/80/c3ec5a89c735a96d35fe12b6262517169b396ff366149a3b9f4387f797c1/claude_agent_sdk-0.2.82-py3-none-win_amd64.whl", hash = "sha256:71e85e4f50d04cd95e687898092f03648e74e1cd2537583de93370d2da1c0586", size = 71990462, upload-time = "2026-05-15T03:46:01.646Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/93/29d4fdaa13e69034faf8d3503df915b07c820e2c08e3d6a7515149cde5bb/claude_agent_sdk-0.2.110-py3-none-macosx_11_0_arm64.whl", hash = "sha256:fed0e0f4804d9f9cff80ab7d1b44142ebd1046cdd29ca74caef4c92c35fff8d8", size = 64924533, upload-time = "2026-06-24T22:11:55.612Z" }, + { url = "https://files.pythonhosted.org/packages/aa/03/b40bb673cd93cdc3928262c1be75fde34a7bed4bf2c2c20e04218e2005ea/claude_agent_sdk-0.2.110-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:62b23869d46cef6f6ff1d00ceaa5e846e2f1d297478421c835efb8fe99369d4f", size = 69704449, upload-time = "2026-06-24T22:11:59.149Z" }, + { url = "https://files.pythonhosted.org/packages/f9/18/ab67cb5ce641333385bed55ed8e9665c00f7d30d1f6ab12f8463ddb7695f/claude_agent_sdk-0.2.110-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:324e49553c6303d6b267217dc2912652b97af2bc96503efd12095ae915b46b83", size = 74879555, upload-time = "2026-06-24T22:12:03.25Z" }, + { url = "https://files.pythonhosted.org/packages/91/88/3627d7d14310cfec66977551263e219365244a906fc7ca1209fb0c3a6cec/claude_agent_sdk-0.2.110-py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:56371dd7a2c66c0bd497dc0b3cab4193a228b196f600676393d69c0ecee37cfb", size = 75924237, upload-time = "2026-06-24T22:12:07.183Z" }, + { url = "https://files.pythonhosted.org/packages/49/79/c9066c5c387d42c19a4b675ec1ff5219f8920cfda8ff8b527119fd69b774/claude_agent_sdk-0.2.110-py3-none-win_amd64.whl", hash = "sha256:4235d4de6d685a189c12612095ab192b759280ede1f3aed0c3e784d52c3555f9", size = 75448209, upload-time = "2026-06-24T22:12:11.283Z" }, ] [[package]] diff --git a/cdk/src/constructs/agent-session-role.ts b/cdk/src/constructs/agent-session-role.ts index 95df0840..1602b734 100644 --- a/cdk/src/constructs/agent-session-role.ts +++ b/cdk/src/constructs/agent-session-role.ts @@ -17,6 +17,7 @@ * SOFTWARE. 
*/ +import * as bedrock from '@aws-cdk/aws-bedrock-alpha'; import { Duration } from 'aws-cdk-lib'; import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; import * as iam from 'aws-cdk-lib/aws-iam'; @@ -64,6 +65,23 @@ export interface AgentSessionRoleProps { * prefix. */ readonly attachmentsBucket: s3.IBucket; + + /** + * Bedrock models / cross-region inference profiles the agent may invoke + * (#215, cost attribution). When provided, each is `grantInvoke`-ed to the + * SessionRole — the **same** grant the compute role receives, so the + * permission set (including the all-regions foundation-model ARNs a + * cross-region profile fans out to) stays in lockstep and a cross-region + * route can never AccessDenied. Model inference run by the Claude Code + * subprocess is then attributed per `{user_id, repo}` in CUR 2.0 / Cost + * Explorer via the session tags this role already carries. + * + * The compute role keeps its own Bedrock grant: attribution is a billing + * control that fails open (the credential helper falls back to compute-role + * creds if the assume-role fails), so model invocation never depends on this. + * Omit (e.g. isolated construct tests) to skip the Bedrock grant. + */ + readonly invokableModels?: bedrock.IBedrockInvokable[]; } /** @@ -85,9 +103,12 @@ export interface AgentSessionRoleProps { * code. Backend-agnostic: the same role serves agents booted under either the * AgentCore Runtime execution role or the ECS Fargate task role. * - * Bedrock model invocation and CloudWatch Logs intentionally remain on the - * compute role (shared, non-tenant access; and keeping `InvokeModel` off the - * 1-hour-capped chained session avoids breaking long tasks). + * CloudWatch Logs remains on the compute role (shared, non-tenant access). The + * compute role *also* keeps `InvokeModel`; this role adds a parallel, session- + * tagged Bedrock grant (#215) used by the Claude Code subprocess for cost + * attribution. 
Long-task safety on the 1-hour-capped chained session is handled + * by Claude Code's `awsCredentialExport` refresh, and the helper falls back to + * the compute role if assume fails — so model invocation never breaks. */ export class AgentSessionRole extends Construct { /** Actions sufficient for the agent's DynamoDB access. Excludes Scan. */ @@ -191,6 +212,20 @@ export class AgentSessionRole extends Construct { }), ); + // --- Bedrock model invocation: tagged for cost attribution (#215) --- + // Reuse grantInvoke so this role's Bedrock permissions exactly mirror the + // compute role's (cross-region profiles fan out to the foundation model in + // every routed region — replicating that by hand would risk an AccessDenied + // on a cross-region route). Claude Code assumes this role (via its + // awsCredentialExport helper) so InvokeModel rides the session's + // {user_id, repo, task_id} tags, surfacing per-user/repo Bedrock spend in + // CUR 2.0 / Cost Explorer. No PrincipalTag condition: the tags are for + // billing attribution, not access scoping, so a condition would add no + // isolation and only risk breakage. + for (const invokable of props.invokableModels ?? []) { + invokable.grantInvoke(this.role); + } + // The object-level prefix conditions above already constrain access to the // session's own tenant prefix; the remaining wildcard is the per-object // suffix (task_id/attachment_id/filename), which is the intended scope. @@ -205,7 +240,10 @@ export class AgentSessionRole extends Construct { + 'attachments/${aws:PrincipalTag/user_id}/*, ' + 'artifacts/${aws:PrincipalTag/task_id}/*) and the DynamoDB item ' + 'set gated by a dynamodb:LeadingKeys = ${aws:PrincipalTag/task_id} ' - + 'condition — narrower than the compute role this replaces.', + + 'condition — narrower than the compute role this replaces. 
Bedrock ' + + 'InvokeModel resources are the explicit model + inference-profile ' + + 'ARNs from grantInvoke (cross-region profiles fan out to per-region ' + + 'foundation-model ARNs), matching the compute role grant (#215).', }, ], true, diff --git a/cdk/src/stacks/agent.ts b/cdk/src/stacks/agent.ts index 604f794d..08de9223 100644 --- a/cdk/src/stacks/agent.ts +++ b/cdk/src/stacks/agent.ts @@ -421,12 +421,15 @@ export class AgentStack extends Stack { applicationLogGroup.grantWrite(runtime); agentMemory.grantReadWrite(runtime); - // Grant the runtime invoke on each configured foundation model + its - // US cross-Region inference profile. The model set is a single source of - // truth (constructs/bedrock-models.ts), shared with the ECS task role, and - // overridable via the `bedrockModels` CDK context — add a model by config, - // no construct edits. Scoping stays per-model (no Resource:'*'); account- - // level Bedrock access remains the outer gate. + // Grant the runtime invoke on each configured foundation model + its US + // cross-Region inference profile. The model set is a single source of truth + // (constructs/bedrock-models.ts, #434), shared with the ECS task role and + // overridable via the `bedrockModels` CDK context. Each invokable is also + // collected so the same set is granted to the SessionRole below (#215 cost + // attribution) — the two grants derive from one list and can't drift. + // Scoping stays per-model (no Resource:'*'); account-level Bedrock access + // remains the outer gate. 
+ const invokableBedrockModels: bedrock.IBedrockInvokable[] = []; for (const modelId of resolveBedrockModelIds(this.node)) { const foundationModel = new bedrock.BedrockFoundationModel(modelId, { supportsAgents: true, @@ -438,6 +441,7 @@ export class AgentStack extends Stack { }); foundationModel.grantInvoke(runtime); crossRegionProfile.grantInvoke(runtime); + invokableBedrockModels.push(foundationModel, crossRegionProfile); } // --- Per-task SessionRole (#209) --- @@ -458,6 +462,10 @@ export class AgentStack extends Stack { ], traceArtifactsBucket: traceArtifactsBucket.bucket, attachmentsBucket: attachmentsBucket.bucket, + // #215: session-tagged Bedrock grant for cost attribution — the same + // invokables grantInvoke-ed to the runtime above, so the grants stay in + // lockstep. + invokableModels: invokableBedrockModels, }); sessionRoleArnHolder = agentSessionRole.role.roleArn; @@ -995,8 +1003,14 @@ export class AgentStack extends Stack { cloudWatchConfig: { logGroupName: invocationLogGroup.logGroupName, roleArn: bedrockLoggingRole.roleArn, - // Required by API schema but unused — text logs go to CloudWatch only. - largeDataDeliveryS3Config: { bucketName: '', keyPrefix: '' }, + // largeDataDeliveryS3Config is OPTIONAL and intentionally omitted: + // it only governs S3 delivery of oversized payloads, which this + // stack does not use (text logs go to CloudWatch). Sending it with + // an empty bucketName fails client-side validation + // ("valid min length: 3") — and because the errors below are + // swallowed and onUpdate never re-fires (static props), that + // failure silently leaves model-invocation logging DISABLED, which + // in turn means Bedrock records no requestMetadata (#215 Track 2). 
}, textDataDeliveryEnabled: true, imageDataDeliveryEnabled: false, @@ -1004,7 +1018,11 @@ export class AgentStack extends Stack { }, }, physicalResourceId: cr.PhysicalResourceId.of('bedrock-invocation-logging'), - ignoreErrorCodesMatching: '.*', + // Scope the ignore to genuine service-side errors (e.g. a concurrent + // account-level change). Do NOT use '.*' — that also hides client-side + // ValidationExceptions like the empty-bucket bug above, turning a + // deploy-time misconfiguration into silently-absent logging. + ignoreErrorCodesMatching: 'ThrottlingException|ServiceUnavailableException|InternalServerException', }, // onUpdate re-applies the same config to handle drift (e.g., if another // stack or manual action changed the account-level logging config). @@ -1016,7 +1034,6 @@ export class AgentStack extends Stack { cloudWatchConfig: { logGroupName: invocationLogGroup.logGroupName, roleArn: bedrockLoggingRole.roleArn, - largeDataDeliveryS3Config: { bucketName: '', keyPrefix: '' }, }, textDataDeliveryEnabled: true, imageDataDeliveryEnabled: false, @@ -1024,7 +1041,7 @@ export class AgentStack extends Stack { }, }, physicalResourceId: cr.PhysicalResourceId.of('bedrock-invocation-logging'), - ignoreErrorCodesMatching: '.*', + ignoreErrorCodesMatching: 'ThrottlingException|ServiceUnavailableException|InternalServerException', }, // onDelete intentionally omitted — model invocation logging is account-level; // deleting one stack should not disable logging that another stack relies on. @@ -1036,6 +1053,16 @@ export class AgentStack extends Stack { ], resources: ['*'], }), + // PutModelInvocationLoggingConfiguration hands bedrockLoggingRole to the + // Bedrock service (so Bedrock can write to the log group), which requires + // the caller to hold iam:PassRole on that role. Scoped to the one role — + // not a wildcard. 
(Previously masked by the empty-bucket validation error + // that ignoreErrorCodesMatching: '.*' swallowed; now that the call + // actually reaches Bedrock, this is required.) + new iam.PolicyStatement({ + actions: ['iam:PassRole'], + resources: [bedrockLoggingRole.roleArn], + }), ]), }); diff --git a/cdk/test/constructs/agent-session-role.test.ts b/cdk/test/constructs/agent-session-role.test.ts index 1f990a63..676b34f6 100644 --- a/cdk/test/constructs/agent-session-role.test.ts +++ b/cdk/test/constructs/agent-session-role.test.ts @@ -17,6 +17,7 @@ * SOFTWARE. */ +import * as bedrock from '@aws-cdk/aws-bedrock-alpha'; import { App, Stack } from 'aws-cdk-lib'; import { Template, Match } from 'aws-cdk-lib/assertions'; import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; @@ -193,6 +194,52 @@ describe('AgentSessionRole construct', () => { expect(JSON.stringify(trustDoc)).toContain('ComputeRole'); }); + test('grants bedrock:InvokeModel on the supplied invokables when invokableModels is set (#215)', () => { + const app = new App(); + const stack = new Stack(app, 'BedrockStack'); + const computeRole = new iam.Role(stack, 'ComputeRole', { + assumedBy: new iam.ServicePrincipal('bedrock-agentcore.amazonaws.com'), + }); + const table = new dynamodb.Table(stack, 'T', { + partitionKey: { name: 'task_id', type: dynamodb.AttributeType.STRING }, + }); + const model = new bedrock.BedrockFoundationModel('anthropic.claude-sonnet-4-6', { + supportsCrossRegion: true, + }); + new AgentSessionRole(stack, 'SR', { + assumingRoles: [computeRole], + taskScopedTables: [table], + traceArtifactsBucket: new s3.Bucket(stack, 'TB'), + attachmentsBucket: new s3.Bucket(stack, 'AB'), + invokableModels: [model], + }); + + const stackTemplate = Template.fromStack(stack); + const sessionPolicy = Object.entries( + stackTemplate.findResources('AWS::IAM::Policy'), + ).find(([id]) => id.includes('SR'))![1]; + const statements = sessionPolicy.Properties.PolicyDocument.Statement; + // grantInvoke emits the 
wildcard-suffixed action bedrock:InvokeModel*. + const bedrockStatement = statements.find((s: { Action: string | string[] }) => { + const actions = Array.isArray(s.Action) ? s.Action : [s.Action]; + return actions.some((a: string) => a.startsWith('bedrock:InvokeModel')); + }); + expect(bedrockStatement).toBeDefined(); + // The model ARN must be present, scoped (no Resource:'*'). + expect(JSON.stringify(bedrockStatement.Resource)).toContain('anthropic.claude-sonnet-4-6'); + expect(bedrockStatement.Resource).not.toBe('*'); + }); + + test('omitting invokableModels grants no bedrock action (isolated tests)', () => { + const { template: t } = createStack(); + const policies = t.findResources('AWS::IAM::Policy'); + const sessionPolicy = Object.entries(policies).find(([id]) => + id.includes('AgentSessionRole'), + )![1]; + const serialized = JSON.stringify(sessionPolicy.Properties.PolicyDocument.Statement); + expect(serialized).not.toContain('bedrock:InvokeModel'); + }); + test('admitComputeRole wires both trust and grant for an additional compute role', () => { const app = new App(); const stack = new Stack(app, 'MultiPrincipalStack'); diff --git a/cdk/test/stacks/agent.test.ts b/cdk/test/stacks/agent.test.ts index 486b4ad9..9671be12 100644 --- a/cdk/test/stacks/agent.test.ts +++ b/cdk/test/stacks/agent.test.ts @@ -370,6 +370,53 @@ describe('AgentStack', () => { expect(loggingConfigs.length).toBe(1); }); + test('model invocation logging does NOT send an empty largeDataDeliveryS3Config', () => { + // Regression guard (#215): sending largeDataDeliveryS3Config with an empty + // bucketName fails client-side validation ("valid min length: 3"), and with + // a catch-all ignoreErrorCodesMatching that failure silently leaves logging + // DISABLED — so Bedrock records no requestMetadata. The field is optional; + // omit it entirely. Assert it never reappears with an empty bucket. 
+ const customs = template.findResources('Custom::AWS'); + const logging = Object.values(customs).find(r => + JSON.stringify(r.Properties?.Create).includes('putModelInvocationLoggingConfiguration'), + ); + expect(logging).toBeDefined(); + for (const phase of ['Create', 'Update'] as const) { + const body = JSON.stringify(logging!.Properties?.[phase] ?? ''); + // The field must be absent from both phases: this guard fails if it is + // ever re-added, even with a real bucket name. + expect(body).not.toContain('largeDataDeliveryS3Config'); + } + }); + + test('model invocation logging ignores only transient errors, not client-side validation', () => { + // A catch-all '.*' would also swallow the empty-bucket ValidationException + // above, hiding a deploy-time misconfiguration as silently-absent logging. + const customs = template.findResources('Custom::AWS'); + const logging = Object.values(customs).find(r => + JSON.stringify(r.Properties?.Create).includes('putModelInvocationLoggingConfiguration'), + ); + const create = JSON.stringify(logging!.Properties?.Create ?? ''); + expect(create).not.toContain('".*"'); + expect(create).toContain('ThrottlingException'); + }); + + test('model invocation logging custom resource can iam:PassRole the logging role', () => { + // PutModelInvocationLoggingConfiguration passes BedrockLoggingRole to the + // Bedrock service, so the custom resource's role needs iam:PassRole on it. + // Without this the API call fails at deploy (was previously masked by the + // empty-bucket validation error). Assert the policy grants PassRole. 
+ template.hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: Match.arrayWith([ + Match.objectLike({ + Action: 'iam:PassRole', + Effect: 'Allow', + }), + ]), + }, + }); + }); + test('enables session storage with persistent filesystem', () => { template.hasResourceProperties('AWS::BedrockAgentCore::Runtime', { FilesystemConfigurations: [ diff --git a/docs/design/BEDROCK_COST_ATTRIBUTION.md b/docs/design/BEDROCK_COST_ATTRIBUTION.md new file mode 100644 index 00000000..5553d46f --- /dev/null +++ b/docs/design/BEDROCK_COST_ATTRIBUTION.md @@ -0,0 +1,120 @@ +# Bedrock cost attribution + +Design for [#215](https://github.com/aws-samples/sample-autonomous-cloud-coding-agents/issues/215). Adds AWS-native, per-user/per-repo attribution of **Bedrock model-inference spend** on top of the in-app `cost_usd` meter and the #211 per-session tenant-data isolation. + +## TL;DR + +Bedrock is invoked by the **Claude Code CLI subprocess** (`CLAUDE_CODE_USE_BEDROCK=1`), not by the agent's boto3. So neither track can be built by extending `agent/src/aws_session.py` (which scopes DynamoDB/S3 tenant data only). Both levers live in **Claude Code's own configuration**, set by the agent before it spawns the subprocess: + +| Track | Mechanism | Surfaces in | AC | +|---|---|---|---| +| 1. IAM session-tag attribution | Claude Code `awsCredentialExport` → `bedrock_creds_helper.py` does `sts:AssumeRole --tags {user_id,repo,task_id}` against the existing **`AgentSessionRole`** (now also granted `bedrock:InvokeModel*`) | CUR 2.0 / Cost Explorer (`iamPrincipal/` prefix), aggregated per usage-type/day | #1, #2 | +| 2. Per-request metadata | `ANTHROPIC_CUSTOM_HEADERS: X-Amzn-Bedrock-Request-Metadata: {...}` on the subprocess env | Model-invocation logs (`requestMetadata` field), per call | #3 | +| 3. 
Operator docs | `COST_ATTRIBUTION.md` + cross-links | — | #5 | + +The two tracks are **complementary** (per AWS docs): session tags give aggregated chargeback in billing; request metadata gives per-call forensics in logs. Session tags are *not* written to invocation logs, and request metadata is *not* a cost-allocation tag — you need both. + +> **`cost_usd` is a client-side estimate, not billing.** The in-app `cost_usd` is the SDK's `total_cost_usd` (`runner.py`), computed from a build-time price table; it drifts from the real bill on pricing changes, unrecognized models, cache rates, and AWS discounts. It is for per-task guardrails only — the authoritative source is AWS Cost Explorer / CUR 2.0 (Track 1). This is the same caveat the [Claude Agent SDK cost-tracking docs](https://code.claude.com/docs/en/agent-sdk/cost-tracking) raise, adapted for Bedrock (authoritative source is the AWS bill, not the Claude Console). Both this design and the operator guide surface it. + +## Why the issue's original approach doesn't apply + +The issue proposed extending `aws_session.py` / the `DeferredRefreshableCredentials` pattern to route `InvokeModel` through tagged creds. That pattern governs the agent's **boto3** clients for tenant data. But: + +``` +agent/src/runner.py::_setup_agent_env + → os.environ["CLAUDE_CODE_USE_BEDROCK"] = "1" + → ClaudeSDKClient spawns the `claude` CLI subprocess + → subprocess calls bedrock-runtime InvokeModel using the AWS SDK default + credential chain (today: the ambient compute role) +``` + +The agent never makes the `InvokeModel` call, so it cannot attach creds or headers to it directly. The control point is **how Claude Code resolves credentials and headers**, configured before `client.connect()`. + +Verified Claude Code behavior (code.claude.com/docs/en/amazon-bedrock, /env-vars, /settings): + +- **Credentials:** default AWS SDK chain. Mutating the parent process's `AWS_*` env vars mid-session is **not** re-read. 
For refresh, Claude Code supports `awsCredentialExport` — a settings-only key (no env/flag equivalent) naming a helper command run at session start and re-run ~5 min before the `Expiration` the helper returns (≥ CLI 2.1.176). This beats the **1 h role-chaining cap** on an 8 h task. +- **Request metadata:** Claude Code uses the **Invoke API and does not support Converse**, so the Converse `requestMetadata` field is unreachable. The only lever is `ANTHROPIC_CUSTOM_HEADERS` (static per process), which **is read from the process environment** and process-env wins over any settings `env` block. Because ABCA runs **one task per container per Claude Code session**, "static per process" == "per task" — sufficient. No proxy/gateway needed. +- **Settings precedence (security-critical):** under `setting_sources=["project"]` Claude Code loads **only the cloned repo's `.claude/settings.json`** (user settings are dropped) — but the **managed-settings layer is loaded in all cases and outranks everything**, so the untrusted repo cannot override it. + +## Track 1 — IAM session-tag attribution + +### Reuse `AgentSessionRole` (no new role) + +`AgentSessionRole` is *already* assumed by the compute roles with `{user_id, repo, task_id}` STS session tags, and `AGENT_SESSION_ROLE_ARN` is already injected into the container. A second "BedrockInvokeRole" would duplicate that entire trust/grant surface for an identical principal. Instead we add a single grant to it: + +- New optional prop `invokableModels: IBedrockInvokable[]`. For each, the construct calls `invokable.grantInvoke(this.role)` — **the same grant the compute role receives**. Reusing `grantInvoke` (rather than hand-building ARNs) is load-bearing: a cross-region inference profile fans out to the foundation-model ARN in *every routed region*; replicating that by hand would risk an `AccessDenied` on a cross-region route. No `aws:PrincipalTag` condition — the tags are for billing attribution, not access scoping. 
+- `agent.ts` passes the six existing invokables (Sonnet 4.6 / Opus 4 / Haiku 4.5 models + their cross-region profiles). The ECS path reuses the same `AgentSessionRole` instance, so it is covered automatically. + +### The compute role KEEPS its Bedrock grant + +The #211 comment "Bedrock intentionally stays on the compute role to avoid 1 h expiry" is *resolved* by `awsCredentialExport`'s pre-expiry refresh — but we still leave `InvokeModel` on the compute role, because Track 1 **fails open** (below) and the compute-role grant is exactly the fallback path. The SessionRole grant is parallel, not a replacement. + +### Credential helper + Claude Code wiring + +`agent/src/bedrock_creds_helper.py` (invoked by `awsCredentialExport`): + +1. Reads a 0600 JSON file (`/home/agent/.bedrock-attribution.json`) the agent writes at startup, carrying the SessionRole ARN + STS tags. Read from a file, not the environment, so tenant identifiers don't leak into the untrusted repo subprocesses the agent spawns (matching `aws_session.py` discipline). +2. Calls `sts:AssumeRole` with those tags and emits `{"Credentials":{...,"Expiration":"<ISO-8601 timestamp>"}}`. The real `Expiration` drives Claude Code's pre-cap refresh. +3. Tag building reuses `aws_session.build_session_tags` (one definition of the `{user_id,repo,task_id}` tags + 256-char clamp). + +`runner._setup_bedrock_cost_attribution` writes the attribution file when `AGENT_SESSION_ROLE_ARN` is set, and always sets the metadata header (Track 2). + +### Where `awsCredentialExport` lives (RCE boundary) + +`awsCredentialExport` runs an arbitrary command. It is baked into the **managed-settings layer** at `/etc/claude-code/managed-settings.json` (root-owned, copied in the Dockerfile before `USER agent`). This is the only repo-proof location: it loads regardless of `setting_sources=["project"]` and outranks the cloned repo's project `.claude/settings.json`, so a malicious repo cannot define or override it. 
Putting it anywhere the target repo can influence would be RCE with the compute role. + +### Fail-open (not fail-closed) + +Unlike #211 tenant isolation (fail **closed** — a scoping failure means cross-tenant exposure), Bedrock attribution is a **billing/observability** control. If the attribution file is absent or the assume fails, the helper emits the **ambient compute-role credentials** so Bedrock keeps working untagged — losing chargeback granularity is not a security incident. When `AGENT_SESSION_ROLE_ARN` is unset (local/dev), the helper fails open and behavior matches today. + +## Track 2 — per-request metadata + +In `_setup_bedrock_cost_attribution`, set on the process env: + +```python +os.environ["ANTHROPIC_CUSTOM_HEADERS"] = ( + "X-Amzn-Bedrock-Request-Metadata: " + + json.dumps({"user_id": ..., "repo": ..., "task_id": ...}) # 256-char clamp, ≤16 keys +) +``` + +Set via the process env (not project settings) so the untrusted repo can't alter it. Surfaces under `requestMetadata` in `/aws/bedrock/model-invocation-logs/` (logging already enabled in `agent.ts`). + +> **Note — a deliberate exception to the "tenant ids out of `os.environ`" rule.** The tenant-data path keeps `{user_id, repo, task_id}` out of `os.environ` so spawned (untrusted) repo subprocesses don't inherit them. This header *must* live on `os.environ` because Claude Code reads `ANTHROPIC_CUSTOM_HEADERS` from the process env. The exposure is acceptable: the values are the task's *own* identifiers (self-referential, non-secret) — a subprocess learns only who it is already running for. `json.dumps` escaping prevents a crafted slug from injecting an extra (newline-separated) header. + +> **Open risk to validate against a live endpoint:** Bedrock rejects `X-Amzn-Bedrock-Request-Metadata` with `InvalidSignatureException` if the header is omitted from the SigV4 `SignedHeaders`. Whether Claude Code signs custom headers is unverified. 
AC#3 explicitly permits "or documented blocker if Claude Code cannot pass metadata." If it fails, per-call attribution falls back to invocation-log `identity.arn` + `RoleSessionName` (`abca-bedrock-`) that Track 1's tagged session already provides. + +## Version alignment + +The agent runs Claude Code two ways that must agree on the control protocol: the `claude-agent-sdk` Python wheel **bundles** a CLI, and the Dockerfile also installs the CLI via npm. Both are pinned in lockstep — `claude-agent-sdk==0.2.110` (bundles CLI 2.1.191) and npm `@anthropic-ai/claude-code@2.1.191`. 2.1.191 also satisfies the ≥2.1.176 `awsCredentialExport`-with-`Expiration` requirement. + +## Track 3 — operator documentation + +New `docs/guides/COST_ATTRIBUTION.md`: + +- The three meters (in-app `cost_usd`, CUR session-tag chargeback, invocation-log per-call) and when to use each. +- FinOps checklist: activate `iamPrincipal/{user_id,repo}` cost-allocation tags in Billing; create a CUR 2.0 export **with caller-identity ARN** (existing exports don't backfill); set budgets. +- Note: tags aren't retroactive and take ≤24 h to appear. + +Cross-link from `COST_MODEL.md#cost-attribution` and `DEPLOYMENT_GUIDE.md`. (Roadmap links from the issue are stale — removed in #505.) + +## Out of scope (unchanged from issue) + +Bedrock Projects/Workspaces (`bedrock-mantle`, not the Claude Code path); replacing in-app `cost_usd`; org-level CUR/Budgets setup (operator responsibility). Application inference profiles per repo → follow-up #489. 
+ +## Acceptance-criteria mapping + +| AC | Met by | +|---|---| +| #1 Bedrock uses session-tagged creds (AgentCore + ECS); dev unchanged when unset | Track 1: `AgentSessionRole` Bedrock grant + `awsCredentialExport`; helper fails open to compute role when `AGENT_SESSION_ROLE_ARN` unset | +| #2 Session tags documented as billable; operator Billing steps | Track 3 | +| #3 Per-request metadata `{task_id,user_id,repo}` when logging enabled (or documented blocker) | Track 2 + SigV4 validation gate | +| #4 Tests: CDK Bedrock grant on role; cred routing; no #211 regression | `agent-session-role.test.ts` (Bedrock grant present/absent); `test_bedrock_creds_helper.py` (assume + fail-open); `test_runner.py` (file + header wiring); #211 tests untouched | +| #5 `COST_ATTRIBUTION.md` + accurate shipped/planned | Track 3 | +| #6 Starlight mirrors synced | `mise //docs:sync` | + +## Test plan + +- **CDK:** assert `AgentSessionRole` grants `bedrock:InvokeModel*` on the model/profile ARNs (no `Resource:'*'`) when `invokableModels` is set, and grants none when omitted. (#211 trust/grant/tenant-scope tests unchanged.) +- **Agent:** `bedrock_creds_helper` — assume-role carries the tenant tags + tagged session name; **fails open** to ambient creds when the attribution file is missing, when assume raises, and emits `{}` when no creds resolve at all; 0600 file mode. `runner._setup_bedrock_cost_attribution` — writes the file when the role ARN is set, skips it when unset, always sets the metadata header. +- **Live validation (pre-merge, manual):** confirm `X-Amzn-Bedrock-Request-Metadata` is honored (no `InvalidSignatureException`) and lands in invocation logs; confirm `iamPrincipal/user_id` appears in Cost Explorer after tag activation. 
diff --git a/docs/design/COST_MODEL.md b/docs/design/COST_MODEL.md index cea17d4e..997f7237 100644 --- a/docs/design/COST_MODEL.md +++ b/docs/design/COST_MODEL.md @@ -88,11 +88,13 @@ These estimates assume Claude Sonnet with prompt caching enabled and average tas For multi-user deployments, cost should be attributable to individual users and repositories: -- **Per-task:** Token usage and compute duration are captured in task metadata (`agent.cost_usd`, `agent.turns` - see [OBSERVABILITY.md](./OBSERVABILITY.md)). +- **Per-task:** Token usage and compute duration are captured in task metadata (`agent.cost_usd`, `agent.turns` - see [OBSERVABILITY.md](./OBSERVABILITY.md)). Note: `agent.cost_usd` is the Claude Agent SDK's **client-side estimate** (a build-time price table), not authoritative billing — use it for guardrails, and AWS Cost Explorer / CUR 2.0 for the real bill (see [COST_ATTRIBUTION.md](../guides/COST_ATTRIBUTION.md)). - **Per-user:** Aggregate task costs by `user_id`. - **Per-repo:** Aggregate task costs by `repo`. - **Dashboard:** Cost attribution dashboards should be built from the same task-level metrics. +For **AWS-native** chargeback of Bedrock spend (Cost Explorer / CUR 2.0 by `user_id` / `repo`, plus per-call invocation-log forensics) — beyond the in-app `cost_usd` meter above — see the operator guide [COST_ATTRIBUTION.md](../guides/COST_ATTRIBUTION.md) and the platform design [BEDROCK_COST_ATTRIBUTION.md](./BEDROCK_COST_ATTRIBUTION.md). + ## Cost guardrails (current) | Guardrail | Mechanism | Default | diff --git a/docs/guides/COST_ATTRIBUTION.md b/docs/guides/COST_ATTRIBUTION.md new file mode 100644 index 00000000..a99b0b80 --- /dev/null +++ b/docs/guides/COST_ATTRIBUTION.md @@ -0,0 +1,72 @@ +# Cost attribution (operator guide) + +How to attribute **Amazon Bedrock model-inference spend** to individual users and repositories in a multi-user ABCA deployment. 
This is the operator-facing companion to the platform design in [BEDROCK_COST_ATTRIBUTION.md](../design/BEDROCK_COST_ATTRIBUTION.md) and the cost model in [COST_MODEL.md](../design/COST_MODEL.md#cost-attribution). + +> [!WARNING] +> **The in-app `cost_usd` is a client-side estimate, not authoritative billing data.** It is the Claude Agent SDK's `total_cost_usd` (`agent/src/runner.py`), computed locally from a price table bundled into the SDK at build time. It can drift from your actual AWS bill when Bedrock pricing changes, the SDK version does not recognize a model, prompt-cache read/write rates apply, or AWS discounts/commitments/free-tier apply that the client cannot model. Use it for per-task budget guardrails and approximate insight — **do not bill end users or trigger financial decisions from it.** For authoritative cost, use **AWS Cost Explorer / CUR 2.0** (the session-tag chargeback meter below), which reflects your actual invoice. (ABCA runs on Bedrock, so the authoritative source is your AWS bill — not the Claude Console.) + +## Three meters, three questions + +ABCA gives you three independent views of cost. They answer different questions; use them together. + +| Meter | Granularity | Source of truth for | Where | +|---|---|---|---| +| **In-app `cost_usd`** | Per task | Per-task budget guardrails (`max_budget_usd`) | Task metadata / control panel | +| **CUR session-tag chargeback** | Per user / per repo, aggregated per usage-type per day | AWS-native FinOps chargeback | Cost Explorer / CUR 2.0 | +| **Invocation-log metadata** | Per Bedrock call | Per-call forensics, reconciliation | `/aws/bedrock/model-invocation-logs/` | + +Why all three: the in-app meter is an estimate the platform computes; it does not reflect AWS discounts/commitments. IAM session tags flow to your **bill** but only as aggregated billing data (they are *not* written to invocation logs). 
Request metadata gives **per-call** detail in logs but is *not* a cost-allocation tag and never appears in Cost Explorer. Per [AWS docs](https://docs.aws.amazon.com/bedrock/latest/userguide/cost-mgmt-iam-principal-tracking.html), session tags and request metadata are complementary mechanisms. + +## What the platform does automatically + +Once deployed, each agent task makes its Bedrock calls under **session-tagged, refreshable credentials** carrying `{user_id, repo, task_id}`, and stamps the same values as **request metadata** on every call. You do **not** need to change any code. What remains is **operator setup in the AWS Billing console** — AWS does not surface tag-based cost data until you activate it, and (see the ordering note below) you can only activate *after* the platform has run tagged tasks. + +## FinOps checklist + +These steps are a one-time operator responsibility (CDK does not automate org-level billing — see [Out of scope](../design/BEDROCK_COST_ATTRIBUTION.md#out-of-scope-unchanged-from-issue)). + +> **Ordering matters — the tags can't be pre-activated.** IAM-principal cost-allocation tag *keys* (`user_id`, `repo`) do not exist in the Billing console until the deployed platform has actually made tagged Bedrock calls. So the sequence is: **deploy → run at least one task → wait up to 24 h → then activate** (step 1). You cannot activate them before the first tagged call exists. +> +> **Use the Billing console, not Tag Editor / Resource Groups.** Cost-allocation tags live at **Billing and Cost Management → Cost allocation tags** (left nav). The *Tag Editor* (Resource Groups) is a different tool — it lists taggable *resource types* (`AWS::IAM::InstanceProfile`, etc.) and is **not** where you activate these. + +1. 
**Activate IAM-principal cost-allocation tags.** Billing and Cost Management console → **Cost allocation tags** (left nav) → the **User-defined cost allocation tags** tab → the `user_id` and `repo` keys appear with tag type **IAM principal** → select them → **Activate**. (`task_id` is high-cardinality — keep it for logs, not Cost Explorer.) + - Keys appear only **after** the first Bedrock call carrying them, and can take **up to 24 h** to show. + - Activation is **not retroactive** — only spend incurred after activation is tagged. + - IAM-principal cost-allocation tags are a recent Bedrock capability. If the keys never appear a day after running tagged tasks, your account/region may not have it enabled yet — the invocation-log path (below) attributes per call regardless. +2. **Create a CUR 2.0 export with caller identity.** Billing console → **Data Exports** → create a CUR 2.0 export and select the option to include the **caller-identity ARN**. + - If you already have a CUR 2.0 export, you must create a **new** one — existing exports do not backfill identity data. +3. **Set budgets / alerts** per `user_id` or `repo` tag as needed (AWS Budgets), independent of the in-app `max_budget_usd` per-task guardrail. + +## Querying per-call detail (invocation logs) + +> **Model-invocation logging must be ON in the agent's Region, or there is no `requestMetadata` to query.** Bedrock records request metadata **only** when account-level model-invocation logging is enabled in the Region where the call is made. The stack provisions this automatically (a custom resource pointing at the `/aws/bedrock/model-invocation-logs/` log group), but it is **account- and Region-scoped**, so confirm it after deploy — especially if logging was previously disabled, or the stack Region differs from where you expect calls. 
+> +> Verify it is on: +> ``` +> aws bedrock get-model-invocation-logging-configuration --region <region> +> ``` +> An empty result means logging is **off** and no metadata is being captured. Re-enable it (pointing at the stack's own log group + `BedrockLoggingRole`): +> ``` +> aws bedrock put-model-invocation-logging-configuration --region <region> \ +> --logging-config '{"cloudWatchConfig":{"logGroupName":"/aws/bedrock/model-invocation-logs/","roleArn":"<BedrockLoggingRole ARN>"},"textDataDeliveryEnabled":true,"imageDataDeliveryEnabled":false,"embeddingDataDeliveryEnabled":false}' +> ``` +> Do **not** include `largeDataDeliveryS3Config` with an empty bucket name — Bedrock rejects it (`min length: 3`) and the call fails. Only calls made *after* logging is enabled are recorded; re-run a task to populate logs. + +Request metadata lands under the top-level `requestMetadata` field of each log record. Example CloudWatch Logs Insights query (tokens per user + model): + +``` +fields requestMetadata.user_id as user, modelId, + input.inputTokenCount as inTokens, + output.outputTokenCount as outTokens +| stats sum(inTokens) as totalInput, sum(outTokens) as totalOutput, count() as calls + by user, modelId +| sort totalInput desc +``` + +To turn tokens into cost, multiply by the current [Bedrock per-token rates](https://aws.amazon.com/bedrock/pricing/), or join logs to CUR on `requestId` for invoice-accurate reconciliation at the model + usage-type grain. + +## Caveats + +- **Request-metadata header is best-effort.** It depends on Claude Code signing the `X-Amzn-Bedrock-Request-Metadata` header into the SigV4 request; if a Claude Code release does not, the header is rejected and per-call metadata is absent. Per-user/repo chargeback (the session-tag track) is unaffected — it does not rely on the header. See the [validation note](../design/BEDROCK_COST_ATTRIBUTION.md#track-2--per-request-metadata). 
+- **Attribution fails open.** If the per-task credential helper cannot assume the SessionRole, Bedrock still works under the shared compute role — spend for that task is simply untagged, not blocked. +- **No PII in tags/metadata.** `user_id` and `repo` are recorded in your bill and logs; do not map them to anything sensitive. diff --git a/docs/guides/DEPLOYMENT_GUIDE.md b/docs/guides/DEPLOYMENT_GUIDE.md index 435e5583..a32b0922 100644 --- a/docs/guides/DEPLOYMENT_GUIDE.md +++ b/docs/guides/DEPLOYMENT_GUIDE.md @@ -229,4 +229,5 @@ For users without AWS CLI access. - [User guide](./USER_GUIDE.md) -- API reference, CLI usage, task management. - [DEPLOYMENT_ROLES.md](../design/DEPLOYMENT_ROLES.md) -- Least-privilege IAM policies for CloudFormation execution. - [COST_MODEL.md](../design/COST_MODEL.md) -- Per-task costs, cost guardrails, cost at scale. +- [COST_ATTRIBUTION.md](./COST_ATTRIBUTION.md) -- Operator FinOps setup for per-user/per-repo Bedrock chargeback (Cost Explorer / CUR 2.0, invocation-log forensics). - [COMPUTE.md](../design/COMPUTE.md) -- Compute backend architecture and trade-offs. 
diff --git a/docs/scripts/sync-starlight.mjs b/docs/scripts/sync-starlight.mjs index 4326e407..56b082c2 100644 --- a/docs/scripts/sync-starlight.mjs +++ b/docs/scripts/sync-starlight.mjs @@ -253,6 +253,12 @@ mirrorMarkdownFile( path.join('src', 'content', 'docs', 'getting-started', 'Deployment-guide.md'), ); +// --- Cost Attribution Guide: mirror to getting-started/ (operator FinOps setup) --- +mirrorMarkdownFile( + path.join(docsRoot, 'guides', 'COST_ATTRIBUTION.md'), + path.join('src', 'content', 'docs', 'getting-started', 'Cost-attribution.md'), +); + // --- Prompt Guide: mirror to customizing/ --- mirrorMarkdownFile( path.join(docsRoot, 'guides', 'PROMPT_GUIDE.md'), diff --git a/docs/src/content/docs/architecture/Bedrock-cost-attribution.md b/docs/src/content/docs/architecture/Bedrock-cost-attribution.md new file mode 100644 index 00000000..355ac4cd --- /dev/null +++ b/docs/src/content/docs/architecture/Bedrock-cost-attribution.md @@ -0,0 +1,124 @@ +--- +title: Bedrock cost attribution +--- + +# Bedrock cost attribution + +Design for [#215](https://github.com/aws-samples/sample-autonomous-cloud-coding-agents/issues/215). Adds AWS-native, per-user/per-repo attribution of **Bedrock model-inference spend** on top of the in-app `cost_usd` meter and the #211 per-session tenant-data isolation. + +## TL;DR + +Bedrock is invoked by the **Claude Code CLI subprocess** (`CLAUDE_CODE_USE_BEDROCK=1`), not by the agent's boto3. So neither track can be built by extending `agent/src/aws_session.py` (which scopes DynamoDB/S3 tenant data only). Both levers live in **Claude Code's own configuration**, set by the agent before it spawns the subprocess: + +| Track | Mechanism | Surfaces in | AC | +|---|---|---|---| +| 1. 
IAM session-tag attribution | Claude Code `awsCredentialExport` → `bedrock_creds_helper.py` does `sts:AssumeRole --tags {user_id,repo,task_id}` against the existing **`AgentSessionRole`** (now also granted `bedrock:InvokeModel*`) | CUR 2.0 / Cost Explorer (`iamPrincipal/` prefix), aggregated per usage-type/day | #1, #2 | +| 2. Per-request metadata | `ANTHROPIC_CUSTOM_HEADERS: X-Amzn-Bedrock-Request-Metadata: {...}` on the subprocess env | Model-invocation logs (`requestMetadata` field), per call | #3 | +| 3. Operator docs | `COST_ATTRIBUTION.md` + cross-links | — | #5 | + +The two tracks are **complementary** (per AWS docs): session tags give aggregated chargeback in billing; request metadata gives per-call forensics in logs. Session tags are *not* written to invocation logs, and request metadata is *not* a cost-allocation tag — you need both. + +> **`cost_usd` is a client-side estimate, not billing.** The in-app `cost_usd` is the SDK's `total_cost_usd` (`runner.py`), computed from a build-time price table; it drifts from the real bill on pricing changes, unrecognized models, cache rates, and AWS discounts. It is for per-task guardrails only — the authoritative source is AWS Cost Explorer / CUR 2.0 (Track 1). This is the same caveat the [Claude Agent SDK cost-tracking docs](https://code.claude.com/docs/en/agent-sdk/cost-tracking) raise, adapted for Bedrock (authoritative source is the AWS bill, not the Claude Console). Both this design and the operator guide surface it. + +## Why the issue's original approach doesn't apply + +The issue proposed extending `aws_session.py` / the `DeferredRefreshableCredentials` pattern to route `InvokeModel` through tagged creds. That pattern governs the agent's **boto3** clients for tenant data. 
But: + +``` +agent/src/runner.py::_setup_agent_env + → os.environ["CLAUDE_CODE_USE_BEDROCK"] = "1" + → ClaudeSDKClient spawns the `claude` CLI subprocess + → subprocess calls bedrock-runtime InvokeModel using the AWS SDK default + credential chain (today: the ambient compute role) +``` + +The agent never makes the `InvokeModel` call, so it cannot attach creds or headers to it directly. The control point is **how Claude Code resolves credentials and headers**, configured before `client.connect()`. + +Verified Claude Code behavior (code.claude.com/docs/en/amazon-bedrock, /env-vars, /settings): + +- **Credentials:** default AWS SDK chain. Mutating the parent process's `AWS_*` env vars mid-session is **not** re-read. For refresh, Claude Code supports `awsCredentialExport` — a settings-only key (no env/flag equivalent) naming a helper command run at session start and re-run ~5 min before the `Expiration` the helper returns (≥ CLI 2.1.176). This beats the **1 h role-chaining cap** on an 8 h task. +- **Request metadata:** Claude Code uses the **Invoke API and does not support Converse**, so the Converse `requestMetadata` field is unreachable. The only lever is `ANTHROPIC_CUSTOM_HEADERS` (static per process), which **is read from the process environment** and process-env wins over any settings `env` block. Because ABCA runs **one task per container per Claude Code session**, "static per process" == "per task" — sufficient. No proxy/gateway needed. +- **Settings precedence (security-critical):** under `setting_sources=["project"]` Claude Code loads **only the cloned repo's `.claude/settings.json`** (user settings are dropped) — but the **managed-settings layer is loaded in all cases and outranks everything**, so the untrusted repo cannot override it. 
+ +## Track 1 — IAM session-tag attribution + +### Reuse `AgentSessionRole` (no new role) + +`AgentSessionRole` is *already* assumed by the compute roles with `{user_id, repo, task_id}` STS session tags, and `AGENT_SESSION_ROLE_ARN` is already injected into the container. A second "BedrockInvokeRole" would duplicate that entire trust/grant surface for an identical principal. Instead we add a single grant to it: + +- New optional prop `invokableModels: IBedrockInvokable[]`. For each, the construct calls `invokable.grantInvoke(this.role)` — **the same grant the compute role receives**. Reusing `grantInvoke` (rather than hand-building ARNs) is load-bearing: a cross-region inference profile fans out to the foundation-model ARN in *every routed region*; replicating that by hand would risk an `AccessDenied` on a cross-region route. No `aws:PrincipalTag` condition — the tags are for billing attribution, not access scoping. +- `agent.ts` passes the six existing invokables (Sonnet 4.6 / Opus 4 / Haiku 4.5 models + their cross-region profiles). The ECS path reuses the same `AgentSessionRole` instance, so it is covered automatically. + +### The compute role KEEPS its Bedrock grant + +The #211 comment "Bedrock intentionally stays on the compute role to avoid 1 h expiry" is *resolved* by `awsCredentialExport`'s pre-expiry refresh — but we still leave `InvokeModel` on the compute role, because Track 1 **fails open** (below) and the compute-role grant is exactly the fallback path. The SessionRole grant is parallel, not a replacement. + +### Credential helper + Claude Code wiring + +`agent/src/bedrock_creds_helper.py` (invoked by `awsCredentialExport`): + +1. Reads a 0600 JSON file (`/home/agent/.bedrock-attribution.json`) the agent writes at startup, carrying the SessionRole ARN + STS tags. Read from a file, not the environment, so tenant identifiers don't leak into the untrusted repo subprocesses the agent spawns (matching `aws_session.py` discipline). +2. 
Calls `sts:AssumeRole` with those tags and emits `{"Credentials":{...,"Expiration":"<ISO-8601 timestamp>"}}`. The real `Expiration` drives Claude Code's pre-cap refresh. +3. Tag building reuses `aws_session.build_session_tags` (one definition of the `{user_id,repo,task_id}` tags + 256-char clamp). + +`runner._setup_bedrock_cost_attribution` writes the attribution file when `AGENT_SESSION_ROLE_ARN` is set, and always sets the metadata header (Track 2). + +### Where `awsCredentialExport` lives (RCE boundary) + +`awsCredentialExport` runs an arbitrary command. It is baked into the **managed-settings layer** at `/etc/claude-code/managed-settings.json` (root-owned, copied in the Dockerfile before `USER agent`). This is the only repo-proof location: it loads regardless of `setting_sources=["project"]` and outranks the cloned repo's project `.claude/settings.json`, so a malicious repo cannot define or override it. Putting it anywhere the target repo can influence would be RCE with the compute role. + +### Fail-open (not fail-closed) + +Unlike #211 tenant isolation (fail **closed** — a scoping failure means cross-tenant exposure), Bedrock attribution is a **billing/observability** control. If the attribution file is absent or the assume fails, the helper emits the **ambient compute-role credentials** so Bedrock keeps working untagged — losing chargeback granularity is not a security incident. When `AGENT_SESSION_ROLE_ARN` is unset (local/dev), the helper fails open and behavior matches today. + +## Track 2 — per-request metadata + +In `_setup_bedrock_cost_attribution`, set on the process env: + +```python +os.environ["ANTHROPIC_CUSTOM_HEADERS"] = ( + "X-Amzn-Bedrock-Request-Metadata: " + + json.dumps({"user_id": ..., "repo": ..., "task_id": ...}) # 256-char clamp, ≤16 keys +) +``` + +Set via the process env (not project settings) so the untrusted repo can't alter it. Surfaces under `requestMetadata` in `/aws/bedrock/model-invocation-logs/` (logging already enabled in `agent.ts`).
+ +> **Note — a deliberate exception to the "tenant ids out of `os.environ`" rule.** The tenant-data path keeps `{user_id, repo, task_id}` out of `os.environ` so spawned (untrusted) repo subprocesses don't inherit them. This header *must* live on `os.environ` because Claude Code reads `ANTHROPIC_CUSTOM_HEADERS` from the process env. The exposure is acceptable: the values are the task's *own* identifiers (self-referential, non-secret) — a subprocess learns only who it is already running for. `json.dumps` escaping prevents a crafted slug from injecting an extra (newline-separated) header. + +> **Open risk to validate against a live endpoint:** Bedrock rejects `X-Amzn-Bedrock-Request-Metadata` with `InvalidSignatureException` if the header is omitted from the SigV4 `SignedHeaders`. Whether Claude Code signs custom headers is unverified. AC#3 explicitly permits "or documented blocker if Claude Code cannot pass metadata." If it fails, per-call attribution falls back to invocation-log `identity.arn` + `RoleSessionName` (`abca-bedrock-`) that Track 1's tagged session already provides. + +## Version alignment + +The agent runs Claude Code two ways that must agree on the control protocol: the `claude-agent-sdk` Python wheel **bundles** a CLI, and the Dockerfile also installs the CLI via npm. Both are pinned in lockstep — `claude-agent-sdk==0.2.110` (bundles CLI 2.1.191) and npm `@anthropic-ai/claude-code@2.1.191`. 2.1.191 also satisfies the ≥2.1.176 `awsCredentialExport`-with-`Expiration` requirement. + +## Track 3 — operator documentation + +New `docs/guides/COST_ATTRIBUTION.md`: + +- The three meters (in-app `cost_usd`, CUR session-tag chargeback, invocation-log per-call) and when to use each. +- FinOps checklist: activate `iamPrincipal/{user_id,repo}` cost-allocation tags in Billing; create a CUR 2.0 export **with caller-identity ARN** (existing exports don't backfill); set budgets. +- Note: tags aren't retroactive and take ≤24 h to appear. 
+ +Cross-link from `COST_MODEL.md#cost-attribution` and `DEPLOYMENT_GUIDE.md`. (Roadmap links from the issue are stale — removed in #505.) + +## Out of scope (unchanged from issue) + +Bedrock Projects/Workspaces (`bedrock-mantle`, not the Claude Code path); replacing in-app `cost_usd`; org-level CUR/Budgets setup (operator responsibility). Application inference profiles per repo → follow-up #489. + +## Acceptance-criteria mapping + +| AC | Met by | +|---|---| +| #1 Bedrock uses session-tagged creds (AgentCore + ECS); dev unchanged when unset | Track 1: `AgentSessionRole` Bedrock grant + `awsCredentialExport`; helper fails open to compute role when `AGENT_SESSION_ROLE_ARN` unset | +| #2 Session tags documented as billable; operator Billing steps | Track 3 | +| #3 Per-request metadata `{task_id,user_id,repo}` when logging enabled (or documented blocker) | Track 2 + SigV4 validation gate | +| #4 Tests: CDK Bedrock grant on role; cred routing; no #211 regression | `agent-session-role.test.ts` (Bedrock grant present/absent); `test_bedrock_creds_helper.py` (assume + fail-open); `test_runner.py` (file + header wiring); #211 tests untouched | +| #5 `COST_ATTRIBUTION.md` + accurate shipped/planned | Track 3 | +| #6 Starlight mirrors synced | `mise //docs:sync` | + +## Test plan + +- **CDK:** assert `AgentSessionRole` grants `bedrock:InvokeModel*` on the model/profile ARNs (no `Resource:'*'`) when `invokableModels` is set, and grants none when omitted. (#211 trust/grant/tenant-scope tests unchanged.) +- **Agent:** `bedrock_creds_helper` — assume-role carries the tenant tags + tagged session name; **fails open** to ambient creds when the attribution file is missing, when assume raises, and emits `{}` when no creds resolve at all; 0600 file mode. `runner._setup_bedrock_cost_attribution` — writes the file when the role ARN is set, skips it when unset, always sets the metadata header. 
+- **Live validation (pre-merge, manual):** confirm `X-Amzn-Bedrock-Request-Metadata` is honored (no `InvalidSignatureException`) and lands in invocation logs; confirm `iamPrincipal/user_id` appears in Cost Explorer after tag activation. diff --git a/docs/src/content/docs/architecture/Cost-model.md b/docs/src/content/docs/architecture/Cost-model.md index e683e92e..d9606fb2 100644 --- a/docs/src/content/docs/architecture/Cost-model.md +++ b/docs/src/content/docs/architecture/Cost-model.md @@ -92,11 +92,13 @@ These estimates assume Claude Sonnet with prompt caching enabled and average tas For multi-user deployments, cost should be attributable to individual users and repositories: -- **Per-task:** Token usage and compute duration are captured in task metadata (`agent.cost_usd`, `agent.turns` - see [OBSERVABILITY.md](/sample-autonomous-cloud-coding-agents/architecture/observability)). +- **Per-task:** Token usage and compute duration are captured in task metadata (`agent.cost_usd`, `agent.turns` - see [OBSERVABILITY.md](/sample-autonomous-cloud-coding-agents/architecture/observability)). Note: `agent.cost_usd` is the Claude Agent SDK's **client-side estimate** (a build-time price table), not authoritative billing — use it for guardrails, and AWS Cost Explorer / CUR 2.0 for the real bill (see [COST_ATTRIBUTION.md](/sample-autonomous-cloud-coding-agents/getting-started/cost-attribution)). - **Per-user:** Aggregate task costs by `user_id`. - **Per-repo:** Aggregate task costs by `repo`. - **Dashboard:** Cost attribution dashboards should be built from the same task-level metrics. +For **AWS-native** chargeback of Bedrock spend (Cost Explorer / CUR 2.0 by `user_id` / `repo`, plus per-call invocation-log forensics) — beyond the in-app `cost_usd` meter above — see the operator guide [COST_ATTRIBUTION.md](/sample-autonomous-cloud-coding-agents/getting-started/cost-attribution) and the platform design [BEDROCK_COST_ATTRIBUTION.md](/sample-autonomous-cloud-coding-agents/architecture/bedrock-cost-attribution).
+ ## Cost guardrails (current) | Guardrail | Mechanism | Default | diff --git a/docs/src/content/docs/getting-started/Cost-attribution.md b/docs/src/content/docs/getting-started/Cost-attribution.md new file mode 100644 index 00000000..d148fbc3 --- /dev/null +++ b/docs/src/content/docs/getting-started/Cost-attribution.md @@ -0,0 +1,76 @@ +--- +title: Cost attribution +--- + +# Cost attribution (operator guide) + +How to attribute **Amazon Bedrock model-inference spend** to individual users and repositories in a multi-user ABCA deployment. This is the operator-facing companion to the platform design in [BEDROCK_COST_ATTRIBUTION.md](/sample-autonomous-cloud-coding-agents/architecture/bedrock-cost-attribution) and the cost model in [COST_MODEL.md](/sample-autonomous-cloud-coding-agents/architecture/cost-model#cost-attribution). + +> [!WARNING] +> **The in-app `cost_usd` is a client-side estimate, not authoritative billing data.** It is the Claude Agent SDK's `total_cost_usd` (`agent/src/runner.py`), computed locally from a price table bundled into the SDK at build time. It can drift from your actual AWS bill when Bedrock pricing changes, the SDK version does not recognize a model, prompt-cache read/write rates apply, or AWS discounts/commitments/free-tier apply that the client cannot model. Use it for per-task budget guardrails and approximate insight — **do not bill end users or trigger financial decisions from it.** For authoritative cost, use **AWS Cost Explorer / CUR 2.0** (the session-tag chargeback meter below), which reflects your actual invoice. (ABCA runs on Bedrock, so the authoritative source is your AWS bill — not the Claude Console.) + +## Three meters, three questions + +ABCA gives you three independent views of cost. They answer different questions; use them together. 
+ +| Meter | Granularity | Source of truth for | Where | +|---|---|---|---| +| **In-app `cost_usd`** | Per task | Per-task budget guardrails (`max_budget_usd`) | Task metadata / control panel | +| **CUR session-tag chargeback** | Per user / per repo, aggregated per usage-type per day | AWS-native FinOps chargeback | Cost Explorer / CUR 2.0 | +| **Invocation-log metadata** | Per Bedrock call | Per-call forensics, reconciliation | `/aws/bedrock/model-invocation-logs/` | + +Why all three: the in-app meter is an estimate the platform computes; it does not reflect AWS discounts/commitments. IAM session tags flow to your **bill** but only as aggregated billing data (they are *not* written to invocation logs). Request metadata gives **per-call** detail in logs but is *not* a cost-allocation tag and never appears in Cost Explorer. Per [AWS docs](https://docs.aws.amazon.com/bedrock/latest/userguide/cost-mgmt-iam-principal-tracking.html), session tags and request metadata are complementary mechanisms. + +## What the platform does automatically + +Once deployed, each agent task makes its Bedrock calls under **session-tagged, refreshable credentials** carrying `{user_id, repo, task_id}`, and stamps the same values as **request metadata** on every call. You do **not** need to change any code. What remains is **operator setup in the AWS Billing console** — AWS does not surface tag-based cost data until you activate it, and (see the ordering note below) you can only activate *after* the platform has run tagged tasks. + +## FinOps checklist + +These steps are a one-time operator responsibility (CDK does not automate org-level billing — see [Out of scope](/sample-autonomous-cloud-coding-agents/architecture/bedrock-cost-attribution#out-of-scope-unchanged-from-issue)). 
+ +> **Ordering matters — the tags can't be pre-activated.** IAM-principal cost-allocation tag *keys* (`user_id`, `repo`) do not exist in the Billing console until the deployed platform has actually made tagged Bedrock calls. So the sequence is: **deploy → run at least one task → wait up to 24 h → then activate** (step 1). You cannot activate them before the first tagged call exists. +> +> **Use the Billing console, not Tag Editor / Resource Groups.** Cost-allocation tags live at **Billing and Cost Management → Cost allocation tags** (left nav). The *Tag Editor* (Resource Groups) is a different tool — it lists taggable *resource types* (`AWS::IAM::InstanceProfile`, etc.) and is **not** where you activate these. + +1. **Activate IAM-principal cost-allocation tags.** Billing and Cost Management console → **Cost allocation tags** (left nav) → the **User-defined cost allocation tags** tab → the `user_id` and `repo` keys appear with tag type **IAM principal** → select them → **Activate**. (`task_id` is high-cardinality — keep it for logs, not Cost Explorer.) + - Keys appear only **after** the first Bedrock call carrying them, and can take **up to 24 h** to show. + - Activation is **not retroactive** — only spend incurred after activation is tagged. + - IAM-principal cost-allocation tags are a recent Bedrock capability. If the keys never appear a day after running tagged tasks, your account/region may not have it enabled yet — the invocation-log path (below) attributes per call regardless. +2. **Create a CUR 2.0 export with caller identity.** Billing console → **Data Exports** → create a CUR 2.0 export and select the option to include the **caller-identity ARN**. + - If you already have a CUR 2.0 export, you must create a **new** one — existing exports do not backfill identity data. +3. **Set budgets / alerts** per `user_id` or `repo` tag as needed (AWS Budgets), independent of the in-app `max_budget_usd` per-task guardrail. 
+ +## Querying per-call detail (invocation logs) + +> **Model-invocation logging must be ON in the agent's Region, or there is no `requestMetadata` to query.** Bedrock records request metadata **only** when account-level model-invocation logging is enabled in the Region where the call is made. The stack provisions this automatically (a custom resource pointing at the `/aws/bedrock/model-invocation-logs/` log group), but it is **account- and Region-scoped**, so confirm it after deploy — especially if logging was previously disabled, or the stack Region differs from where you expect calls. +> +> Verify it is on: +> ``` +> aws bedrock get-model-invocation-logging-configuration --region <region> +> ``` +> An empty result means logging is **off** and no metadata is being captured. Re-enable it (pointing at the stack's own log group + `BedrockLoggingRole`): +> ``` +> aws bedrock put-model-invocation-logging-configuration --region <region> \ +> --logging-config '{"cloudWatchConfig":{"logGroupName":"/aws/bedrock/model-invocation-logs/","roleArn":"<BedrockLoggingRole-arn>"},"textDataDeliveryEnabled":true,"imageDataDeliveryEnabled":false,"embeddingDataDeliveryEnabled":false}' +> ``` +> Do **not** include `largeDataDeliveryS3Config` with an empty bucket name — Bedrock rejects it (`min length: 3`) and the call fails. Only calls made *after* logging is enabled are recorded; re-run a task to populate logs. + +Request metadata lands under the top-level `requestMetadata` field of each log record.
Example CloudWatch Logs Insights query (tokens per user + model): + +``` +fields requestMetadata.user_id as user, modelId, + input.inputTokenCount as inTokens, + output.outputTokenCount as outTokens +| stats sum(inTokens) as totalInput, sum(outTokens) as totalOutput, count() as calls + by user, modelId +| sort totalInput desc +``` + +To turn tokens into cost, multiply by the current [Bedrock per-token rates](https://aws.amazon.com/bedrock/pricing/), or join logs to CUR on `requestId` for invoice-accurate reconciliation at the model + usage-type grain. + +## Caveats + +- **Request-metadata header is best-effort.** It depends on Claude Code signing the `X-Amzn-Bedrock-Request-Metadata` header into the SigV4 request; if a Claude Code release does not, the header is rejected and per-call metadata is absent. Per-user/repo chargeback (the session-tag track) is unaffected — it does not rely on the header. See the [validation note](/sample-autonomous-cloud-coding-agents/architecture/bedrock-cost-attribution#track-2--per-request-metadata). +- **Attribution fails open.** If the per-task credential helper cannot assume the SessionRole, Bedrock still works under the shared compute role — spend for that task is simply untagged, not blocked. +- **No PII in tags/metadata.** `user_id` and `repo` are recorded in your bill and logs; do not map them to anything sensitive. diff --git a/docs/src/content/docs/getting-started/Deployment-guide.md b/docs/src/content/docs/getting-started/Deployment-guide.md index f31e890e..01f6e2ce 100644 --- a/docs/src/content/docs/getting-started/Deployment-guide.md +++ b/docs/src/content/docs/getting-started/Deployment-guide.md @@ -233,4 +233,5 @@ For users without AWS CLI access. - [User guide](/sample-autonomous-cloud-coding-agents/using/overview) -- API reference, CLI usage, task management. - [DEPLOYMENT_ROLES.md](/sample-autonomous-cloud-coding-agents/architecture/deployment-roles) -- Least-privilege IAM policies for CloudFormation execution. 
- [COST_MODEL.md](/sample-autonomous-cloud-coding-agents/architecture/cost-model) -- Per-task costs, cost guardrails, cost at scale. +- [COST_ATTRIBUTION.md](/sample-autonomous-cloud-coding-agents/getting-started/cost-attribution) -- Operator FinOps setup for per-user/per-repo Bedrock chargeback (Cost Explorer / CUR 2.0, invocation-log forensics). - [COMPUTE.md](/sample-autonomous-cloud-coding-agents/architecture/compute) -- Compute backend architecture and trade-offs.