aws-samples · krokoko · Jun 30, 2026 · Jun 30, 2026 · Jun 30, 2026 · Jun 30, 2026
@@ -46,8 +46,14 @@ RUN curl -fsSL https://deb.nodesource.com/setup_24.x | bash - && \
 
 # Install Claude Code CLI (the Python SDK requires this binary)
 # Then update known vulnerable transitive packages where fixed versions exist.
+# Pinned 2.1.191 to match the CLI bundled by claude-agent-sdk 0.2.110 (see
+# agent/pyproject.toml) — the SDK and the on-PATH CLI must agree on the control
+# protocol. This version also has the awsCredentialExport behavior #215 needs:
+# returned creds are cached until 5 min before the JSON's `Expiration`, so an
+# 8 h task re-assumes the 1 h-capped SessionRole before expiry. Older builds
+# only refreshed hourly on a timer, racing the role-chaining cap.
 RUN npm install -g npm@latest && \
-    npm install -g @anthropic-ai/claude-code@2.1.142 && \
+    npm install -g @anthropic-ai/claude-code@2.1.191 && \
     CLAUDE_NPM_ROOT="$(npm root -g)/@anthropic-ai/claude-code" && \
     npm --prefix "${CLAUDE_NPM_ROOT}" update tar minimatch glob cross-spawn picomatch
 
@@ -81,6 +87,13 @@ COPY contracts/ /app/contracts/
 # ``WorkflowValidationError: workflow '...' not found at /app/workflows/...``.
 COPY agent/workflows/ /app/workflows/
 COPY agent/prepare-commit-msg.sh /app/
+# Claude Code managed settings (#215). The highest-precedence settings layer —
+# loaded regardless of setting_sources and unoverridable by the untrusted cloned
+# repo's project .claude/settings.json. Carries awsCredentialExport so Bedrock
+# calls use session-tagged, refreshable credentials for cost attribution.
+# Placing awsCredentialExport (an arbitrary command) anywhere the target repo
+# can influence would be RCE with the compute role, so it lives ONLY here.
+COPY agent/managed-settings.json /etc/claude-code/managed-settings.json
 
 # Create non-root user (Claude Code CLI refuses bypassPermissions as root)
 RUN useradd -m -s /bin/bash agent && \

@@ -0,0 +1,3 @@
+{
+  "awsCredentialExport": "/app/.venv/bin/python /app/src/bedrock_creds_helper.py"
+}
@@ -16,7 +16,7 @@ dependencies = [
     # would degrade gracefully — but for now we keep the dep to
     # preserve the clean code path.
     "bedrock-agentcore==1.9.1", #https://pypi.org/project/bedrock-agentcore/
-    "claude-agent-sdk==0.2.82", #https://github.com/anthropics/claude-agent-sdk-python/releases/tag/v0.2.82
+    "claude-agent-sdk==0.2.110", #https://github.com/anthropics/claude-agent-sdk-python/releases/tag/v0.2.110 (bundles claude CLI 2.1.191; kept in lockstep with the npm CLI pin in the Dockerfile, #215)
     "requests==2.34.2", #https://pypi.org/project/requests/
     "fastapi==0.136.1", #https://pypi.org/project/fastapi/
     "uvicorn==0.47.0", #https://pypi.org/project/uvicorn/

@@ -78,6 +78,19 @@ class SessionScopingError(RuntimeError):
     """
 
 
+def build_session_tags(user_id: str, repo: str, task_id: str) -> list[dict[str, str]]:
+    """Return the STS ``Tags`` entries for the tenant identifiers that are set.
+
+    Produces one ``{"Key": ..., "Value": ...}`` dict per identifier, always in
+    the order ``user_id``, ``repo``, ``task_id``. An empty identifier is left
+    out entirely, and every value is cut to ``_MAX_TAG_VALUE_LEN`` characters
+    so an over-long value (e.g. a long repo slug) cannot exceed the tag-value
+    length cap.
+    """
+    session_tags: list[dict[str, str]] = []
+    for name, raw in (("user_id", user_id), ("repo", repo), ("task_id", task_id)):
+        if not raw:
+            # Unset identifier: emit no tag rather than an empty-valued one.
+            continue
+        session_tags.append({"Key": name, "Value": raw[:_MAX_TAG_VALUE_LEN]})
+    return session_tags
+
+
 def configure_session(user_id: str, repo: str, task_id: str) -> None:
     """Record session-tag values in private module state for later use.
 
@@ -115,6 +128,11 @@ def _session_tags() -> list[dict[str, str]]:
     return [{"Key": key, "Value": value[:_MAX_TAG_VALUE_LEN]} for key, value in _tags.items()]
 
 
+# Public alias of the IAM tag-value length cap, for callers outside this module
+# (e.g. the runner's Bedrock cost-attribution setup) that work with tags built
+# by ``build_session_tags`` rather than with this module's private state.
+MAX_TAG_VALUE_LEN = _MAX_TAG_VALUE_LEN
+
+
 def _build_scoped_session(role_arn: str) -> Any:
     """Build a boto3 Session backed by refreshable assumed-role credentials.
 

@@ -0,0 +1,199 @@
+#!/usr/bin/env python3
+"""Credential helper for Claude Code's Bedrock calls (#215, cost attribution).
+
+Claude Code (``CLAUDE_CODE_USE_BEDROCK=1``) makes every ``InvokeModel`` call —
+not the agent's boto3 — so the per-task tenant-data SessionRole in
+``aws_session.py`` cannot tag those calls. Instead Claude Code's
+``awsCredentialExport`` setting (in the image's managed-settings layer) runs
+this script, captures its JSON stdout, and signs Bedrock requests with the
+returned credentials. With a real ``Expiration`` it re-runs ~5 min before
+expiry, so an 8 h task survives the 1 h role-chaining cap.
+
+Goal: assume the per-task SessionRole with ``{user_id, repo, task_id}`` STS
+session tags so Bedrock spend is attributable per user/repo in AWS Cost
+Explorer / CUR 2.0 (``iamPrincipal/*`` dimensions, after the operator activates
+the cost-allocation tags). The same role already carries the tenant-data grants;
+Track-1 only adds ``bedrock:InvokeModel*`` to it (see ``agent-session-role.ts``).
+
+**Fails OPEN.** Bedrock attribution is a billing/observability control, not a
+tenant-isolation one (contrast ``aws_session.py``, which fails closed). If the
+attribution config is absent or the assume-role fails, this helper emits the
+**ambient** compute-role credentials so Bedrock keeps working untagged — losing
+chargeback granularity is not a security incident, and the compute role retains
+``InvokeModel`` precisely so this fallback works.
+
+The role ARN and tag values are read from a 0600 JSON file the agent writes at
+startup (``write_attribution_file``), not from the environment — so the tenant
+identifiers are not inherited by the untrusted repo subprocesses the agent
+spawns, matching the discipline in ``aws_session.py``.
+
+Output shape (consumed by Claude Code's awsCredentialExport):
+
+    {"Credentials": {"AccessKeyId": "...", "SecretAccessKey": "...",
+                     "SessionToken": "...", "Expiration": "<ISO8601>"}}
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from typing import Any
+
+# Fixed path the agent writes (0600) and this helper reads. A fixed path is
+# required because the managed-settings ``awsCredentialExport`` command is
+# static (baked into the image) and cannot carry per-task arguments.
+ATTRIBUTION_FILE_ENV = "BEDROCK_ATTRIBUTION_FILE"
+DEFAULT_ATTRIBUTION_FILE = "/home/agent/.bedrock-attribution.json"
+
+# Role chaining caps the assumed session at 1 hour; request the max the cap
+# allows. Claude Code refreshes ~5 min before the returned Expiration.
+_CHAINED_SESSION_DURATION_S = 3600
+
+
+def attribution_file_path() -> str:
+    """Return the path of the attribution file.
+
+    The ``BEDROCK_ATTRIBUTION_FILE`` environment variable wins when it holds a
+    non-blank value (surrounding whitespace is stripped); otherwise the fixed
+    ``DEFAULT_ATTRIBUTION_FILE`` location is used.
+    """
+    override = os.environ.get(ATTRIBUTION_FILE_ENV, "").strip()
+    if override:
+        return override
+    return DEFAULT_ATTRIBUTION_FILE
+
+
+def write_attribution_file(
+    role_arn: str, tags: list[dict[str, str]], path: str | None = None
+) -> str:
+    """Persist the SessionRole ARN + STS tags for the helper to read.
+
+    The file holds ``{"role_arn": ..., "tags": [...]}`` as JSON and ends up
+    with mode 0600 whether it is newly created or already existed. Called by
+    the agent at startup (the runner's ``_setup_bedrock_cost_attribution``)
+    only when a SessionRole is configured; absence of the file is the
+    fail-open signal.
+
+    Args:
+        role_arn: ARN of the role the helper should assume.
+        tags: STS session tags as ``{"Key": ..., "Value": ...}`` dicts.
+        path: Destination file; defaults to ``attribution_file_path()``.
+
+    Returns:
+        The path that was written.
+
+    Raises:
+        OSError: If the file cannot be opened, re-moded, or written.
+    """
+    target = path or attribution_file_path()
+    payload = json.dumps({"role_arn": role_arn, "tags": tags})
+    # Create with 0600 from the start (os.open + O_CREAT honors mode, modulo
+    # umask) so the secret-adjacent file is never briefly world-readable.
+    fd = os.open(target, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+    with os.fdopen(fd, "w", encoding="utf-8") as fh:
+        # The mode given to os.open only applies when the file is created. A
+        # file left over from an earlier run keeps whatever mode it had, so
+        # tighten it explicitly. O_TRUNC has already emptied the file, so
+        # nothing sensitive is exposed before this point.
+        if hasattr(os, "fchmod"):
+            os.fchmod(fh.fileno(), 0o600)
+        fh.write(payload)
+    return target
+
+
+def _warn(message: str) -> None:
+    """Write one diagnostic line, prefixed ``[bedrock-creds]``, to stderr.
+
+    stdout is reserved for the credential JSON that Claude Code parses as the
+    ``awsCredentialExport`` result, so nothing diagnostic may ever be written
+    there (which also rules out ``shell.log``, since it writes to fd 1). Every
+    fail-open path reports through this function so that a fallback to
+    untagged credentials leaves a visible trace instead of passing silently.
+    """
+    sys.stderr.write(f"[bedrock-creds] {message}\n")
+
+
+def _emit(creds: dict[str, str]) -> None:
+    """Write ``creds`` to stdout wrapped as ``{"Credentials": ...}`` JSON.
+
+    No trailing newline is added; stdout carries exactly the JSON document.
+    """
+    envelope = {"Credentials": creds}
+    sys.stdout.write(json.dumps(envelope))
+
+
+def _frozen_to_creds(frozen: Any, expiry_iso: str | None) -> dict[str, str]:
+    """Translate frozen credentials into the ``awsCredentialExport`` key names.
+
+    Args:
+        frozen: Object exposing ``access_key``, ``secret_key`` and ``token``.
+        expiry_iso: Expiry timestamp to report, or ``None``/empty for none.
+
+    Returns:
+        A dict with ``AccessKeyId``, ``SecretAccessKey`` and ``SessionToken``
+        (an empty string when ``token`` is falsy), plus ``Expiration`` only
+        when ``expiry_iso`` is non-empty.
+    """
+    envelope: dict[str, str] = {}
+    envelope["AccessKeyId"] = frozen.access_key
+    envelope["SecretAccessKey"] = frozen.secret_key
+    envelope["SessionToken"] = frozen.token or ""
+    if not expiry_iso:
+        return envelope
+    envelope["Expiration"] = expiry_iso
+    return envelope
+
+
+def _ambient_credentials() -> dict[str, str]:
+    """Frozen ambient (compute-role) credentials — the fail-open fallback.
+
+    This is the last stop of every fail-open path, so it must never raise: a
+    traceback here would leave stdout without a credential envelope. Any
+    failure to import botocore or to resolve/refresh credentials is reported
+    through ``_warn`` and answered with an empty dict.
+
+    Returns:
+        The ambient credentials without an ``Expiration`` key, or ``{}`` when
+        none can be resolved.
+    """
+    try:
+        # Imported lazily and inside the guard: this function is also reached
+        # from the "boto3 unavailable" branch, where this import fails too.
+        import botocore.session
+
+        creds = botocore.session.get_session().get_credentials()
+        frozen = None if creds is None else creds.get_frozen_credentials()
+    except Exception as exc:  # noqa: BLE001 - last-resort fail-open boundary
+        _warn(
+            f"ambient credential lookup failed ({type(exc).__name__}: {exc}); "
+            "emitting empty envelope, Claude Code will use its default chain"
+        )
+        return {}
+    if frozen is None:
+        # No resolvable credentials at all — the deepest degradation. Emit an
+        # empty object; Claude Code then falls back to its own default-chain
+        # resolution. Surface it: if that fallback also fails, this stderr line
+        # is the only breadcrumb.
+        _warn(
+            "no resolvable AWS credentials; emitting empty envelope, "
+            "Claude Code will use its default chain"
+        )
+        return {}
+    return _frozen_to_creds(frozen, None)
+
+
+def resolve_credentials() -> dict[str, str]:
+    """Return tagged assumed-role creds, or ambient creds on any failure.
+
+    Reads ``role_arn`` and ``tags`` from the attribution file, assumes that
+    role with the tags attached, and returns the resulting credentials
+    including their real ``Expiration``. Every failure along the way (file
+    absent or malformed, boto3 unavailable, ``AssumeRole`` rejected, anything
+    unexpected) is reported through ``_warn`` and answered with
+    ``_ambient_credentials()``, so the helper stays fail-open.
+    """
+    path = attribution_file_path()
+    try:
+        with open(path, encoding="utf-8") as fh:
+            cfg = json.load(fh)
+        role_arn = cfg["role_arn"]
+        tags = cfg.get("tags", [])
+        # Scan the tags while still inside the guarded region: a wrongly-shaped
+        # ``tags`` value must fall open, not crash the helper with a traceback.
+        task_id = next((t["Value"] for t in tags if t.get("Key") == "task_id"), "")
+    except FileNotFoundError:
+        # Attribution not configured (local/dev, or pre-provisioning). Expected
+        # and benign.
+        _warn("attribution file absent; not configured — using ambient creds")
+        return _ambient_credentials()
+    except (OSError, ValueError, KeyError, TypeError, AttributeError) as exc:
+        # File present but unreadable/malformed/schema-drifted. TypeError and
+        # AttributeError cover JSON that parses but is not the expected
+        # object / list-of-objects shape. This is NOT the benign "not
+        # configured" case — it points at a write_attribution_file bug or a
+        # partial write, so the message names the exception.
+        _warn(
+            f"attribution file present but unreadable ({type(exc).__name__}: {exc}); "
+            "using ambient creds"
+        )
+        return _ambient_credentials()
+
+    try:
+        import boto3
+        from botocore.exceptions import BotoCoreError, ClientError
+    except ImportError as exc:
+        # boto3 missing/broken in the image is a packaging defect, not the
+        # expected assume-role failure — name it explicitly so it can't hide.
+        _warn(f"boto3 unavailable ({exc}); using ambient creds — fix the image")
+        return _ambient_credentials()
+
+    import re
+
+    region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
+    # RoleSessionName only accepts word characters and "+=,.@-", at most 64
+    # chars. Replace anything else so an unusual task id cannot make
+    # AssumeRole fail and silently drop attribution. With no task id, use the
+    # bare prefix rather than a name ending in a dangling "-".
+    suffix = re.sub(r"[^\w+=,.@-]", "-", str(task_id), flags=re.ASCII)
+    session_name = (f"abca-bedrock-{suffix}" if suffix else "abca-bedrock")[:64]
+    try:
+        resp = boto3.client("sts", region_name=region).assume_role(
+            RoleArn=role_arn,
+            RoleSessionName=session_name,
+            DurationSeconds=_CHAINED_SESSION_DURATION_S,
+            Tags=tags,
+        )
+        c = resp["Credentials"]
+        return {
+            "AccessKeyId": c["AccessKeyId"],
+            "SecretAccessKey": c["SecretAccessKey"],
+            "SessionToken": c["SessionToken"],
+            "Expiration": c["Expiration"].isoformat(),
+        }
+    except (ClientError, BotoCoreError) as exc:
+        # Expected assume failure: role not yet provisioned, AccessDenied,
+        # transient STS error. Fail open so Bedrock keeps working on the
+        # compute role; spend for this task is untagged.
+        _warn(
+            f"assume_role failed ({type(exc).__name__}: {exc}); using ambient creds "
+            "— Bedrock spend will be UNTAGGED"
+        )
+        return _ambient_credentials()
+    except Exception as exc:
+        # Anything else (unexpected STS response shape, a logic bug here) is NOT
+        # the expected fallback. Still fail open — this is a billing control, not
+        # isolation — but flag it distinctly so it isn't mistaken for AccessDenied.
+        _warn(
+            f"UNEXPECTED error minting tagged creds ({type(exc).__name__}: {exc}); "
+            "using ambient creds"
+        )
+        return _ambient_credentials()
+
+
+def main() -> int:
+    """Resolve credentials, print them as the JSON envelope, and return 0."""
+    credentials = resolve_credentials()
+    _emit(credentials)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -59,6 +59,58 @@ def _parse_token_usage(raw_usage: Any) -> TokenUsage:
     return TokenUsage(**values)
 
 
+def _setup_bedrock_cost_attribution(config: TaskConfig) -> None:
+    """Configure Bedrock cost attribution for the Claude Code subprocess (#215).
+
+    Claude Code issues the ``InvokeModel`` calls itself, so attribution has to
+    travel through its own credential and header channels rather than the
+    agent's boto3. Two independent pieces are set up from the same
+    ``{user_id, repo, task_id}`` tags:
+
+    * **Chargeback tags.** When ``AGENT_SESSION_ROLE_ARN`` is set and at least
+      one tag exists, the role ARN and tags are written to the attribution
+      file for ``bedrock_creds_helper.py`` to read. A write failure
+      (``OSError``) is logged as a warning and otherwise ignored.
+
+    * **Per-call request metadata.** When at least one tag exists,
+      ``ANTHROPIC_CUSTOM_HEADERS`` is set on the process environment to a
+      single ``X-Amzn-Bedrock-Request-Metadata`` header whose value is the
+      tags as compact JSON. It goes on the process env rather than project
+      settings so the cloned repo cannot alter it.
+    """
+    import json
+
+    from aws_session import MAX_TAG_VALUE_LEN, build_session_tags
+
+    session_role = os.environ.get("AGENT_SESSION_ROLE_ARN", "").strip()
+    session_tags = build_session_tags(config.user_id, config.repo_url, config.task_id)
+
+    if session_role and session_tags:
+        try:
+            from bedrock_creds_helper import write_attribution_file
+
+            write_attribution_file(session_role, session_tags)
+        except OSError as exc:
+            # Attribution is observability, not isolation: carry on without the
+            # file and accept that this task's spend is untagged.
+            log("WARN", f"Bedrock attribution file not written ({exc}); spend will be untagged")
+
+    # The request metadata carries the same values as the STS tags. These are
+    # deliberately placed on os.environ because Claude Code reads
+    # ANTHROPIC_CUSTOM_HEADERS from the process env; the values describe only
+    # the task's own user/repo/task id, so a spawned subprocess learns nothing
+    # beyond who it is already running for.
+    request_metadata: dict[str, str] = {}
+    for tag in session_tags:
+        request_metadata[tag["Key"]] = tag["Value"][:MAX_TAG_VALUE_LEN]
+    if not request_metadata:
+        return
+
+    # json.dumps escapes newlines and quotes, so a crafted repo value cannot
+    # smuggle in an additional newline-separated header.
+    encoded = json.dumps(request_metadata, separators=(",", ":"))
+    os.environ["ANTHROPIC_CUSTOM_HEADERS"] = f"X-Amzn-Bedrock-Request-Metadata: {encoded}"
+
+
 def _setup_agent_env(config: TaskConfig) -> tuple[str | None, str | None]:
     """Configure process environment for the Claude Code CLI subprocess.
 
@@ -72,6 +124,8 @@ def _setup_agent_env(config: TaskConfig) -> tuple[str | None, str | None]:
     os.environ["ANTHROPIC_MODEL"] = config.anthropic_model
     os.environ["GITHUB_TOKEN"] = config.github_token
     os.environ["GH_TOKEN"] = config.github_token
+
+    _setup_bedrock_cost_attribution(config)
     # DO NOT set ANTHROPIC_LOG — any logging level causes the CLI to write to
     # stderr, which fills the OS pipe buffer (64 KB) and deadlocks the
     # single-threaded Node.js CLI process (blocked stderr write prevents stdout