Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion agent/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,14 @@ RUN curl -fsSL https://deb.nodesource.com/setup_24.x | bash - && \

# Install Claude Code CLI (the Python SDK requires this binary)
# Then update known vulnerable transitive packages where fixed versions exist.
# Pinned 2.1.191 to match the CLI bundled by claude-agent-sdk 0.2.110 (see
# agent/pyproject.toml) — the SDK and the on-PATH CLI must agree on the control
# protocol. This version also has the awsCredentialExport behavior #215 needs:
# returned creds are cached until 5 min before the JSON's `Expiration`, so an
# 8 h task re-assumes the 1 h-capped SessionRole before expiry. Older builds
# only refreshed hourly on a timer, racing the role-chaining cap.
RUN npm install -g npm@latest && \
npm install -g @anthropic-ai/claude-code@2.1.142 && \
npm install -g @anthropic-ai/claude-code@2.1.191 && \
CLAUDE_NPM_ROOT="$(npm root -g)/@anthropic-ai/claude-code" && \
npm --prefix "${CLAUDE_NPM_ROOT}" update tar minimatch glob cross-spawn picomatch

Expand Down Expand Up @@ -81,6 +87,13 @@ COPY contracts/ /app/contracts/
# ``WorkflowValidationError: workflow '...' not found at /app/workflows/...``.
COPY agent/workflows/ /app/workflows/
COPY agent/prepare-commit-msg.sh /app/
# Claude Code managed settings (#215). The highest-precedence settings layer —
# loaded regardless of setting_sources and unoverridable by the untrusted cloned
# repo's project .claude/settings.json. Carries awsCredentialExport so Bedrock
# calls use session-tagged, refreshable credentials for cost attribution.
# Placing awsCredentialExport (an arbitrary command) anywhere the target repo
# can influence would be RCE with the compute role, so it lives ONLY here.
COPY agent/managed-settings.json /etc/claude-code/managed-settings.json

# Create non-root user (Claude Code CLI refuses bypassPermissions as root)
RUN useradd -m -s /bin/bash agent && \
Expand Down
3 changes: 3 additions & 0 deletions agent/managed-settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"awsCredentialExport": "/app/.venv/bin/python /app/src/bedrock_creds_helper.py"
}
2 changes: 1 addition & 1 deletion agent/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ dependencies = [
# would degrade gracefully — but for now we keep the dep to
# preserve the clean code path.
"bedrock-agentcore==1.9.1", #https://pypi.org/project/bedrock-agentcore/
"claude-agent-sdk==0.2.82", #https://github.com/anthropics/claude-agent-sdk-python/releases/tag/v0.2.82
"claude-agent-sdk==0.2.110", #https://github.com/anthropics/claude-agent-sdk-python/releases/tag/v0.2.110 (bundles claude CLI 2.1.191; kept in lockstep with the npm CLI pin in the Dockerfile, #215)
"requests==2.34.2", #https://pypi.org/project/requests/
"fastapi==0.136.1", #https://pypi.org/project/fastapi/
"uvicorn==0.47.0", #https://pypi.org/project/uvicorn/
Expand Down
18 changes: 18 additions & 0 deletions agent/src/aws_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,19 @@ class SessionScopingError(RuntimeError):
"""


def build_session_tags(user_id: str, repo: str, task_id: str) -> list[dict[str, str]]:
    """Turn the tenant identifiers into an AssumeRole ``Tags`` list.

    Identifiers that are empty are left out entirely. Every value that is kept
    is cut to ``_MAX_TAG_VALUE_LEN`` characters so an over-long repo slug
    cannot make ``AssumeRole`` fail on tag length. Entries are emitted in the
    fixed order ``user_id``, ``repo``, ``task_id``.

    This is the single definition of the ``{user_id, repo, task_id}`` tag set,
    used both by the in-process tenant-data session and by the out-of-process
    Bedrock credential helper path.
    """
    session_tags = []
    for tag_key, tag_value in (("user_id", user_id), ("repo", repo), ("task_id", task_id)):
        if not tag_value:
            # Skip identifiers that were not supplied.
            continue
        session_tags.append({"Key": tag_key, "Value": tag_value[:_MAX_TAG_VALUE_LEN]})
    return session_tags


def configure_session(user_id: str, repo: str, task_id: str) -> None:
"""Record session-tag values in private module state for later use.

Expand Down Expand Up @@ -115,6 +128,11 @@ def _session_tags() -> list[dict[str, str]]:
return [{"Key": key, "Value": value[:_MAX_TAG_VALUE_LEN]} for key, value in _tags.items()]


# Public alias of the IAM tag-value length cap (``_MAX_TAG_VALUE_LEN``), so
# code outside this module does not have to import the private name. It is
# imported by ``runner._setup_bedrock_cost_attribution``, which applies the
# same cap to the per-request Bedrock metadata values.
MAX_TAG_VALUE_LEN = _MAX_TAG_VALUE_LEN


def _build_scoped_session(role_arn: str) -> Any:
"""Build a boto3 Session backed by refreshable assumed-role credentials.

Expand Down
199 changes: 199 additions & 0 deletions agent/src/bedrock_creds_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#!/usr/bin/env python3
"""Credential helper for Claude Code's Bedrock calls (#215, cost attribution).

Claude Code (``CLAUDE_CODE_USE_BEDROCK=1``) makes every ``InvokeModel`` call —
not the agent's boto3 — so the per-task tenant-data SessionRole in
``aws_session.py`` cannot tag those calls. Instead Claude Code's
``awsCredentialExport`` setting (in the image's managed-settings layer) runs
this script, captures its JSON stdout, and signs Bedrock requests with the
returned credentials. With a real ``Expiration`` it re-runs ~5 min before
expiry, so an 8 h task survives the 1 h role-chaining cap.

Goal: assume the per-task SessionRole with ``{user_id, repo, task_id}`` STS
session tags so Bedrock spend is attributable per user/repo in AWS Cost
Explorer / CUR 2.0 (``iamPrincipal/*`` dimensions, after the operator activates
the cost-allocation tags). The same role already carries the tenant-data grants;
Track-1 only adds ``bedrock:InvokeModel*`` to it (see ``agent-session-role.ts``).

**Fails OPEN.** Bedrock attribution is a billing/observability control, not a
tenant-isolation one (contrast ``aws_session.py``, which fails closed). If the
attribution config is absent or the assume-role fails, this helper emits the
**ambient** compute-role credentials so Bedrock keeps working untagged — losing
chargeback granularity is not a security incident, and the compute role retains
``InvokeModel`` precisely so this fallback works.

The role ARN and tag values are read from a 0600 JSON file the agent writes at
startup (``write_attribution_file``), not from the environment — so the tenant
identifiers are not inherited by the untrusted repo subprocesses the agent
spawns, matching the discipline in ``aws_session.py``.

Output shape (consumed by Claude Code's awsCredentialExport):

{"Credentials": {"AccessKeyId": "...", "SecretAccessKey": "...",
"SessionToken": "...", "Expiration": "<ISO8601>"}}
"""

from __future__ import annotations

import json
import os
import sys
from typing import Any

# Fixed path the agent writes (0600) and this helper reads. A fixed path is
# required because the managed-settings ``awsCredentialExport`` command is
# static (baked into the image) and cannot carry per-task arguments.
# ATTRIBUTION_FILE_ENV names the environment variable that, when set to a
# non-blank value, overrides DEFAULT_ATTRIBUTION_FILE (see
# ``attribution_file_path``).
ATTRIBUTION_FILE_ENV = "BEDROCK_ATTRIBUTION_FILE"
DEFAULT_ATTRIBUTION_FILE = "/home/agent/.bedrock-attribution.json"

# Role chaining caps the assumed session at 1 hour; request the max the cap
# allows. Claude Code refreshes ~5 min before the returned Expiration.
# Passed as ``DurationSeconds`` to ``assume_role`` in ``resolve_credentials``.
_CHAINED_SESSION_DURATION_S = 3600


def attribution_file_path() -> str:
    """Return the attribution-file path.

    The value of the environment variable named by ``ATTRIBUTION_FILE_ENV``
    wins when it is set to something other than whitespace; otherwise
    ``DEFAULT_ATTRIBUTION_FILE`` is returned.
    """
    override = os.environ.get(ATTRIBUTION_FILE_ENV, "").strip()
    if override:
        return override
    return DEFAULT_ATTRIBUTION_FILE


def write_attribution_file(
    role_arn: str, tags: list[dict[str, str]], path: str | None = None
) -> str:
    """Persist the SessionRole ARN + STS tags for the helper to read.

    The file always ends up with mode 0600, whether it is newly created or
    overwrites an existing file. Called by the agent at startup (see
    ``runner._setup_agent_env``) only when a SessionRole is configured;
    absence of the file is the helper's fail-open signal.

    Args:
        role_arn: ARN of the role the helper should assume.
        tags: STS session tags as ``{"Key": ..., "Value": ...}`` dicts.
        path: Destination file; defaults to :func:`attribution_file_path`.

    Returns:
        The path that was written.

    Raises:
        OSError: If the file cannot be opened, re-moded, or written.
    """
    target = path or attribution_file_path()
    payload = json.dumps({"role_arn": role_arn, "tags": tags})
    # Create with 0600 from the start (os.open + O_CREAT honors mode, modulo
    # umask) so a freshly created file is never briefly world-readable.
    fd = os.open(target, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        # The mode given to os.open only applies when the file is created. A
        # file that already existed keeps its old permission bits, so tighten
        # them on the open descriptor before any payload is written.
        os.fchmod(fd, 0o600)
        fh = os.fdopen(fd, "w")
    except Exception:
        # Until os.fdopen succeeds nothing owns the raw descriptor; close it
        # here so a failure does not leak it.
        os.close(fd)
        raise
    with fh:
        fh.write(payload)
    return target


def _warn(message: str) -> None:
    """Write one diagnostic line to stderr, prefixed with ``[bedrock-creds]``.

    Stdout of this process is reserved for the credential JSON that Claude
    Code parses as the ``awsCredentialExport`` result, so anything else
    printed there would corrupt the envelope. All fail-open paths report
    through this function so that falling back to untagged credentials leaves
    a visible trace instead of happening silently.
    """
    sys.stderr.write(f"[bedrock-creds] {message}\n")


def _emit(creds: dict[str, str]) -> None:
    """Write ``{"Credentials": creds}`` as JSON to stdout, with no trailing newline."""
    envelope = {"Credentials": creds}
    sys.stdout.write(json.dumps(envelope))


def _frozen_to_creds(frozen: Any, expiry_iso: str | None) -> dict[str, str]:
    """Map a frozen-credentials object onto the credential-envelope keys.

    Reads ``access_key``, ``secret_key`` and ``token`` from ``frozen``; a
    missing or empty token becomes ``""``. ``Expiration`` is included only
    when ``expiry_iso`` is a non-empty string.
    """
    envelope = dict(
        AccessKeyId=frozen.access_key,
        SecretAccessKey=frozen.secret_key,
        SessionToken=frozen.token or "",
    )
    if not expiry_iso:
        return envelope
    envelope["Expiration"] = expiry_iso
    return envelope


def _ambient_credentials() -> dict[str, str]:
    """Return the ambient (compute-role) credentials used as the fail-open fallback.

    The credentials come from botocore's default session and are returned
    without an ``Expiration`` entry. When botocore resolves no credentials at
    all, a warning is logged and an empty dict is returned.
    """
    import botocore.session

    ambient = botocore.session.get_session().get_credentials()
    if ambient is not None:
        return _frozen_to_creds(ambient.get_frozen_credentials(), None)

    # Nothing resolvable: the deepest degradation. An empty object is emitted
    # so Claude Code falls back to its own default-chain resolution; if that
    # also fails, this stderr line is the only breadcrumb.
    _warn(
        "no resolvable AWS credentials; emitting empty envelope, "
        "Claude Code will use its default chain"
    )
    return {}


def resolve_credentials() -> dict[str, str]:
    """Return tagged assumed-role creds, or ambient creds on any failure.

    Reads the role ARN and STS tags from the attribution file, assumes that
    role with the tags, and returns the resulting credentials including their
    ``Expiration``. Every failure — file absent, file malformed or of the
    wrong shape, boto3 unavailable, ``assume_role`` rejected, or anything
    unexpected — is reported through :func:`_warn` and answered with
    :func:`_ambient_credentials`, so this function fails open rather than
    raising.

    Returns:
        A credential dict with ``AccessKeyId``, ``SecretAccessKey``,
        ``SessionToken`` and ``Expiration`` on success, otherwise whatever
        :func:`_ambient_credentials` returns.
    """
    import re

    path = attribution_file_path()
    try:
        with open(path, encoding="utf-8") as fh:
            cfg = json.load(fh)
        role_arn = cfg["role_arn"]
        tags = cfg.get("tags", [])
        # Extracted inside the guarded region: a tags value that is not a list
        # of {"Key", "Value"} dicts must fall back to ambient creds, not crash
        # the helper with an empty stdout.
        task_id = next((t["Value"] for t in tags if t.get("Key") == "task_id"), "")
    except FileNotFoundError:
        # Attribution not configured (local/dev, or pre-provisioning). Expected
        # and benign — debug-level signal only.
        _warn("attribution file absent; not configured — using ambient creds")
        return _ambient_credentials()
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as exc:
        # File present but unreadable/malformed/schema-drifted. TypeError and
        # AttributeError cover valid JSON of the wrong shape (e.g. a top-level
        # list, or tags that are not dicts). This is NOT the benign "not
        # configured" case — it points at a write_attribution_file bug or a
        # partial write, so it warrants a louder signal.
        _warn(
            f"attribution file present but unreadable ({type(exc).__name__}: {exc}); "
            "using ambient creds"
        )
        return _ambient_credentials()

    try:
        import boto3
        from botocore.exceptions import BotoCoreError, ClientError
    except ImportError as exc:
        # boto3 missing/broken in the image is a packaging defect, not the
        # expected assume-role failure — name it explicitly so it can't hide.
        _warn(f"boto3 unavailable ({exc}); using ambient creds — fix the image")
        return _ambient_credentials()

    region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION")
    # Use the bare prefix when there is no task_id instead of a name ending in
    # a dangling hyphen. Characters outside the set STS accepts for
    # RoleSessionName ([\w+=,.@-]) are replaced so an unusual task_id cannot
    # cost the task its attribution; the result is capped at 64 characters.
    raw_name = f"abca-bedrock-{task_id}" if task_id else "abca-bedrock"
    session_name = re.sub(r"[^\w+=,.@-]", "-", raw_name, flags=re.ASCII)[:64]
    try:
        resp = boto3.client("sts", region_name=region).assume_role(
            RoleArn=role_arn,
            RoleSessionName=session_name,
            DurationSeconds=_CHAINED_SESSION_DURATION_S,
            Tags=tags,
        )
        c = resp["Credentials"]
        return {
            "AccessKeyId": c["AccessKeyId"],
            "SecretAccessKey": c["SecretAccessKey"],
            "SessionToken": c["SessionToken"],
            "Expiration": c["Expiration"].isoformat(),
        }
    except (ClientError, BotoCoreError) as exc:
        # Expected assume failure: role not yet provisioned, AccessDenied,
        # transient STS error. Fail open so Bedrock keeps working on the
        # compute role; spend for this task is untagged.
        _warn(
            f"assume_role failed ({type(exc).__name__}: {exc}); using ambient creds "
            "— Bedrock spend will be UNTAGGED"
        )
        return _ambient_credentials()
    except Exception as exc:
        # Anything else (unexpected STS response shape, a logic bug here) is NOT
        # the expected fallback. Still fail open — this is a billing control, not
        # isolation — but flag it distinctly so it isn't mistaken for AccessDenied.
        _warn(
            f"UNEXPECTED error minting tagged creds ({type(exc).__name__}: {exc}); "
            "using ambient creds"
        )
        return _ambient_credentials()


def main() -> int:
    """Resolve credentials, write the envelope to stdout, and return exit code 0."""
    credentials = resolve_credentials()
    _emit(credentials)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
54 changes: 54 additions & 0 deletions agent/src/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,58 @@ def _parse_token_usage(raw_usage: Any) -> TokenUsage:
return TokenUsage(**values)


def _setup_bedrock_cost_attribution(config: TaskConfig) -> None:
    """Configure Bedrock cost attribution for the Claude Code subprocess (#215).

    Claude Code, not the agent's boto3, issues the ``InvokeModel`` calls, so
    attribution is set up through Claude Code's own credential and header
    channels. Two things happen, both only when at least one of
    ``config.user_id`` / ``config.repo_url`` / ``config.task_id`` is non-empty:

    1. **Per-user/repo chargeback.** When ``AGENT_SESSION_ROLE_ARN`` is set,
       the role ARN and the ``{user_id, repo, task_id}`` STS tags are written
       to the attribution file read by ``bedrock_creds_helper.py``. A failed
       write is logged and otherwise ignored; the helper then fails open to
       ambient credentials and spend is untagged.

    2. **Per-call forensics.** ``ANTHROPIC_CUSTOM_HEADERS`` is set on the
       process environment to an ``X-Amzn-Bedrock-Request-Metadata`` header
       carrying the same tag values as compact JSON. It is set on the process
       env rather than in project settings so the untrusted cloned repo cannot
       alter it.
    """
    import json

    from aws_session import MAX_TAG_VALUE_LEN, build_session_tags

    session_role = os.environ.get("AGENT_SESSION_ROLE_ARN", "").strip()
    tags = build_session_tags(config.user_id, config.repo_url, config.task_id)
    if not tags:
        # No identifiers at all: nothing to persist and no header to send.
        return

    if session_role:
        try:
            from bedrock_creds_helper import write_attribution_file

            write_attribution_file(session_role, tags)
        except OSError as exc:
            # Attribution is observability, not isolation, so a failed write
            # must not stop the task. Bedrock keeps working on the compute
            # role; only the tagged chargeback is lost.
            log("WARN", f"Bedrock attribution file not written ({exc}); spend will be untagged")

    # The request metadata mirrors the STS tag values. Bedrock limits keys and
    # values to 256 chars and records them under ``requestMetadata`` in the
    # invocation logs.
    #
    # The tenant-data tags are deliberately kept out of os.environ, but this
    # header has to live there because Claude Code reads
    # ANTHROPIC_CUSTOM_HEADERS from the process env. That is acceptable: the
    # values are the task's own {user_id, repo, task_id}, which are not
    # secret, so a spawned subprocess only learns who it is already running
    # for. json.dumps escapes newlines and quotes, so a crafted repo slug
    # cannot smuggle in an extra newline-separated header.
    metadata = {}
    for tag in tags:
        metadata[tag["Key"]] = tag["Value"][:MAX_TAG_VALUE_LEN]
    header_value = json.dumps(metadata, separators=(",", ":"))
    os.environ["ANTHROPIC_CUSTOM_HEADERS"] = f"X-Amzn-Bedrock-Request-Metadata: {header_value}"


def _setup_agent_env(config: TaskConfig) -> tuple[str | None, str | None]:
"""Configure process environment for the Claude Code CLI subprocess.

Expand All @@ -72,6 +124,8 @@ def _setup_agent_env(config: TaskConfig) -> tuple[str | None, str | None]:
os.environ["ANTHROPIC_MODEL"] = config.anthropic_model
os.environ["GITHUB_TOKEN"] = config.github_token
os.environ["GH_TOKEN"] = config.github_token

_setup_bedrock_cost_attribution(config)
# DO NOT set ANTHROPIC_LOG — any logging level causes the CLI to write to
# stderr, which fills the OS pipe buffer (64 KB) and deadlocks the
# single-threaded Node.js CLI process (blocked stderr write prevents stdout
Expand Down
Loading
Loading