diff --git a/cdk/src/constructs/ecs-agent-cluster.ts b/cdk/src/constructs/ecs-agent-cluster.ts index 1ccbf7c4..5795c00a 100644 --- a/cdk/src/constructs/ecs-agent-cluster.ts +++ b/cdk/src/constructs/ecs-agent-cluster.ts @@ -24,6 +24,7 @@ import * as ecr_assets from 'aws-cdk-lib/aws-ecr-assets'; import * as ecs from 'aws-cdk-lib/aws-ecs'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as logs from 'aws-cdk-lib/aws-logs'; +import * as s3 from 'aws-cdk-lib/aws-s3'; import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; import { NagSuppressions } from 'cdk-nag'; import { Construct } from 'constructs'; @@ -38,6 +39,18 @@ export interface EcsAgentClusterProps { readonly githubTokenSecret: secretsmanager.ISecret; readonly memoryId?: string; + /** + * S3 bucket holding per-task ECS payloads (#502). The orchestrator writes the + * payload (incl. the large hydrated_context, which can't fit in the 8 KB + * RunTask containerOverrides limit) here and passes only an + * `AGENT_PAYLOAD_S3_URI` pointer; the container fetches it on boot. The task + * role gets **read-only** on this bucket — the container runs untrusted repo + * code, so it must not be able to delete payloads (the trusted orchestrator + * owns write + delete). When omitted (isolated construct tests / deployments + * that still pass the payload inline), no grant or env var is added. + */ + readonly payloadBucket?: s3.IBucket; + /** * Per-task SessionRole (#209). When provided, tenant-data DynamoDB access * (task/events tables) is NOT granted to the Fargate task role; instead the @@ -132,6 +145,10 @@ export class EcsAgentCluster extends Construct { LOG_GROUP_NAME: logGroup.logGroupName, GITHUB_TOKEN_SECRET_ARN: props.githubTokenSecret.secretArn, ...(props.memoryId && { MEMORY_ID: props.memoryId }), + // #502: the payload bucket name so the orchestrator-issued + // AGENT_PAYLOAD_S3_URI can be fetched. (The orchestrator sets the URI + // per-task via container override; this is informational parity.) + ...(props.payloadBucket && { ECS_PAYLOAD_BUCKET: props.payloadBucket.bucketName }), // Per-session IAM scoping (#209): when a SessionRole is wired, the // agent assumes it for tenant-data access (see aws_session.py). ...(props.agentSessionRole && { @@ -160,6 +177,15 @@ export class EcsAgentCluster extends Construct { // agent assumes the SessionRole — stays on the task role). props.githubTokenSecret.grantRead(taskRole); + // #502: read-only on the ECS payload bucket so the container can fetch its + // payload (AGENT_PAYLOAD_S3_URI) at boot. READ only — the container runs + // untrusted repo code, so it must not be able to write or delete payloads + // (the trusted orchestrator owns write + delete). Stays on the task role + // (read once at startup, before the agent assumes any SessionRole). + if (props.payloadBucket) { + props.payloadBucket.grantRead(taskRole); + } + // Bedrock model invocation — scoped to explicit foundation-model and // cross-region inference-profile ARNs (parity with the AgentCore runtime // grants in agent.ts), replacing the prior Resource: '*' wildcard. @@ -201,7 +227,7 @@ export class EcsAgentCluster extends Construct { NagSuppressions.addResourceSuppressions(this.taskDefinition, [ { id: 'AwsSolutions-IAM5', - reason: 'DynamoDB index/* wildcards from CDK grantReadWriteData (UserConcurrencyTable, and task tables only when no SessionRole is wired); Secrets Manager wildcards from CDK grantRead; CloudWatch Logs wildcards from CDK grantWrite. Bedrock InvokeModel is scoped to explicit model/inference-profile ARNs (no wildcard resource).', + reason: 'DynamoDB index/* wildcards from CDK grantReadWriteData (UserConcurrencyTable, and task tables only when no SessionRole is wired); Secrets Manager wildcards from CDK grantRead; CloudWatch Logs wildcards from CDK grantWrite; S3 object/* wildcard from CDK grantRead on the ECS payload bucket (read-only, scoped to that bucket — #502). Bedrock InvokeModel is scoped to explicit model/inference-profile ARNs (no wildcard resource).', }, { id: 'AwsSolutions-ECS2', diff --git a/cdk/src/constructs/ecs-payload-bucket.ts b/cdk/src/constructs/ecs-payload-bucket.ts new file mode 100644 index 00000000..62a8c64c --- /dev/null +++ b/cdk/src/constructs/ecs-payload-bucket.ts @@ -0,0 +1,117 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { Duration, RemovalPolicy } from 'aws-cdk-lib'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { Construct } from 'constructs'; + +/** + * Lifecycle expiry for ECS task payloads. The payload is consumed once, at + * container boot, and the orchestrator deletes it promptly in the ``finalize`` + * step. This 1-day rule is only a crash backstop: if the orchestrator dies + * before finalize (rare — it runs under durable execution), the object is still + * reaped within a day instead of lingering. Payloads carry the hydrated prompt + * context, so a tight TTL also keeps the blast radius of an accidental + * permission leak small. + */ +export const ECS_PAYLOAD_TTL_DAYS = 1; + +/** + * Object-key prefix for ECS task payloads. Key layout: + * ``/payload.json``. Each task writes a single object under its own + * task-id prefix; the orchestrator deletes it on terminal. + */ +export const ECS_PAYLOAD_OBJECT_KEY_PREFIX = ''; + +/** + * Properties for the EcsPayloadBucket construct. + */ +export interface EcsPayloadBucketProps { + /** + * Removal policy for the bucket. + * @default RemovalPolicy.DESTROY + */ + readonly removalPolicy?: RemovalPolicy; + + /** + * Whether to auto-delete objects when the bucket is removed (so ``cdk + * destroy`` does not need a manual bucket-empty first). Mirrors + * ``TraceArtifactsBucket`` / ``AttachmentsBucket``. Deploys CDK's + * ``Custom::S3AutoDeleteObjects`` Lambda with delete permissions on this + * bucket — acceptable here because the contents are ephemeral throwaway + * payloads. + * @default true + */ + readonly autoDeleteObjects?: boolean; +} + +/** + * S3 bucket for ECS task payloads (#502). + * + * The ECS compute strategy cannot pass the orchestrator payload (repo URL, + * prompt, and the large ``hydrated_context``) inline: a Fargate ``RunTask`` + * caps the entire ``containerOverrides`` blob at 8192 bytes, and the hydrated + * context routinely exceeds that, so the call is rejected with + * ``InvalidParameterException``. (AgentCore is unaffected — it passes the + * payload in the ``InvokeAgentRuntime`` request body, which has no comparable + * limit.) Instead, the orchestrator writes the payload to + * ``s3:////payload.json`` and passes only a small + * ``AGENT_PAYLOAD_S3_URI`` pointer in the override; the container fetches and + * parses it on boot. + * + * Dedicated (not co-tenant with attachments/traces) so the boundary is + * structural: the ECS task role gets S3 **read** here and nowhere else, the + * attachments feature can never collide with payload keys, and the tight + * 1-day TTL is whole-bucket rather than a prefix-scoped rule grafted onto a + * shared bucket. + * + * Security / hygiene (parity with TraceArtifactsBucket): + * - ``blockPublicAccess: BLOCK_ALL`` + ``enforceSSL: true`` — no public read, + * TLS-only transport. + * - ``encryption: S3_MANAGED`` — server-side encryption at rest. + * - 1-day lifecycle expiry — payloads are ephemeral (read once at boot, + * deleted by the orchestrator at finalize); this is the crash backstop. + */ +export class EcsPayloadBucket extends Construct { + /** The underlying S3 bucket. */ + public readonly bucket: s3.Bucket; + + constructor(scope: Construct, id: string, props: EcsPayloadBucketProps = {}) { + super(scope, id); + + this.bucket = new s3.Bucket(this, 'Bucket', { + blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, + encryption: s3.BucketEncryption.S3_MANAGED, + enforceSSL: true, + lifecycleRules: [ + { + id: 'ecs-payload-ttl', + enabled: true, + expiration: Duration.days(ECS_PAYLOAD_TTL_DAYS), + // Reap incomplete multipart uploads after 1 day. Object expiration + // does not apply to in-flight MPUs (they are not objects yet), so a + // separate reaper keeps stale upload parts from lingering. + abortIncompleteMultipartUploadAfter: Duration.days(1), + }, + ], + removalPolicy: props.removalPolicy ?? RemovalPolicy.DESTROY, + autoDeleteObjects: props.autoDeleteObjects ?? true, + }); + } +} diff --git a/cdk/src/constructs/task-orchestrator.ts b/cdk/src/constructs/task-orchestrator.ts index b8b380ce..a638b369 100644 --- a/cdk/src/constructs/task-orchestrator.ts +++ b/cdk/src/constructs/task-orchestrator.ts @@ -159,6 +159,16 @@ export interface TaskOrchestratorProps { readonly executionRoleArn: string; }; + /** + * S3 bucket for per-task ECS payloads (#502). When provided (alongside + * ``ecsConfig``), the orchestrator writes the payload here and passes only an + * ``AGENT_PAYLOAD_S3_URI`` pointer in the RunTask override (the full payload + * exceeds the 8 KB containerOverrides limit), then deletes the object in the + * finalize step. The orchestrator gets write + delete; the ECS task role gets + * read-only (granted on the bucket by ``EcsAgentCluster``). + */ + readonly ecsPayloadBucket?: s3.IBucket; + /** * S3 bucket for task attachments. When provided, the orchestrator gets * ReadWrite grants for URL fetch/screen/upload during hydration. @@ -220,6 +230,7 @@ export class TaskOrchestrator extends Construct { '@aws-sdk/client-ecs', '@aws-sdk/client-lambda', '@aws-sdk/client-bedrock-runtime', + '@aws-sdk/client-s3', '@aws-sdk/client-secrets-manager', '@aws-sdk/lib-dynamodb', '@aws-sdk/util-dynamodb', @@ -262,6 +273,9 @@ export class TaskOrchestrator extends Construct { ECS_SECURITY_GROUP: props.ecsConfig.securityGroup, ECS_CONTAINER_NAME: props.ecsConfig.containerName, }), + // #502: bucket the orchestrator writes the ECS payload to (and deletes + // from at finalize); the ECS strategy reads this to build the S3 URI. + ...(props.ecsPayloadBucket && { ECS_PAYLOAD_BUCKET: props.ecsPayloadBucket.bucketName }), ...(props.attachmentsBucket && { ATTACHMENTS_BUCKET_NAME: props.attachmentsBucket.bucketName }), }, bundling: orchestratorBundling, @@ -280,6 +294,15 @@ export class TaskOrchestrator extends Construct { props.attachmentsBucket.grantReadWrite(this.fn); } + // #502: ECS payload bucket — the orchestrator writes the payload before + // RunTask and deletes it at finalize. Write + delete only (it never reads + // its own payload back; the ECS container is the reader, with its own + // read-only grant from EcsAgentCluster). + if (props.ecsPayloadBucket) { + props.ecsPayloadBucket.grantPut(this.fn); + props.ecsPayloadBucket.grantDelete(this.fn); + } + // Durable execution managed policy this.fn.role!.addManagedPolicy( iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaBasicDurableExecutionRolePolicy'), diff --git a/cdk/src/handlers/orchestrate-task.ts b/cdk/src/handlers/orchestrate-task.ts index 2e7be6de..d547c990 100644 --- a/cdk/src/handlers/orchestrate-task.ts +++ b/cdk/src/handlers/orchestrate-task.ts @@ -36,6 +36,7 @@ import { type PollState, } from './shared/orchestrator'; import { runPreflightChecks } from './shared/preflight'; +import { deleteEcsPayload } from './shared/strategies/ecs-strategy'; import type { TaskRecord } from './shared/types'; import { workflowIsReadOnly, workflowRequiresRepo } from './shared/workflows'; @@ -297,6 +298,14 @@ const durableHandler: DurableExecutionHandler = asyn // Step 6: Finalize — update terminal status, emit events, release concurrency await context.step('finalize', async () => { await finalizeTask(taskId, finalPollState, task.user_id); + // #502: the task is terminal — the container has long since read its + // payload, so delete the ephemeral S3 payload object now. Best-effort + // (deleteEcsPayload swallows errors) and a no-op for AgentCore tasks / + // deployments without a payload bucket; the bucket's 1-day lifecycle rule + // is the backstop if this delete or the whole step never runs. + if (blueprintConfig.compute_type === 'ecs') { + await deleteEcsPayload(taskId); + } }); }; diff --git a/cdk/src/handlers/shared/strategies/ecs-strategy.ts b/cdk/src/handlers/shared/strategies/ecs-strategy.ts index 4ad3d0cb..df70e8a2 100644 --- a/cdk/src/handlers/shared/strategies/ecs-strategy.ts +++ b/cdk/src/handlers/shared/strategies/ecs-strategy.ts @@ -18,6 +18,7 @@ */ import { ECSClient, RunTaskCommand, DescribeTasksCommand, StopTaskCommand } from '@aws-sdk/client-ecs'; +import { S3Client, PutObjectCommand, DeleteObjectCommand } from '@aws-sdk/client-s3'; import type { ComputeStrategy, SessionHandle, SessionStatus } from '../compute-strategy'; import { logger } from '../logger'; import type { BlueprintConfig } from '../repo-config'; @@ -30,11 +31,60 @@ function getClient(): ECSClient { return sharedClient; } +let sharedS3Client: S3Client | undefined; +function getS3Client(): S3Client { + if (!sharedS3Client) { + sharedS3Client = new S3Client({}); + } + return sharedS3Client; +} + const ECS_CLUSTER_ARN = process.env.ECS_CLUSTER_ARN; const ECS_TASK_DEFINITION_ARN = process.env.ECS_TASK_DEFINITION_ARN; const ECS_SUBNETS = process.env.ECS_SUBNETS; const ECS_SECURITY_GROUP = process.env.ECS_SECURITY_GROUP; const ECS_CONTAINER_NAME = process.env.ECS_CONTAINER_NAME ?? 'AgentContainer'; +const ECS_PAYLOAD_BUCKET = process.env.ECS_PAYLOAD_BUCKET; + +/** + * Inline-payload size (bytes) above which we warn that RunTask will likely + * reject the call when no payload bucket is configured. ECS caps the TOTAL + * containerOverrides blob at 8192 bytes; the other env vars + command consume + * some of that, so 6 KB of payload is the practical danger line (#502). + */ +const INLINE_PAYLOAD_WARN_BYTES = 6144; + +/** + * S3 object key for a task's ECS payload. One object per task under its own + * task-id prefix; deleted by the orchestrator at finalize (see + * ``deleteEcsPayload``), with the bucket's 1-day lifecycle rule as a backstop. + */ +export function ecsPayloadKey(taskId: string): string { + return `${taskId}/payload.json`; +} + +/** + * Delete a task's ECS payload object. Best-effort: a failed delete must never + * fail the task — the bucket's 1-day lifecycle rule reaps it regardless. Called + * from the orchestrator's ``finalize`` step once the task is terminal. No-ops + * when the payload bucket isn't configured (AgentCore-only deployments). + */ +export async function deleteEcsPayload(taskId: string): Promise { + if (!ECS_PAYLOAD_BUCKET) return; + try { + await getS3Client().send(new DeleteObjectCommand({ + Bucket: ECS_PAYLOAD_BUCKET, + Key: ecsPayloadKey(taskId), + })); + logger.info('Deleted ECS payload object', { task_id: taskId }); + } catch (err) { + // Non-fatal — the lifecycle rule is the backstop. + logger.warn('Failed to delete ECS payload object (non-fatal)', { + task_id: taskId, + error: err instanceof Error ? err.message : String(err), + }); + } +} export class EcsComputeStrategy implements ComputeStrategy { readonly type = 'ecs'; @@ -63,6 +113,37 @@ export class EcsComputeStrategy implements ComputeStrategy { // This avoids the server entirely and runs the agent in batch mode. const payloadJson = JSON.stringify(payload); + // #502: the payload (esp. hydrated_context) routinely exceeds the 8192-byte + // cap that ECS RunTask enforces on the TOTAL containerOverrides blob, which + // rejected the call with InvalidParameterException. Write the payload to S3 + // and pass only a small pointer (AGENT_PAYLOAD_S3_URI); the container fetches + // it on boot. The inline AGENT_PAYLOAD remains as a fallback for small + // payloads / deployments without a payload bucket configured. + let payloadS3Uri: string | undefined; + if (ECS_PAYLOAD_BUCKET) { + const key = ecsPayloadKey(taskId); + await getS3Client().send(new PutObjectCommand({ + Bucket: ECS_PAYLOAD_BUCKET, + Key: key, + Body: payloadJson, + ContentType: 'application/json', + })); + payloadS3Uri = `s3://${ECS_PAYLOAD_BUCKET}/${key}`; + logger.info('Wrote ECS payload to S3', { + task_id: taskId, + bytes: payloadJson.length, + uri: payloadS3Uri, + }); + } else if (payloadJson.length > INLINE_PAYLOAD_WARN_BYTES) { + // No bucket configured AND the payload is large enough that the inline + // path will almost certainly blow the 8192-byte overrides cap. Surface a + // clear cause rather than a raw InvalidParameterException from RunTask. + logger.warn('ECS payload is large but ECS_PAYLOAD_BUCKET is not set — RunTask may reject it (see #502)', { + task_id: taskId, + bytes: payloadJson.length, + }); + } + const containerEnv = [ { name: 'TASK_ID', value: taskId }, { name: 'REPO_URL', value: String(payload.repo_url ?? '') }, @@ -73,9 +154,12 @@ export class EcsComputeStrategy implements ComputeStrategy { ...(blueprintConfig.model_id ? [{ name: 'ANTHROPIC_MODEL', value: blueprintConfig.model_id }] : []), ...(blueprintConfig.system_prompt_overrides ? [{ name: 'SYSTEM_PROMPT_OVERRIDES', value: blueprintConfig.system_prompt_overrides }] : []), { name: 'CLAUDE_CODE_USE_BEDROCK', value: '1' }, - // Full orchestrator payload as JSON — the Python wrapper reads this to - // call run_task() with all fields including hydrated_context. - { name: 'AGENT_PAYLOAD', value: payloadJson }, + // #502: prefer the S3 pointer; fall back to the inline payload when no + // bucket is configured (keeps small-payload / AgentCore-only deployments + // working with no behavior change). + ...(payloadS3Uri + ? [{ name: 'AGENT_PAYLOAD_S3_URI', value: payloadS3Uri }] + : [{ name: 'AGENT_PAYLOAD', value: payloadJson }]), ...(payload.github_token_secret_arn ? [{ name: 'GITHUB_TOKEN_SECRET_ARN', value: String(payload.github_token_secret_arn) }] : []), @@ -83,16 +167,22 @@ export class EcsComputeStrategy implements ComputeStrategy { ]; // Override the container command to run a Python one-liner that: - // 1. Reads the AGENT_PAYLOAD env var (full orchestrator payload JSON) - // 2. Calls entrypoint.run_task() directly with all fields - // 3. Exits with code 0 on success, 1 on failure + // 1. Loads the payload — from S3 (AGENT_PAYLOAD_S3_URI) when set, else the + // inline AGENT_PAYLOAD env var (fallback). + // 2. Calls entrypoint.run_task() directly with all fields. + // 3. Exits with code 0 on success, 1 on failure. // This bypasses the uvicorn server entirely — no HTTP, no OTEL noise. const bootCommand = [ 'python', '-c', 'import json, os, sys; ' + 'sys.path.insert(0, "/app/src"); ' + 'from entrypoint import run_task; ' - + 'p = json.loads(os.environ["AGENT_PAYLOAD"]); ' + + '_uri = os.environ.get("AGENT_PAYLOAD_S3_URI"); ' + + 'p = (' + + 'json.loads(__import__("boto3").client("s3").get_object(' + + 'Bucket=_uri.split("/",3)[2], Key=_uri.split("/",3)[3])["Body"].read()) ' + + 'if _uri else json.loads(os.environ["AGENT_PAYLOAD"])' + + '); ' + 'r = run_task(' + 'repo_url=p.get("repo_url",""), ' + 'task_description=p.get("prompt",""), ' diff --git a/cdk/src/stacks/agent.ts b/cdk/src/stacks/agent.ts index 192496d3..95bc3191 100644 --- a/cdk/src/stacks/agent.ts +++ b/cdk/src/stacks/agent.ts @@ -40,6 +40,7 @@ import { CedarWasmLayer } from '../constructs/cedar-wasm-layer'; import { ConcurrencyReconciler } from '../constructs/concurrency-reconciler'; import { DnsFirewall } from '../constructs/dns-firewall'; // import { EcsAgentCluster } from '../constructs/ecs-agent-cluster'; +// import { EcsPayloadBucket } from '../constructs/ecs-payload-bucket'; import { FanOutConsumer } from '../constructs/fanout-consumer'; import { GitHubScreenshotIntegration } from '../constructs/github-screenshot-integration'; import { JiraIntegration } from '../constructs/jira-integration'; @@ -594,6 +595,11 @@ export class AgentStack extends Stack { // platform: ecr_assets.Platform.LINUX_ARM64, // }); // + // // #502: ephemeral bucket for ECS task payloads — the orchestrator writes + // // the payload here (it exceeds the 8 KB RunTask containerOverrides limit) + // // and passes only an S3 URI pointer; the container fetches it on boot. + // const ecsPayloadBucket = new EcsPayloadBucket(this, 'EcsPayloadBucket'); + // // const ecsCluster = new EcsAgentCluster(this, 'EcsAgentCluster', { // vpc: agentVpc.vpc, // agentImageAsset, @@ -602,6 +608,8 @@ export class AgentStack extends Stack { // userConcurrencyTable: userConcurrencyTable.table, // githubTokenSecret, // memoryId: agentMemory.memory.memoryId, + // // #502: read-only grant so the container can fetch its payload. + // payloadBucket: ecsPayloadBucket.bucket, // // Per-session IAM scoping (#209): the ECS task role assumes the same // // SessionRole as the AgentCore runtime for tenant-data access. The // // construct admits the task role to the trust and injects @@ -631,6 +639,9 @@ export class AgentStack extends Stack { // taskRoleArn: ecsCluster.taskRoleArn, // executionRoleArn: ecsCluster.executionRoleArn, // }, + // // #502: pass the payload bucket so the orchestrator writes/deletes the + // // out-of-band payload and the ECS strategy builds the S3 URI pointer. + // ecsPayloadBucket: ecsPayloadBucket.bucket, }); // Now that the orchestrator exists, resolve the Lazy used by TaskApi at synth. diff --git a/cdk/test/constructs/ecs-agent-cluster.test.ts b/cdk/test/constructs/ecs-agent-cluster.test.ts index a6164a56..2d351644 100644 --- a/cdk/test/constructs/ecs-agent-cluster.test.ts +++ b/cdk/test/constructs/ecs-agent-cluster.test.ts @@ -310,3 +310,78 @@ describe('EcsAgentCluster construct', () => { }); }); }); + +describe('EcsAgentCluster payload bucket (#502)', () => { + function createWithPayloadBucket(): Template { + const app = new App(); + const stack = new Stack(app, 'TestStack'); + const vpc = new ec2.Vpc(stack, 'Vpc', { maxAzs: 2 }); + const agentImageAsset = new ecr_assets.DockerImageAsset(stack, 'AgentImage', { + directory: path.join(__dirname, '..', '..', '..', 'agent'), + }); + const taskTable = new dynamodb.Table(stack, 'TaskTable', { + partitionKey: { name: 'task_id', type: dynamodb.AttributeType.STRING }, + }); + const taskEventsTable = new dynamodb.Table(stack, 'TaskEventsTable', { + partitionKey: { name: 'task_id', type: dynamodb.AttributeType.STRING }, + sortKey: { name: 'event_id', type: dynamodb.AttributeType.STRING }, + }); + const userConcurrencyTable = new dynamodb.Table(stack, 'UserConcurrencyTable', { + partitionKey: { name: 'user_id', type: dynamodb.AttributeType.STRING }, + }); + const githubTokenSecret = new secretsmanager.Secret(stack, 'GitHubTokenSecret'); + const payloadBucket = new s3.Bucket(stack, 'PayloadBucket'); + + new EcsAgentCluster(stack, 'EcsAgentCluster', { + vpc, + agentImageAsset, + taskTable, + taskEventsTable, + userConcurrencyTable, + githubTokenSecret, + payloadBucket, + }); + return Template.fromStack(stack); + } + + test('injects ECS_PAYLOAD_BUCKET into the container env', () => { + createWithPayloadBucket().hasResourceProperties('AWS::ECS::TaskDefinition', { + ContainerDefinitions: Match.arrayWith([ + Match.objectLike({ + Environment: Match.arrayWith([ + Match.objectLike({ Name: 'ECS_PAYLOAD_BUCKET', Value: Match.anyValue() }), + ]), + }), + ]), + }); + }); + + test('grants the task role READ on the payload bucket, never write/delete', () => { + const template = createWithPayloadBucket(); + const policies = template.findResources('AWS::IAM::Policy'); + const s3Actions = new Set(); + for (const policy of Object.values(policies)) { + for (const stmt of policy.Properties.PolicyDocument.Statement) { + const actions = Array.isArray(stmt.Action) ? stmt.Action : [stmt.Action]; + for (const a of actions) { + if (typeof a === 'string' && a.startsWith('s3:')) s3Actions.add(a); + } + } + } + // Read actions present... + expect([...s3Actions].some(a => a === 's3:GetObject' || a === 's3:GetObject*')).toBe(true); + // ...and NO write/delete on the payload bucket from the task role. + expect(s3Actions.has('s3:PutObject')).toBe(false); + expect(s3Actions.has('s3:DeleteObject')).toBe(false); + expect([...s3Actions].some(a => a.startsWith('s3:Put') || a.startsWith('s3:Delete'))).toBe(false); + }); + + test('omits ECS_PAYLOAD_BUCKET when no payload bucket is provided', () => { + const { template } = createStack(); + const taskDefs = template.findResources('AWS::ECS::TaskDefinition'); + for (const def of Object.values(taskDefs)) { + const env = def.Properties.ContainerDefinitions[0].Environment ?? []; + expect(env.some((e: { Name: string }) => e.Name === 'ECS_PAYLOAD_BUCKET')).toBe(false); + } + }); +}); diff --git a/cdk/test/constructs/ecs-payload-bucket.test.ts b/cdk/test/constructs/ecs-payload-bucket.test.ts new file mode 100644 index 00000000..acf96bac --- /dev/null +++ b/cdk/test/constructs/ecs-payload-bucket.test.ts @@ -0,0 +1,140 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { App, RemovalPolicy, Stack } from 'aws-cdk-lib'; +import { Match, Template } from 'aws-cdk-lib/assertions'; +import { ECS_PAYLOAD_TTL_DAYS, EcsPayloadBucket } from '../../src/constructs/ecs-payload-bucket'; + +describe('EcsPayloadBucket', () => { + let template: Template; + + beforeEach(() => { + const app = new App(); + const stack = new Stack(app, 'TestStack'); + new EcsPayloadBucket(stack, 'EcsPayloadBucket'); + template = Template.fromStack(stack); + }); + + test('creates an S3 bucket with all public access blocked', () => { + template.hasResourceProperties('AWS::S3::Bucket', { + PublicAccessBlockConfiguration: { + BlockPublicAcls: true, + BlockPublicPolicy: true, + IgnorePublicAcls: true, + RestrictPublicBuckets: true, + }, + }); + }); + + test('enables S3-managed server-side encryption', () => { + template.hasResourceProperties('AWS::S3::Bucket', { + BucketEncryption: { + ServerSideEncryptionConfiguration: [ + { ServerSideEncryptionByDefault: { SSEAlgorithm: 'AES256' } }, + ], + }, + }); + }); + + test('attaches a bucket policy enforcing TLS-only access', () => { + template.hasResourceProperties('AWS::S3::BucketPolicy', { + PolicyDocument: { + Statement: Match.arrayWith([ + Match.objectLike({ + Effect: 'Deny', + Action: 's3:*', + Condition: { Bool: { 'aws:SecureTransport': 'false' } }, + }), + ]), + }, + }); + }); + + test('configures a 1-day expiration lifecycle rule (payloads are ephemeral)', () => { + template.hasResourceProperties('AWS::S3::Bucket', { + LifecycleConfiguration: { + Rules: Match.arrayWith([ + Match.objectLike({ + Id: 'ecs-payload-ttl', + Status: 'Enabled', + ExpirationInDays: ECS_PAYLOAD_TTL_DAYS, + }), + ]), + }, + }); + }); + + test('aborts incomplete multipart uploads within a day', () => { + template.hasResourceProperties('AWS::S3::Bucket', { + LifecycleConfiguration: { + Rules: Match.arrayWith([ + Match.objectLike({ + AbortIncompleteMultipartUpload: { DaysAfterInitiation: 1 }, + }), + ]), + }, + }); + }); + + test('sets DESTROY removal policy by default', () => { + template.hasResource('AWS::S3::Bucket', { + DeletionPolicy: 'Delete', + UpdateReplacePolicy: 'Delete', + }); + }); + + test('enables autoDeleteObjects by default (matches TraceArtifactsBucket / AttachmentsBucket)', () => { + template.hasResourceProperties('Custom::S3AutoDeleteObjects', { + BucketName: Match.anyValue(), + }); + }); + + test('exposes a bucket handle via the `bucket` property', () => { + const app = new App(); + const stack = new Stack(app, 'TestStack'); + const payload = new EcsPayloadBucket(stack, 'EcsPayloadBucket'); + expect(payload.bucket).toBeDefined(); + expect(payload.bucket.bucketName).toBeDefined(); + }); +}); + +describe('EcsPayloadBucket with custom props', () => { + test('accepts custom removal policy and disables autoDelete', () => { + const app = new App(); + const stack = new Stack(app, 'TestStack'); + new EcsPayloadBucket(stack, 'EcsPayloadBucket', { + removalPolicy: RemovalPolicy.RETAIN, + autoDeleteObjects: false, + }); + const template = Template.fromStack(stack); + + template.hasResource('AWS::S3::Bucket', { + DeletionPolicy: 'Retain', + UpdateReplacePolicy: 'Retain', + }); + const customResources = template.findResources('Custom::S3AutoDeleteObjects'); + expect(Object.keys(customResources)).toHaveLength(0); + }); +}); + +describe('EcsPayloadBucket exported constants', () => { + test('TTL is 1 day (ephemeral payload, deleted promptly at finalize)', () => { + expect(ECS_PAYLOAD_TTL_DAYS).toBe(1); + }); +}); diff --git a/cdk/test/handlers/shared/strategies/ecs-strategy.test.ts b/cdk/test/handlers/shared/strategies/ecs-strategy.test.ts index 44748370..bee9c41a 100644 --- a/cdk/test/handlers/shared/strategies/ecs-strategy.test.ts +++ b/cdk/test/handlers/shared/strategies/ecs-strategy.test.ts @@ -36,6 +36,13 @@ jest.mock('@aws-sdk/client-ecs', () => ({ StopTaskCommand: jest.fn((input: unknown) => ({ _type: 'StopTask', input })), })); +const mockS3Send = jest.fn(); +jest.mock('@aws-sdk/client-s3', () => ({ + S3Client: jest.fn(() => ({ send: mockS3Send })), + PutObjectCommand: jest.fn((input: unknown) => ({ _type: 'PutObject', input })), + DeleteObjectCommand: jest.fn((input: unknown) => ({ _type: 'DeleteObject', input })), +})); + import { EcsComputeStrategy } from '../../../../src/handlers/shared/strategies/ecs-strategy'; beforeEach(() => { @@ -88,12 +95,15 @@ describe('EcsComputeStrategy', () => { { name: 'CLAUDE_CODE_USE_BEDROCK', value: '1' }, ])); - // AGENT_PAYLOAD contains the full orchestrator payload for direct run_task() invocation + // No ECS_PAYLOAD_BUCKET in this module's env → inline fallback (#502): the + // full payload rides in AGENT_PAYLOAD and nothing is written to S3. const agentPayload = envVars.find((e: { name: string }) => e.name === 'AGENT_PAYLOAD'); expect(agentPayload).toBeDefined(); const parsed = JSON.parse(agentPayload.value); expect(parsed.repo_url).toBe('org/repo'); expect(parsed.prompt).toBe('Fix the bug'); + expect(envVars.find((e: { name: string }) => e.name === 'AGENT_PAYLOAD_S3_URI')).toBeUndefined(); + expect(mockS3Send).not.toHaveBeenCalled(); // Container command override — runs Python directly instead of uvicorn expect(override.command).toBeDefined(); @@ -336,3 +346,110 @@ describe('EcsComputeStrategy', () => { }); }); }); + +// #502: the S3-pointer path requires ECS_PAYLOAD_BUCKET to be set BEFORE the +// module is imported (it's a module-level constant). Re-import under +// jest.isolateModules with the env var set so these tests don't perturb the +// inline-fallback tests above. +describe('EcsComputeStrategy with ECS_PAYLOAD_BUCKET (S3-pointer path, #502)', () => { + const PAYLOAD_BUCKET = 'test-ecs-payload-bucket'; + + function loadStrategyWithBucket(): { + EcsComputeStrategy: typeof import('../../../../src/handlers/shared/strategies/ecs-strategy').EcsComputeStrategy; + deleteEcsPayload: typeof import('../../../../src/handlers/shared/strategies/ecs-strategy').deleteEcsPayload; + ecsPayloadKey: typeof import('../../../../src/handlers/shared/strategies/ecs-strategy').ecsPayloadKey; + } { + let mod!: ReturnType; + jest.isolateModules(() => { + process.env.ECS_PAYLOAD_BUCKET = PAYLOAD_BUCKET; + process.env.ECS_CLUSTER_ARN = CLUSTER_ARN; + process.env.ECS_TASK_DEFINITION_ARN = TASK_DEF_ARN; + process.env.ECS_SUBNETS = 'subnet-aaa,subnet-bbb'; + process.env.ECS_SECURITY_GROUP = 'sg-12345'; + process.env.ECS_CONTAINER_NAME = 'AgentContainer'; + // eslint-disable-next-line @typescript-eslint/no-require-imports + mod = require('../../../../src/handlers/shared/strategies/ecs-strategy'); + }); + return mod; + } + + afterEach(() => { + delete process.env.ECS_PAYLOAD_BUCKET; + }); + + test('writes payload to S3 and passes AGENT_PAYLOAD_S3_URI, not the inline blob', async () => { + mockS3Send.mockResolvedValueOnce({}); + mockSend.mockResolvedValueOnce({ tasks: [{ taskArn: TASK_ARN }] }); + + const { EcsComputeStrategy: Strategy } = loadStrategyWithBucket(); + const strategy = new Strategy(); + await strategy.startSession({ + taskId: 'TASK001', + userId: 'cognito-test', + payload: { repo_url: 'org/repo', prompt: 'Fix the bug', hydrated_context: { big: 'x'.repeat(10000) } }, + blueprintConfig: { compute_type: 'ecs', runtime_arn: '' }, + }); + + // PutObject to the payload bucket at /payload.json + expect(mockS3Send).toHaveBeenCalledTimes(1); + const put = mockS3Send.mock.calls[0][0]; + expect(put._type).toBe('PutObject'); + expect(put.input.Bucket).toBe(PAYLOAD_BUCKET); + expect(put.input.Key).toBe('TASK001/payload.json'); + expect(JSON.parse(put.input.Body).repo_url).toBe('org/repo'); + + // Override carries the URI pointer, NOT the inline payload + const envVars = mockSend.mock.calls[0][0].input.overrides.containerOverrides[0].environment; + const uri = envVars.find((e: { name: string }) => e.name === 'AGENT_PAYLOAD_S3_URI'); + expect(uri.value).toBe(`s3://${PAYLOAD_BUCKET}/TASK001/payload.json`); + expect(envVars.find((e: { name: string }) => e.name === 'AGENT_PAYLOAD')).toBeUndefined(); + }); + + test('boot command loads payload from S3 when the URI is set, else inline', async () => { + mockS3Send.mockResolvedValueOnce({}); + mockSend.mockResolvedValueOnce({ tasks: [{ taskArn: TASK_ARN }] }); + + const { EcsComputeStrategy: Strategy } = loadStrategyWithBucket(); + await new Strategy().startSession({ + taskId: 'TASK001', + userId: 'cognito-test', + payload: { repo_url: 'org/repo' }, + blueprintConfig: { compute_type: 'ecs', runtime_arn: '' }, + }); + + const cmd = mockSend.mock.calls[0][0].input.overrides.containerOverrides[0].command; + const src = cmd[2]; + // Reads the URI, fetches via boto3 S3 when set, falls back to inline env. + expect(src).toContain('AGENT_PAYLOAD_S3_URI'); + expect(src).toContain('get_object'); + expect(src).toContain('AGENT_PAYLOAD'); + }); + + test('deleteEcsPayload deletes the task payload object', async () => { + mockS3Send.mockResolvedValueOnce({}); + const { deleteEcsPayload, ecsPayloadKey } = loadStrategyWithBucket(); + await deleteEcsPayload('TASK001'); + expect(mockS3Send).toHaveBeenCalledTimes(1); + const del = mockS3Send.mock.calls[0][0]; + expect(del._type).toBe('DeleteObject'); + expect(del.input.Bucket).toBe(PAYLOAD_BUCKET); + expect(del.input.Key).toBe(ecsPayloadKey('TASK001')); + expect(ecsPayloadKey('TASK001')).toBe('TASK001/payload.json'); + }); + + test('deleteEcsPayload swallows S3 errors (best-effort — lifecycle is the backstop)', async () => { + mockS3Send.mockRejectedValueOnce(new Error('AccessDenied')); + const { deleteEcsPayload } = loadStrategyWithBucket(); + await expect(deleteEcsPayload('TASK001')).resolves.toBeUndefined(); + }); +}); + +describe('deleteEcsPayload without ECS_PAYLOAD_BUCKET', () => { + test('no-ops when no payload bucket is configured', async () => { + // The top-of-file import has no ECS_PAYLOAD_BUCKET set. + // eslint-disable-next-line @typescript-eslint/no-require-imports + const { deleteEcsPayload } = require('../../../../src/handlers/shared/strategies/ecs-strategy'); + await expect(deleteEcsPayload('TASK001')).resolves.toBeUndefined(); + expect(mockS3Send).not.toHaveBeenCalled(); + }); +});