From 0bbbf65c7b02fd1abe1e3fba8cc78e2f98385cf6 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Wed, 10 Jun 2026 14:26:55 -0400 Subject: [PATCH 1/6] cancellation token --- .../110c67f8-15d8-44b5-beae-6f1223933c27.json | 11 + .../CallbackException.cs | 4 +- .../ChildContextConfig.cs | 2 +- .../DurableContext.cs | 55 ++-- .../DurableFunction.cs | 3 +- .../IDurableContext.cs | 72 +++-- .../IWaitForCallbackContext.cs | 2 +- .../Internal/ChildContextOperation.cs | 20 +- .../Internal/StepOperation.cs | 28 +- .../Internal/WaitForConditionOperation.cs | 20 +- .../Internal/WorkflowCancellation.cs | 46 +++ .../Amazon.Lambda.DurableExecution/README.md | 7 +- .../WaitForCallbackConfig.cs | 2 +- .../docs/core/callbacks.md | 12 +- .../docs/core/child-contexts.md | 14 +- .../docs/core/steps.md | 18 +- .../docs/core/wait-for-condition.md | 8 +- .../CallbackOperationTests.cs | 4 +- .../ChildContextOperationTests.cs | 44 +-- .../DurableContextTests.cs | 86 +++--- .../DurableFunctionTests.cs | 12 +- .../InvokeOperationTests.cs | 16 +- .../WaitForCallbackTests.cs | 28 +- .../WaitForConditionOperationTests.cs | 62 ++-- .../WorkflowCancellationTests.cs | 267 ++++++++++++++++++ 25 files changed, 627 insertions(+), 216 deletions(-) create mode 100644 .autover/changes/110c67f8-15d8-44b5-beae-6f1223933c27.json create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/Internal/WorkflowCancellation.cs create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/WorkflowCancellationTests.cs diff --git a/.autover/changes/110c67f8-15d8-44b5-beae-6f1223933c27.json b/.autover/changes/110c67f8-15d8-44b5-beae-6f1223933c27.json new file mode 100644 index 000000000..292e95fa0 --- /dev/null +++ b/.autover/changes/110c67f8-15d8-44b5-beae-6f1223933c27.json @@ -0,0 +1,11 @@ +{ + "Projects": [ + { + "Name": "Amazon.Lambda.DurableExecution", + "Type": "Patch", + "ChangelogMessages": [ + "Thread CancellationToken into every user Func accepted by IDurableContext (StepAsync, 
RunInChildContextAsync, WaitForCallbackAsync, WaitForConditionAsync). The token links the caller-supplied cancellation token with an SDK-owned workflow-shutdown signal so user step bodies unwind cleanly when the workflow is being torn down. Cancellation via the linked token is not checkpointed; user-thrown OperationCanceledException unrelated to the linked token continues to be treated as a normal step failure." + ] + } + ] +} \ No newline at end of file diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/CallbackException.cs b/Libraries/src/Amazon.Lambda.DurableExecution/CallbackException.cs index 2d1244b2b..61fb4a4f6 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/CallbackException.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/CallbackException.cs @@ -7,7 +7,7 @@ namespace Amazon.Lambda.DurableExecution; /// Base exception type for callback failures surfaced from /// /// or -/// . +/// . /// Concrete subclasses distinguish failure modes — pattern-match /// , , /// or in catch clauses. @@ -71,7 +71,7 @@ public CallbackTimeoutException(string message, Exception innerException) : base /// /// Thrown only from -/// +/// /// when the user-supplied submitter delegate (the step that hands the callback /// ID to the external system) fails after retries are exhausted. Wraps the /// underlying as . diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/ChildContextConfig.cs b/Libraries/src/Amazon.Lambda.DurableExecution/ChildContextConfig.cs index c97418a6a..c00adf909 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/ChildContextConfig.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/ChildContextConfig.cs @@ -9,7 +9,7 @@ namespace Amazon.Lambda.DurableExecution; /// /// A child context is a logical sub-workflow with its own deterministic /// operation-ID space, persisted as a CONTEXT operation. Use -/// +/// /// (and overloads) to run code inside one. 
/// public sealed class ChildContextConfig diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs b/Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs index e0f36720e..2a2ec3bb4 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs @@ -16,6 +16,7 @@ internal sealed class DurableContext : IDurableContext { private readonly ExecutionState _state; private readonly TerminationManager _terminationManager; + private readonly WorkflowCancellation _workflowCancellation; private readonly OperationIdGenerator _idGenerator; private readonly string _durableExecutionArn; private readonly CheckpointBatcher? _batcher; @@ -24,6 +25,7 @@ internal sealed class DurableContext : IDurableContext public DurableContext( ExecutionState state, TerminationManager terminationManager, + WorkflowCancellation workflowCancellation, OperationIdGenerator idGenerator, string durableExecutionArn, ILambdaContext lambdaContext, @@ -31,6 +33,7 @@ public DurableContext( { _state = state; _terminationManager = terminationManager; + _workflowCancellation = workflowCancellation; _idGenerator = idGenerator; _durableExecutionArn = durableExecutionArn; _batcher = batcher; @@ -55,14 +58,14 @@ public void ConfigureLogger(LoggerConfig config) } public Task StepAsync( - Func> func, + Func> func, string? name = null, StepConfig? config = null, CancellationToken cancellationToken = default) => RunStep(func, name, config, cancellationToken); public async Task StepAsync( - Func func, + Func func, string? name = null, StepConfig? config = null, CancellationToken cancellationToken = default) @@ -71,12 +74,12 @@ public async Task StepAsync( // step that always returns null. The serializer isn't actually invoked // with a non-null value, so any registered ILambdaSerializer suffices. 
await RunStep( - async (ctx) => { await func(ctx); return null; }, + async (ctx, ct) => { await func(ctx, ct); return null; }, name, config, cancellationToken); } private Task RunStep( - Func> func, + Func> func, string? name, StepConfig? config, CancellationToken cancellationToken) @@ -86,7 +89,7 @@ private Task RunStep( var operationId = _idGenerator.NextId(); var op = new StepOperation( operationId, name, _idGenerator.ParentId, func, config, serializer, Logger, - _state, _terminationManager, _durableExecutionArn, _batcher); + _state, _terminationManager, _workflowCancellation, _durableExecutionArn, _batcher); return op.ExecuteAsync(cancellationToken); } @@ -114,14 +117,14 @@ public Task WaitAsync( } public Task RunInChildContextAsync( - Func> func, + Func> func, string? name = null, ChildContextConfig? config = null, CancellationToken cancellationToken = default) => RunChildContext(func, name, config, cancellationToken); public async Task RunInChildContextAsync( - Func func, + Func func, string? name = null, ChildContextConfig? config = null, CancellationToken cancellationToken = default) @@ -130,12 +133,12 @@ public async Task RunInChildContextAsync( // returns null so the registered ILambdaSerializer is never asked to // serialize a real value. await RunChildContext( - async (ctx) => { await func(ctx); return null; }, + async (ctx, ct) => { await func(ctx, ct); return null; }, name, config, cancellationToken); } public Task WaitForConditionAsync( - Func> check, + Func> check, WaitForConditionConfig config, string? 
name = null, CancellationToken cancellationToken = default) @@ -148,12 +151,12 @@ public Task WaitForConditionAsync( var operationId = _idGenerator.NextId(); var op = new WaitForConditionOperation( operationId, name, _idGenerator.ParentId, check, config, serializer, Logger, - _state, _terminationManager, _durableExecutionArn, _batcher); + _state, _terminationManager, _workflowCancellation, _durableExecutionArn, _batcher); return op.ExecuteAsync(cancellationToken); } private Task RunChildContext( - Func> func, + Func> func, string? name, ChildContextConfig? config, CancellationToken cancellationToken) @@ -163,16 +166,16 @@ private Task RunChildContext( var operationId = _idGenerator.NextId(); // Capture this DurableContext's collaborators; the child shares state, - // termination, batcher, ARN, and Lambda context — but uses a child - // OperationIdGenerator so its operation IDs are deterministically - // namespaced under the parent op ID. + // termination, workflow cancellation, batcher, ARN, and Lambda context — + // but uses a child OperationIdGenerator so its operation IDs are + // deterministically namespaced under the parent op ID. IDurableContext ChildFactory(string parentOpId) => new DurableContext( - _state, _terminationManager, _idGenerator.CreateChild(parentOpId), + _state, _terminationManager, _workflowCancellation, _idGenerator.CreateChild(parentOpId), _durableExecutionArn, LambdaContext, _batcher); var op = new ChildContextOperation( operationId, name, _idGenerator.ParentId, func, config, serializer, ChildFactory, - _state, _terminationManager, _durableExecutionArn, _batcher); + _state, _terminationManager, _workflowCancellation, _durableExecutionArn, _batcher); return op.ExecuteAsync(cancellationToken); } @@ -197,7 +200,7 @@ private Task> RunCallback( } public Task WaitForCallbackAsync( - Func submitter, + Func submitter, string? name = null, WaitForCallbackConfig? 
config = null, CancellationToken cancellationToken = default) @@ -218,7 +221,7 @@ public Task WaitForCallbackAsync( /// /// private Task RunWaitForCallback( - Func submitter, + Func submitter, string? name, WaitForCallbackConfig? config, CancellationToken cancellationToken) @@ -239,26 +242,28 @@ private Task RunWaitForCallback( // Delegate to RunInChildContextAsync; the inner CreateCallbackAsync and // StepAsync calls each pull the registered ILambdaSerializer from // ILambdaContext.Serializer, so AOT and reflection-based scenarios share - // the same code path. + // the same code path. The token threaded into childCtx and the inner + // submitter step is the linked workflow+caller token forwarded by the + // child context machinery. return RunInChildContextAsync( - async childCtx => + async (childCtx, childToken) => { var callback = await childCtx.CreateCallbackAsync( name: callbackName, config: callbackConfig, - cancellationToken: cancellationToken); + cancellationToken: childToken); await childCtx.StepAsync( - async (stepCtx) => + async (stepCtx, stepToken) => { var submitterCtx = new WaitForCallbackContext(stepCtx.Logger); - await submitter(callback.CallbackId, submitterCtx); + await submitter(callback.CallbackId, submitterCtx, stepToken); }, name: submitterName, config: stepConfig, - cancellationToken: cancellationToken); + cancellationToken: childToken); - return await callback.GetResultAsync(cancellationToken); + return await callback.GetResultAsync(childToken); }, name, new ChildContextConfig diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/DurableFunction.cs b/Libraries/src/Amazon.Lambda.DurableExecution/DurableFunction.cs index cb5a7a297..09fe15331 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/DurableFunction.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/DurableFunction.cs @@ -95,6 +95,7 @@ private static async Task WrapAsyncCore(invocationInput, serializer); var terminationManager = new TerminationManager(); + using var 
workflowCancellation = new WorkflowCancellation(terminationManager); var idGenerator = new OperationIdGenerator(); await using var batcher = new CheckpointBatcher( @@ -108,7 +109,7 @@ private static async Task WrapAsyncCore result; diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/IDurableContext.cs b/Libraries/src/Amazon.Lambda.DurableExecution/IDurableContext.cs index 960090a99..356a3ffcd 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/IDurableContext.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/IDurableContext.cs @@ -54,7 +54,11 @@ public interface IDurableContext /// The type of the step's result. /// /// The step body to execute. Receives an exposing - /// the step's logger, attempt number, and operation ID. + /// the step's logger, attempt number, and operation ID, and a + /// linking the caller-supplied token with + /// the SDK's workflow-shutdown signal — pass it to cancellation-aware APIs + /// (HttpClient.SendAsync, Task.Delay, AWS SDK calls) so the + /// step body unwinds cleanly when the workflow is being torn down. /// /// /// An optional name for the step, used for observability and to derive the @@ -63,10 +67,13 @@ public interface IDurableContext /// /// Optional step configuration (e.g. retry policy). Defaults are used when null. /// - /// A token to observe for cancellation. + /// + /// A token to observe for cancellation. Linked with an SDK-owned workflow + /// shutdown source; the resulting token is forwarded to . + /// /// The deserialized result of the step. Task StepAsync( - Func> func, + Func> func, string? name = null, StepConfig? config = null, CancellationToken cancellationToken = default); @@ -76,7 +83,9 @@ Task StepAsync( /// /// /// The step body to execute. Receives an exposing - /// the step's logger, attempt number, and operation ID. + /// the step's logger, attempt number, and operation ID, and a + /// linking the caller-supplied token with + /// the SDK's workflow-shutdown signal. 
/// /// /// An optional name for the step, used for observability and to derive the @@ -85,9 +94,12 @@ Task StepAsync( /// /// Optional step configuration (e.g. retry policy). Defaults are used when null. /// - /// A token to observe for cancellation. + /// + /// A token to observe for cancellation. Linked with an SDK-owned workflow + /// shutdown source; the resulting token is forwarded to . + /// Task StepAsync( - Func func, + Func func, string? name = null, StepConfig? config = null, CancellationToken cancellationToken = default); @@ -129,7 +141,9 @@ Task WaitAsync( /// The type of the child context's result. /// /// The user function to run inside the child context. Receives a nested - /// with its own deterministic operation-ID space. + /// with its own deterministic operation-ID space, + /// and a linking the caller-supplied token with + /// the SDK's workflow-shutdown signal. /// /// /// An optional name for the child context, used for observability and to derive @@ -139,10 +153,13 @@ Task WaitAsync( /// Optional child context configuration (e.g. /// ). Defaults are used when null. /// - /// A token to observe for cancellation. + /// + /// A token to observe for cancellation. Linked with an SDK-owned workflow + /// shutdown source; the resulting token is forwarded to . + /// /// The deserialized result of the child context. Task RunInChildContextAsync( - Func> func, + Func> func, string? name = null, ChildContextConfig? config = null, CancellationToken cancellationToken = default); @@ -162,7 +179,9 @@ Task RunInChildContextAsync( /// /// /// The user function to run inside the child context. Receives a nested - /// with its own deterministic operation-ID space. + /// with its own deterministic operation-ID space, + /// and a linking the caller-supplied token with + /// the SDK's workflow-shutdown signal. 
/// /// /// An optional name for the child context, used for observability and to derive @@ -172,9 +191,12 @@ Task RunInChildContextAsync( /// Optional child context configuration (e.g. /// ). Defaults are used when null. /// - /// A token to observe for cancellation. + /// + /// A token to observe for cancellation. Linked with an SDK-owned workflow + /// shutdown source; the resulting token is forwarded to . + /// Task RunInChildContextAsync( - Func func, + Func func, string? name = null, ChildContextConfig? config = null, CancellationToken cancellationToken = default); @@ -222,7 +244,7 @@ Task> CreateCallbackAsync( /// (which hands the callbackId to an external system), and suspends /// until the external system delivers a result. Equivalent to manually /// composing - /// + + /// + /// + /// inside a child context. /// @@ -235,7 +257,9 @@ Task> CreateCallbackAsync( /// The type of the result the callback will deliver. /// /// A function that hands the service-allocated callbackId to the external - /// system. Receives the callback ID and an . + /// system. Receives the callback ID, an , + /// and a linking the caller-supplied token with + /// the SDK's workflow-shutdown signal. /// /// /// An optional name for the operation, used for observability and to derive the @@ -245,10 +269,13 @@ Task> CreateCallbackAsync( /// Optional configuration (e.g. submitter retry policy and callback timeout). /// Defaults are used when null. /// - /// A token to observe for cancellation. + /// + /// A token to observe for cancellation. Linked with an SDK-owned workflow + /// shutdown source; the resulting token is forwarded to . + /// /// The deserialized result delivered by the external system. Task WaitForCallbackAsync( - Func submitter, + Func submitter, string? name = null, WaitForCallbackConfig? config = null, CancellationToken cancellationToken = default); @@ -316,8 +343,10 @@ Task InvokeAsync( /// /// The condition check invoked on each poll. 
Receives the state returned by the /// previous invocation (seeded by - /// on the first call) - /// and an , and returns the next state. + /// on the first call), + /// an , and a + /// linking the caller-supplied token with the SDK's workflow-shutdown signal, + /// and returns the next state. /// /// /// The configuration controlling polling, including the @@ -327,10 +356,13 @@ Task InvokeAsync( /// An optional name for the operation, used for observability and to derive the /// deterministic operation ID. Defaults to a name inferred from the call site. /// - /// A token to observe for cancellation. + /// + /// A token to observe for cancellation. Linked with an SDK-owned workflow + /// shutdown source; the resulting token is forwarded to . + /// /// The final state observed when the strategy decides to stop. Task WaitForConditionAsync( - Func> check, + Func> check, WaitForConditionConfig config, string? name = null, CancellationToken cancellationToken = default); diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/IWaitForCallbackContext.cs b/Libraries/src/Amazon.Lambda.DurableExecution/IWaitForCallbackContext.cs index 866fb3bab..d282e2f72 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/IWaitForCallbackContext.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/IWaitForCallbackContext.cs @@ -7,7 +7,7 @@ namespace Amazon.Lambda.DurableExecution; /// /// Context passed to the submitter delegate of -/// . +/// . /// Provides a replay-safe logger scoped to the submitter step. 
/// /// diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ChildContextOperation.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ChildContextOperation.cs index a0abbf99e..4a25990fc 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ChildContextOperation.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/ChildContextOperation.cs @@ -38,21 +38,23 @@ namespace Amazon.Lambda.DurableExecution.Internal; /// internal sealed class ChildContextOperation : DurableOperation { - private readonly Func> _func; + private readonly Func> _func; private readonly ChildContextConfig? _config; private readonly ILambdaSerializer _serializer; private readonly Func _childContextFactory; + private readonly WorkflowCancellation _workflowCancellation; public ChildContextOperation( string operationId, string? name, string? parentId, - Func> func, + Func> func, ChildContextConfig? config, ILambdaSerializer serializer, Func childContextFactory, ExecutionState state, TerminationManager termination, + WorkflowCancellation workflowCancellation, string durableExecutionArn, CheckpointBatcher? batcher = null) : base(operationId, name, parentId, state, termination, durableExecutionArn, batcher) @@ -61,6 +63,7 @@ public ChildContextOperation( _config = config; _serializer = serializer; _childContextFactory = childContextFactory; + _workflowCancellation = workflowCancellation; } protected override string OperationType => OperationTypes.Context; @@ -116,13 +119,22 @@ private async Task ExecuteFunc(CancellationToken cancellationToken) var childContext = _childContextFactory(OperationId); + // Link the caller's token with the workflow-shutdown token. The user + // func observes both signals; the SDK's checkpoint writes (CONTEXT + // FAIL / SUCCEED below) continue to use the caller's token only. 
+ using var linked = CancellationTokenSource.CreateLinkedTokenSource( + cancellationToken, _workflowCancellation.Token); + T result; try { - result = await _func(childContext); + result = await _func(childContext, linked.Token); } - catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + catch (OperationCanceledException) when (linked.IsCancellationRequested) { + // Cancellation owned by the linked source — caller cancel or workflow + // shutdown. Do NOT checkpoint CONTEXT FAIL: the termination signal + // (or upstream cancel) owns the outcome. throw; } catch (NonDeterministicExecutionException) diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs index 4d04d8a72..76780957a 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/StepOperation.cs @@ -38,21 +38,23 @@ namespace Amazon.Lambda.DurableExecution.Internal; /// internal sealed class StepOperation : DurableOperation { - private readonly Func> _func; + private readonly Func> _func; private readonly StepConfig? _config; private readonly ILambdaSerializer _serializer; private readonly ILogger _logger; + private readonly WorkflowCancellation _workflowCancellation; public StepOperation( string operationId, string? name, string? parentId, - Func> func, + Func> func, StepConfig? config, ILambdaSerializer serializer, ILogger logger, ExecutionState state, TerminationManager termination, + WorkflowCancellation workflowCancellation, string durableExecutionArn, CheckpointBatcher? 
batcher = null) : base(operationId, name, parentId, state, termination, durableExecutionArn, batcher) @@ -61,6 +63,7 @@ public StepOperation( _config = config; _serializer = serializer; _logger = logger; + _workflowCancellation = workflowCancellation; } protected override string OperationType => OperationTypes.Step; @@ -204,6 +207,14 @@ private async Task ExecuteFunc(int attemptNumber, CancellationToken cancellat } + // Link the caller's token with the workflow-shutdown token so the user + // step body observes both upstream cancel intent and SDK-driven workflow + // teardown. The linked token is passed to the user Func only; checkpoint + // writes still use the caller's token (workflow shutdown must NOT abort + // a successful step's SUCCEED checkpoint — see cancellation-design.md §7). + using var linked = CancellationTokenSource.CreateLinkedTokenSource( + cancellationToken, _workflowCancellation.Token); + try { var stepContext = new StepContext(OperationId, attemptNumber, _logger); @@ -220,7 +231,7 @@ private async Task ExecuteFunc(int attemptNumber, CancellationToken cancellat ["attempt"] = attemptNumber, })) { - result = await _func(stepContext); + result = await _func(stepContext, linked.Token); } await EnqueueAsync(new SdkOperationUpdate @@ -236,14 +247,21 @@ await EnqueueAsync(new SdkOperationUpdate return result; } - catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + catch (OperationCanceledException) when (linked.IsCancellationRequested) { + // Cancellation owned by the linked source (caller-cancel or workflow + // shutdown). Do NOT checkpoint FAIL and do NOT consult the retry + // strategy — the termination signal that fired (if any) owns the + // suspend/abort decision; an upstream caller-cancel propagates up + // as a fault on the workflow user task. throw; } catch (Exception ex) { // Funnel into the retry/fail decision tree. May checkpoint RETRY and - // suspend (Pending), or checkpoint FAIL and rethrow to user. 
+ // suspend (Pending), or checkpoint FAIL and rethrow to user. A user- + // thrown OperationCanceledException unrelated to our linked token + // falls through here and is treated as a normal step failure. return await HandleStepFailureAsync(ex, attemptNumber, cancellationToken); } } diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitForConditionOperation.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitForConditionOperation.cs index 742265782..79e011efd 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitForConditionOperation.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WaitForConditionOperation.cs @@ -51,21 +51,23 @@ namespace Amazon.Lambda.DurableExecution.Internal; /// internal sealed class WaitForConditionOperation : DurableOperation { - private readonly Func> _check; + private readonly Func> _check; private readonly WaitForConditionConfig _config; private readonly ILambdaSerializer _serializer; private readonly ILogger _logger; + private readonly WorkflowCancellation _workflowCancellation; public WaitForConditionOperation( string operationId, string? name, string? parentId, - Func> check, + Func> check, WaitForConditionConfig config, ILambdaSerializer serializer, ILogger logger, ExecutionState state, TerminationManager termination, + WorkflowCancellation workflowCancellation, string durableExecutionArn, CheckpointBatcher? batcher = null) : base(operationId, name, parentId, state, termination, durableExecutionArn, batcher) @@ -74,6 +76,7 @@ public WaitForConditionOperation( _config = config; _serializer = serializer; _logger = logger; + _workflowCancellation = workflowCancellation; } protected override string OperationType => OperationTypes.Step; @@ -167,14 +170,23 @@ await EnqueueAsync(new SdkOperationUpdate }, cancellationToken); } + // Link the caller's token with the workflow-shutdown token. 
The check + // function observes both signals; the SDK's RETRY/SUCCEED/FAIL + // checkpoint writes still use the caller's token only. + using var linked = CancellationTokenSource.CreateLinkedTokenSource( + cancellationToken, _workflowCancellation.Token); + TState newState; try { var checkContext = new ConditionCheckContext(attemptNumber, _logger); - newState = await _check(currentState, checkContext); + newState = await _check(currentState, checkContext, linked.Token); } - catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + catch (OperationCanceledException) when (linked.IsCancellationRequested) { + // Cancellation owned by the linked source — caller cancel or workflow + // shutdown. Do NOT checkpoint FAIL: the termination signal (or + // upstream cancel) owns the outcome. throw; } catch (Exception ex) diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WorkflowCancellation.cs b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WorkflowCancellation.cs new file mode 100644 index 000000000..115dd585e --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/Internal/WorkflowCancellation.cs @@ -0,0 +1,46 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +namespace Amazon.Lambda.DurableExecution.Internal; + +/// +/// Workflow-scoped cancellation source. Cancels when the +/// resolves so abandoned user-Func bodies +/// (the WhenAny loser in ) unwind via +/// instead of running to completion on +/// the threadpool while Lambda is mid-response. +/// +/// +/// One instance per durable function invocation, constructed and disposed by +/// . Operation classes that invoke user +/// Funcs build a per-call linked CTS combining the caller's token with +/// and pass the linked token into the user code. 
+/// +/// Checkpoint writes, batcher flushes, and other SDK-internal work do NOT +/// observe this token: successful work must persist even when the workflow is +/// being torn down. +/// +/// +internal sealed class WorkflowCancellation : IDisposable +{ + private readonly CancellationTokenSource _cts = new(); + + public CancellationToken Token => _cts.Token; + + public WorkflowCancellation(TerminationManager terminationManager) + { + terminationManager.TerminationTask.ContinueWith( + static (_, state) => + { + var cts = (CancellationTokenSource)state!; + try { cts.Cancel(); } + catch (ObjectDisposedException) { } + }, + _cts, + CancellationToken.None, + TaskContinuationOptions.ExecuteSynchronously, + TaskScheduler.Default); + } + + public void Dispose() => _cts.Dispose(); +} diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/README.md b/Libraries/src/Amazon.Lambda.DurableExecution/README.md index 482024e0c..264703397 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/README.md +++ b/Libraries/src/Amazon.Lambda.DurableExecution/README.md @@ -22,6 +22,7 @@ Your handler delegates to `DurableFunction.WrapAsync`, which gives your workflow - `ctx.WaitForConditionAsync` — poll a check function until a condition is met, suspending between polls. ([docs](docs/core/wait-for-condition.md)) - `ctx.CreateCallbackAsync` / `ctx.WaitForCallbackAsync` — wait for external events (approvals, webhooks). ([docs](docs/core/callbacks.md)) - `ctx.RunInChildContextAsync` — run an isolated child context with its own checkpoint log. ([docs](docs/core/child-contexts.md)) +- Every user `Func` receives a `CancellationToken` linking the caller's token with the SDK's workflow-shutdown signal. 
([docs](docs/core/cancellation.md)) ## Quick Start @@ -64,17 +65,17 @@ public class OrderProcessor private async Task Workflow(Order order, IDurableContext ctx) { var reservation = await ctx.StepAsync( - async _ => await InventoryService.ReserveAsync(order.Items), + async (_, ct) => await InventoryService.ReserveAsync(order.Items, ct), name: "reserve-inventory"); var payment = await ctx.StepAsync( - async _ => await PaymentService.ChargeAsync(order.PaymentMethod, order.Total), + async (_, ct) => await PaymentService.ChargeAsync(order.PaymentMethod, order.Total, ct), name: "process-payment"); await ctx.WaitAsync(TimeSpan.FromHours(2), name: "warehouse-processing"); var shipment = await ctx.StepAsync( - async _ => await ShippingService.ShipAsync(reservation, order.Address), + async (_, ct) => await ShippingService.ShipAsync(reservation, order.Address, ct), name: "confirm-shipment"); return new OrderResult(order.Id, shipment.TrackingNumber); diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/WaitForCallbackConfig.cs b/Libraries/src/Amazon.Lambda.DurableExecution/WaitForCallbackConfig.cs index 90cf1f420..9aed6da08 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/WaitForCallbackConfig.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/WaitForCallbackConfig.cs @@ -5,7 +5,7 @@ namespace Amazon.Lambda.DurableExecution; /// /// Configuration for the composite -/// +/// /// operation. Inherits the callback's and /// ; adds a /// for the submitter step. diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/callbacks.md b/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/callbacks.md index 573ad17e3..00aee3cd4 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/callbacks.md +++ b/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/callbacks.md @@ -11,13 +11,13 @@ Two APIs are available: ```csharp Task WaitForCallbackAsync( - Func submitter, + Func submitter, string? name = null, WaitForCallbackConfig? 
config = null, CancellationToken cancellationToken = default); ``` -The submitter receives the freshly allocated `callbackId` and an `IWaitForCallbackContext` (logger-only). Submitter failures (after retries are exhausted) surface as `CallbackSubmitterException`; callback failures and timeouts surface as `CallbackFailedException` / `CallbackTimeoutException`. +The submitter receives the freshly allocated `callbackId`, an `IWaitForCallbackContext` (logger-only), and a `CancellationToken` linking the caller-supplied token with the SDK's workflow-shutdown signal. Submitter failures (after retries are exhausted) surface as `CallbackSubmitterException`; callback failures and timeouts surface as `CallbackFailedException` / `CallbackTimeoutException`. ## `CreateCallbackAsync` @@ -78,7 +78,7 @@ public class Function // with this callback ID. The submitter is invoked once with a freshly-allocated // ID; it hands the ID to the approver and returns immediately. var result = await ctx.WaitForCallbackAsync( - submitter: async (callbackId, cbCtx) => + submitter: async (callbackId, cbCtx, ct) => { var payload = $$"""{"callbackId":"{{callbackId}}","orderId":"{{input.OrderId}}"}"""; await LambdaClient.InvokeAsync(new InvokeRequest @@ -86,7 +86,7 @@ public class Function FunctionName = approverFunctionName, InvocationType = InvocationType.Event, // fire-and-forget Payload = payload - }); + }, ct); }, name: "approve"); @@ -154,7 +154,7 @@ private async Task Workflow(OrderInput input, IDurableContext ct { var cb = await ctx.CreateCallbackAsync(name: "approve"); - await ctx.StepAsync(async _ => + await ctx.StepAsync(async (_, ct) => { var payload = $$"""{"callbackId":"{{cb.CallbackId}}","orderId":"{{input.OrderId}}"}"""; await LambdaClient.InvokeAsync(new InvokeRequest @@ -162,7 +162,7 @@ private async Task Workflow(OrderInput input, IDurableContext ct FunctionName = approverFunctionName, InvocationType = InvocationType.Event, Payload = payload - }); + }, ct); }, name: "submit"); 
return await cb.GetResultAsync(); diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/child-contexts.md b/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/child-contexts.md index 4a664e11e..34904f290 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/child-contexts.md +++ b/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/child-contexts.md @@ -6,27 +6,29 @@ ```csharp Task RunInChildContextAsync( - Func> func, + Func> func, string? name = null, ChildContextConfig? config = null, CancellationToken cancellationToken = default); Task RunInChildContextAsync( - Func func, + Func func, string? name = null, ChildContextConfig? config = null, CancellationToken cancellationToken = default); ``` +The `CancellationToken` parameter is a linked token combining the caller-supplied token with the SDK's workflow-shutdown signal — forward it to `StepAsync` and other operations inside the child so cancellation propagates uniformly. + ## Example ```csharp var phaseResult = await ctx.RunInChildContextAsync( - async childCtx => + async (childCtx, ct) => { - var validated = await childCtx.StepAsync(async _ => Validate(input), name: "validate"); - await childCtx.WaitAsync(TimeSpan.FromSeconds(2), name: "short_wait"); - var processed = await childCtx.StepAsync(async _ => Process(validated), name: "process"); + var validated = await childCtx.StepAsync(async (_, c) => Validate(input, c), name: "validate", cancellationToken: ct); + await childCtx.WaitAsync(TimeSpan.FromSeconds(2), name: "short_wait", cancellationToken: ct); + var processed = await childCtx.StepAsync(async (_, c) => Process(validated, c), name: "process", cancellationToken: ct); return processed; }, name: "phase", diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/steps.md b/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/steps.md index c7f9e9f22..cb6fa48a2 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/steps.md +++ 
b/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/steps.md @@ -6,34 +6,34 @@ ```csharp Task StepAsync( - Func> func, + Func> func, string? name = null, StepConfig? config = null, CancellationToken cancellationToken = default); Task StepAsync( - Func func, + Func func, string? name = null, StepConfig? config = null, CancellationToken cancellationToken = default); ``` -The `IStepContext` parameter exposes the current `AttemptNumber`, the deterministic `OperationId`, and a scoped `Logger`. Returned values are serialized via the `ILambdaSerializer` registered on `ILambdaContext.Serializer`. +The `IStepContext` parameter exposes the current `AttemptNumber`, the deterministic `OperationId`, and a scoped `Logger`. The `CancellationToken` parameter is a linked token combining the caller-supplied token with the SDK's workflow-shutdown signal — pass it to cancellation-aware APIs (`HttpClient.SendAsync`, `Task.Delay`, AWS SDK calls) so the step body unwinds cleanly when the workflow is being torn down. Returned values are serialized via the `ILambdaSerializer` registered on `ILambdaContext.Serializer`. 
## Basic step ```csharp var user = await ctx.StepAsync( - async _ => await userService.GetUserAsync(userId), + async (_, ct) => await userService.GetUserAsync(userId, ct), name: "fetch-user"); ``` ## Multiple steps ```csharp -var a = await ctx.StepAsync(async _ => $"a-{input.OrderId}", name: "step_1"); -var b = await ctx.StepAsync(async _ => $"{a}-b", name: "step_2"); -var c = await ctx.StepAsync(async _ => $"{b}-c", name: "step_3"); +var a = await ctx.StepAsync(async (_, _) => $"a-{input.OrderId}", name: "step_1"); +var b = await ctx.StepAsync(async (_, _) => $"{a}-b", name: "step_2"); +var c = await ctx.StepAsync(async (_, _) => $"{b}-c", name: "step_3"); ``` ## Step configuration @@ -99,7 +99,7 @@ When `retryableExceptions` and `retryableMessagePatterns` are both null (default ```csharp var result = await ctx.StepAsync( - async stepCtx => + async (stepCtx, _) => { if (stepCtx.AttemptNumber < 3) throw new InvalidOperationException($"flake on attempt {stepCtx.AttemptNumber}"); @@ -138,7 +138,7 @@ These semantics apply *per retry attempt*, not per overall execution. To achieve ```csharp var result = await ctx.StepAsync( - async _ => await paymentService.ChargeAsync(amount), + async (_, ct) => await paymentService.ChargeAsync(amount, ct), name: "charge-payment", config: new StepConfig { diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/wait-for-condition.md b/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/wait-for-condition.md index 93ea3f4d9..b900b8ffe 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/wait-for-condition.md +++ b/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/wait-for-condition.md @@ -8,7 +8,7 @@ Use it when you're waiting on something whose readiness you can only learn by *a ```csharp Task WaitForConditionAsync( - Func> check, + Func> check, WaitForConditionConfig config, string? 
name = null, CancellationToken cancellationToken = default); @@ -16,7 +16,7 @@ Task WaitForConditionAsync( On every iteration the `check` function receives the state returned by the previous invocation — seeded by `config.InitialState` on the very first call — and returns the next state. The configured `IWaitStrategy` then decides whether to keep polling and how long to wait. State is checkpointed each iteration, so the polling loop survives Lambda re-invocations deterministically and you can carry per-poll bookkeeping (a cursor, a counter) inside the state itself. -The `IConditionCheckContext` parameter exposes the current `AttemptNumber` (1-based) and a scoped `Logger`. The returned state is serialized via the `ILambdaSerializer` registered on `ILambdaContext.Serializer`. +The `IConditionCheckContext` parameter exposes the current `AttemptNumber` (1-based) and a scoped `Logger`. The `CancellationToken` parameter is a linked token combining the caller-supplied token with the SDK's workflow-shutdown signal — pass it to the underlying I/O so the check unwinds cleanly when the workflow is being torn down. The returned state is serialized via the `ILambdaSerializer` registered on `ILambdaContext.Serializer`. When the strategy stops because its `maxAttempts` limit is reached — rather than because the condition was met — the operation throws `WaitForConditionException` carrying `AttemptsExhausted` and the last observed `LastState`. 
@@ -26,10 +26,10 @@ Poll an order's status until it reaches a terminal value: ```csharp var finalStatus = await ctx.WaitForConditionAsync( - check: async (state, checkCtx) => + check: async (state, checkCtx, ct) => { checkCtx.Logger.LogInformation("Polling order on attempt {Attempt}", checkCtx.AttemptNumber); - return await orderService.GetStatusAsync(orderId); + return await orderService.GetStatusAsync(orderId, ct); }, config: new WaitForConditionConfig { diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/CallbackOperationTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/CallbackOperationTests.cs index c70dc75fb..99a1342fe 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/CallbackOperationTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/CallbackOperationTests.cs @@ -29,7 +29,7 @@ private static (DurableContext context, RecordingBatcher recorder, TerminationMa var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); return (context, recorder, tm, state); } @@ -477,7 +477,7 @@ public async Task CreateCallbackAsync_NoSerializer_Throws() var idGen = new OperationIdGenerator(); var lambdaContext = new TestLambdaContext(); // no Serializer set var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); var ex = await Assert.ThrowsAsync(() => context.CreateCallbackAsync(name: "no-serializer")); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ChildContextOperationTests.cs 
b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ChildContextOperationTests.cs index 3aa182248..8d1d9d591 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ChildContextOperationTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ChildContextOperationTests.cs @@ -29,7 +29,7 @@ private static (DurableContext context, RecordingBatcher recorder, TerminationMa var lambdaContext = new TestLambdaContext { Serializer = new DefaultLambdaJsonSerializer() }; #pragma warning restore AWSLAMBDA001 var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); return (context, recorder, tm, state); } @@ -40,10 +40,10 @@ public async Task RunInChildContextAsync_FreshExecution_RunsFuncAndCheckpoints() var executed = false; var result = await context.RunInChildContextAsync( - async (childCtx) => + async (childCtx, _) => { executed = true; - return await childCtx.StepAsync(async (_) => { await Task.CompletedTask; return "inner"; }, name: "inner_step"); + return await childCtx.StepAsync(async (_, _) => { await Task.CompletedTask; return "inner"; }, name: "inner_step"); }, name: "phase"); @@ -76,10 +76,10 @@ public async Task RunInChildContextAsync_FreshExecution_ChildOperationIdsDetermi var (context, recorder, _, _) = CreateContext(); await context.RunInChildContextAsync( - async (childCtx) => + async (childCtx, _) => { - await childCtx.StepAsync(async (_) => { await Task.CompletedTask; return "a"; }, name: "first"); - await childCtx.StepAsync(async (_) => { await Task.CompletedTask; return "b"; }, name: "second"); + await childCtx.StepAsync(async (_, _) => { await Task.CompletedTask; return "a"; }, name: "first"); + await childCtx.StepAsync(async (_, _) => { await Task.CompletedTask; return "b"; }, name: "second"); return 0; }, name: "phase"); @@ 
-116,7 +116,7 @@ public async Task RunInChildContextAsync_ReplaySucceeded_ReturnsCachedAndDoesNot var executed = false; var result = await context.RunInChildContextAsync( - async (childCtx) => + async (childCtx, _) => { executed = true; await Task.CompletedTask; @@ -161,7 +161,7 @@ public async Task RunInChildContextAsync_ReplayFailed_ThrowsChildContextExceptio var ex = await Assert.ThrowsAsync(() => context.RunInChildContextAsync( - async (_) => { await Task.CompletedTask; return "should not run"; }, + async (_, _) => { await Task.CompletedTask; return "should not run"; }, name: "phase")); Assert.Equal("child went wrong", ex.Message); @@ -202,7 +202,7 @@ public async Task RunInChildContextAsync_ReplayFailed_AppliesErrorMapping() var ex = await Assert.ThrowsAsync(() => context.RunInChildContextAsync( - async (_) => { await Task.CompletedTask; return "x"; }, + async (_, _) => { await Task.CompletedTask; return "x"; }, name: "phase", config: new ChildContextConfig { @@ -223,7 +223,7 @@ public async Task RunInChildContextAsync_FuncThrows_CheckpointsFailAndThrows() var ex = await Assert.ThrowsAsync(() => context.RunInChildContextAsync( - async (_) => { await Task.CompletedTask; throw new InvalidOperationException("inner boom"); }, + async (_, _) => { await Task.CompletedTask; throw new InvalidOperationException("inner boom"); }, name: "phase")); Assert.Equal("inner boom", ex.Message); @@ -273,10 +273,10 @@ public async Task RunInChildContextAsync_InnerNonDeterminism_BubblesUpWithoutChe await Assert.ThrowsAsync(() => context.RunInChildContextAsync( - async (childCtx) => + async (childCtx, _) => { return await childCtx.StepAsync( - async (_) => { await Task.CompletedTask; return "x"; }, + async (_, _) => { await Task.CompletedTask; return "x"; }, name: "inner_step"); }, name: "phase")); @@ -292,7 +292,7 @@ public async Task RunInChildContextAsync_FuncThrows_AppliesErrorMapping() var ex = await Assert.ThrowsAsync(() => context.RunInChildContextAsync( - async (_) => { 
await Task.CompletedTask; throw new TimeoutException("inner timeout"); }, + async (_, _) => { await Task.CompletedTask; throw new TimeoutException("inner timeout"); }, name: "phase", config: new ChildContextConfig { @@ -315,7 +315,7 @@ public async Task RunInChildContextAsync_ChildSuspendsOnWait_TerminatesWithWaitS // wins on the termination signal; the test below short-circuits via // the same TerminationManager.IsTerminated check. var task = context.RunInChildContextAsync( - async (childCtx) => + async (childCtx, _) => { await childCtx.WaitAsync(TimeSpan.FromSeconds(5), name: "wait_inside"); return "should not return"; @@ -366,10 +366,10 @@ public async Task RunInChildContextAsync_ReplayStarted_ReExecutesFuncWithInnerCa var innerExecuted = false; var result = await context.RunInChildContextAsync( - async (childCtx) => + async (childCtx, _) => { return await childCtx.StepAsync( - async (_) => { innerExecuted = true; await Task.CompletedTask; return "fresh_inner"; }, + async (_, _) => { innerExecuted = true; await Task.CompletedTask; return "fresh_inner"; }, name: "inner_step"); }, name: "phase"); @@ -397,10 +397,10 @@ public async Task RunInChildContextAsync_VoidOverload_RunsAndCheckpoints() var executed = false; await context.RunInChildContextAsync( - async (childCtx) => + async (childCtx, _) => { await childCtx.StepAsync( - async (_) => { executed = true; await Task.CompletedTask; }, + async (_, _) => { executed = true; await Task.CompletedTask; }, name: "inner_void"); }, name: "phase"); @@ -444,7 +444,7 @@ public async Task RunInChildContextAsync_ReplayTypeMismatch_ThrowsNonDeterminist var ex = await Assert.ThrowsAsync(() => context.RunInChildContextAsync( - async (_) => { await Task.CompletedTask; return "x"; }, + async (_, _) => { await Task.CompletedTask; return "x"; }, name: "phase")); Assert.Contains("expected type 'CONTEXT'", ex.Message); @@ -471,7 +471,7 @@ public async Task RunInChildContextAsync_ReplayNameMismatch_ThrowsNonDeterminist var ex = await 
Assert.ThrowsAsync(() => context.RunInChildContextAsync( - async (_) => { await Task.CompletedTask; return "x"; }, + async (_, _) => { await Task.CompletedTask; return "x"; }, name: "new_name")); Assert.Contains("expected name 'new_name'", ex.Message); @@ -497,7 +497,7 @@ public async Task RunInChildContextAsync_ReplayUnknownStatus_ThrowsNonDeterminis await Assert.ThrowsAsync(() => context.RunInChildContextAsync( - async (_) => { await Task.CompletedTask; return "x"; }, + async (_, _) => { await Task.CompletedTask; return "x"; }, name: "phase")); } @@ -507,7 +507,7 @@ public async Task RunInChildContextAsync_SubTypeAndName_PropagateToCheckpoint() var (context, recorder, _, _) = CreateContext(); await context.RunInChildContextAsync( - async (_) => { await Task.CompletedTask; return "ok"; }, + async (_, _) => { await Task.CompletedTask; return "ok"; }, name: "phase", config: new ChildContextConfig { SubType = "WaitForCallback" }); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs index 20411dbab..5c9e83193 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs @@ -33,7 +33,7 @@ private static DurableContext CreateContext( var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); - return new DurableContext(state, tm, idGen, "arn:aws:lambda:us-east-1:123:durable-execution:test", lambdaContext); + return new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:aws:lambda:us-east-1:123:durable-execution:test", lambdaContext); } #region StepAsync Tests @@ -44,7 +44,7 @@ public async Task StepAsync_NewExecution_RunsFunction() var context = CreateContext(); var executed = false; - var result = await context.StepAsync(async (_) => + var result = await context.StepAsync(async (_, _) => { executed = true; await 
Task.CompletedTask; @@ -73,7 +73,7 @@ public async Task StepAsync_Replay_ReturnsCachedResult() }); var executed = false; - var result = await context.StepAsync(async (_) => + var result = await context.StepAsync(async (_, _) => { executed = true; await Task.CompletedTask; @@ -111,7 +111,7 @@ public async Task StepAsync_ReplayFailed_ThrowsStepException() }); var ex = await Assert.ThrowsAsync(() => - context.StepAsync(async (_) => { await Task.CompletedTask; return "x"; }, name: "bad_step")); + context.StepAsync(async (_, _) => { await Task.CompletedTask; return "x"; }, name: "bad_step")); Assert.Equal("System.TimeoutException", ex.ErrorType); Assert.Equal("timed out", ex.Message); @@ -127,7 +127,7 @@ public async Task StepAsync_Throws_FailsWithStepException() var attempts = 0; await Assert.ThrowsAsync(() => - context.StepAsync(async (_) => + context.StepAsync(async (_, _) => { attempts++; await Task.CompletedTask; @@ -146,7 +146,7 @@ public async Task StepAsync_WithStepContext_ReceivesMetadata() int receivedAttempt = 0; Microsoft.Extensions.Logging.ILogger? 
receivedLogger = null; - await context.StepAsync(async (step) => + await context.StepAsync(async (step, _) => { receivedOpId = step.OperationId; receivedAttempt = step.AttemptNumber; @@ -166,7 +166,7 @@ public async Task StepAsync_VoidOverload_Works() var context = CreateContext(); var executed = false; - await context.StepAsync(async (_) => + await context.StepAsync(async (_, _) => { executed = true; await Task.CompletedTask; @@ -180,9 +180,9 @@ public async Task StepAsync_MultipleSteps_DeterministicIds() { var context = CreateContext(); - var r1 = await context.StepAsync(async (_) => { await Task.CompletedTask; return "a"; }, name: "first"); - var r2 = await context.StepAsync(async (_) => { await Task.CompletedTask; return "b"; }, name: "second"); - var r3 = await context.StepAsync(async (_) => { await Task.CompletedTask; return "c"; }); + var r1 = await context.StepAsync(async (_, _) => { await Task.CompletedTask; return "a"; }, name: "first"); + var r2 = await context.StepAsync(async (_, _) => { await Task.CompletedTask; return "b"; }, name: "second"); + var r3 = await context.StepAsync(async (_, _) => { await Task.CompletedTask; return "c"; }); Assert.Equal("a", r1); Assert.Equal("b", r2); @@ -207,7 +207,7 @@ public async Task StepAsync_ComplexType_SerializesCorrectly() }); var result = await context.StepAsync( - async (_) => { await Task.CompletedTask; return new TestPerson { Name = "Bob", Age = 25 }; }, + async (_, _) => { await Task.CompletedTask; return new TestPerson { Name = "Bob", Age = 25 }; }, name: "fetch"); Assert.Equal("Alice", result.Name); @@ -224,10 +224,10 @@ public async Task StepAsync_NoSerializerOnContext_ThrowsInvalidOperation() var tm = new TerminationManager(); var idGen = new OperationIdGenerator(); var lambdaContext = new TestLambdaContext(); // no Serializer set - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", 
lambdaContext); var ex = await Assert.ThrowsAsync(() => - context.StepAsync(async (_) => { await Task.CompletedTask; return "x"; }, name: "no_serializer")); + context.StepAsync(async (_, _) => { await Task.CompletedTask; return "x"; }, name: "no_serializer")); Assert.Contains("ILambdaSerializer", ex.Message); } @@ -322,7 +322,7 @@ public async Task StepAsync_Replay_NullResult_ReturnsDefault() }); var result = await context.StepAsync( - async (_) => { await Task.CompletedTask; return "fresh"; }, + async (_, _) => { await Task.CompletedTask; return "fresh"; }, name: "no_result"); Assert.Null(result); @@ -337,7 +337,7 @@ public async Task StepAsync_CancelledToken_ThrowsOperationCanceled() await Assert.ThrowsAnyAsync(() => context.StepAsync( - async (_) => + async (_, _) => { cts.Token.ThrowIfCancellationRequested(); await Task.CompletedTask; @@ -429,7 +429,7 @@ public async Task WaitAsync_StartedButNotExpired_ResuspendsWithoutNewCheckpoint( var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); var waitTask = context.WaitAsync(TimeSpan.FromSeconds(30), name: "pending_wait"); @@ -495,15 +495,15 @@ public async Task EndToEnd_StepWaitStep_FirstInvocation_SuspendsOnWait() state.LoadFromCheckpoint(null); var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext); var result = await DurableExecutionHandler.RunAsync( state, tm, async () => { - await context.StepAsync(async (_) => { await Task.CompletedTask; return "fetched"; }, name: "fetch"); + await context.StepAsync(async 
(_, _) => { await Task.CompletedTask; return "fetched"; }, name: "fetch"); await context.WaitAsync(TimeSpan.FromSeconds(30), name: "delay"); - var final = await context.StepAsync(async (_) => { await Task.CompletedTask; return "processed"; }, name: "process"); + var final = await context.StepAsync(async (_, _) => { await Task.CompletedTask; return "processed"; }, name: "process"); return final; }); @@ -539,20 +539,20 @@ public async Task EndToEnd_StepWaitStep_SecondInvocation_Completes() var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext); var processExecuted = false; var result = await DurableExecutionHandler.RunAsync( state, tm, async () => { - var fetched = await context.StepAsync(async (_) => { await Task.CompletedTask; return "fresh_fetch"; }, name: "fetch"); + var fetched = await context.StepAsync(async (_, _) => { await Task.CompletedTask; return "fresh_fetch"; }, name: "fetch"); Assert.Equal("fetched", fetched); // cached from replay await context.WaitAsync(TimeSpan.FromSeconds(30), name: "delay"); // wait is elapsed, continues - var final = await context.StepAsync(async (_) => + var final = await context.StepAsync(async (_, _) => { processExecuted = true; await Task.CompletedTask; @@ -589,11 +589,11 @@ public async Task StepAsync_ReplayTypeMismatch_ThrowsNonDeterministicException() var tm = new TerminationManager(); var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext); var ex = await Assert.ThrowsAsync(async () => await context.StepAsync( - async (_) => { await Task.CompletedTask; return "should not run"; }, + async (_, 
_) => { await Task.CompletedTask; return "should not run"; }, name: "my_op")); Assert.Contains("expected type 'STEP'", ex.Message); @@ -620,7 +620,7 @@ public async Task WaitAsync_ReplayTypeMismatch_ThrowsNonDeterministicException() var tm = new TerminationManager(); var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext); var ex = await Assert.ThrowsAsync(async () => await context.WaitAsync(TimeSpan.FromSeconds(10), name: "my_op")); @@ -652,11 +652,11 @@ public async Task StepAsync_ReplayNameMismatch_ThrowsNonDeterministicException() var tm = new TerminationManager(); var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext); var ex = await Assert.ThrowsAsync(async () => await context.StepAsync( - async (_) => { await Task.CompletedTask; return "new"; }, + async (_, _) => { await Task.CompletedTask; return "new"; }, name: "my_step")); Assert.Contains("expected name 'my_step'", ex.Message); @@ -669,7 +669,7 @@ public async Task StepAsync_NoReplay_SkipsValidation() var context = CreateContext(); var result = await context.StepAsync( - async (_) => { await Task.CompletedTask; return "ok"; }, + async (_, _) => { await Task.CompletedTask; return "ok"; }, name: "anything"); Assert.Equal("ok", result); @@ -694,10 +694,10 @@ public async Task StepAsync_FailsWithRetryStrategy_CheckpointsRetryAndSuspends() var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new 
DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); var stepTask = context.StepAsync( - async (_) => { await Task.CompletedTask; throw new InvalidOperationException("transient"); }, + async (_, _) => { await Task.CompletedTask; throw new InvalidOperationException("transient"); }, name: "flaky_step", config: new StepConfig { @@ -730,7 +730,7 @@ public async Task StepAsync_FailsNoRetryStrategy_CheckpointsFail() var ex = await Assert.ThrowsAsync(() => context.StepAsync( - async (_) => { await Task.CompletedTask; throw new InvalidOperationException("permanent"); }, + async (_, _) => { await Task.CompletedTask; throw new InvalidOperationException("permanent"); }, name: "fail_step")); Assert.Equal("permanent", ex.Message); @@ -761,12 +761,12 @@ public async Task StepAsync_RetryExhausted_CheckpointsFail() var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); // Attempt 3 (last one) — should fail after this var ex = await Assert.ThrowsAsync(() => context.StepAsync( - async (_) => { await Task.CompletedTask; throw new InvalidOperationException("still failing"); }, + async (_, _) => { await Task.CompletedTask; throw new InvalidOperationException("still failing"); }, name: "exhaust_step", config: new StepConfig { @@ -809,10 +809,10 @@ public async Task StepAsync_PendingWithFutureTimestamp_Suspends() var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, 
recorder.Batcher); var stepTask = context.StepAsync( - async (_) => { await Task.CompletedTask; return "should not run"; }, + async (_, _) => { await Task.CompletedTask; return "should not run"; }, name: "pending_step", config: new StepConfig { RetryStrategy = RetryStrategy.Default }); @@ -848,10 +848,10 @@ public async Task StepAsync_PendingWithPastTimestamp_ReExecutes() var tm = new TerminationManager(); var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext); var result = await context.StepAsync( - async (ctx) => + async (ctx, _) => { await Task.CompletedTask; Assert.Equal(2, ctx.AttemptNumber); @@ -886,11 +886,11 @@ public async Task StepAsync_ReadyReplay_AdvancesAttemptAndExecutes() var tm = new TerminationManager(); var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext); var executed = false; var result = await context.StepAsync( - async (ctx) => + async (ctx, _) => { executed = true; Assert.Equal(3, ctx.AttemptNumber); @@ -915,12 +915,12 @@ public async Task StepAsync_AtMostOnce_FlushesStartBeforeExecution() var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); IReadOnlyList? 
flushedAtFuncEntry = null; var result = await context.StepAsync( - async (_) => + async (_, _) => { flushedAtFuncEntry = recorder.Flushed.Select(o => o.Action.ToString()).ToArray(); await Task.CompletedTask; @@ -960,11 +960,11 @@ public async Task StepAsync_AtMostOnce_StartedReplay_TriggersRetryHandler() var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); var executed = false; var stepTask = context.StepAsync( - async (_) => { executed = true; await Task.CompletedTask; return "should not run"; }, + async (_, _) => { executed = true; await Task.CompletedTask; return "should not run"; }, name: "amo_replay", config: new StepConfig { diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableFunctionTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableFunctionTests.cs index 8078b0242..af38a4549 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableFunctionTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableFunctionTests.cs @@ -161,7 +161,7 @@ public async Task WrapAsync_VoidWorkflow_ReturnSucceeded() var output = await DurableFunction.WrapAsync( async (evt, ctx) => { - await ctx.StepAsync(async (_) => { await Task.CompletedTask; executed = true; }, name: "do_work"); + await ctx.StepAsync(async (_, _) => { await Task.CompletedTask; executed = true; }, name: "do_work"); }, input, CreateLambdaContext(), @@ -387,11 +387,11 @@ public async Task WrapAsync_PaginatedInitialState_HydratesAllPages() // without re-executing — if the loop missed a page, the corresponding step // would run fresh and append a different value to `observed`. 
observed.Add(await ctx.StepAsync( - async (_) => { await Task.CompletedTask; return "fresh"; }, name: "step1")); + async (_, _) => { await Task.CompletedTask; return "fresh"; }, name: "step1")); observed.Add(await ctx.StepAsync( - async (_) => { await Task.CompletedTask; return "fresh"; }, name: "step2")); + async (_, _) => { await Task.CompletedTask; return "fresh"; }, name: "step2")); observed.Add(await ctx.StepAsync( - async (_) => { await Task.CompletedTask; return "fresh"; }, name: "step3")); + async (_, _) => { await Task.CompletedTask; return "fresh"; }, name: "step3")); return new OrderResult { Status = "ok", OrderId = evt.OrderId }; }, input, @@ -573,7 +573,7 @@ private static AmazonServiceException MakeServiceException(string code, HttpStat private static async Task SingleStepWorkflow(OrderEvent input, IDurableContext context) { // One step succeed → forces a checkpoint flush, which the mock fails. - await context.StepAsync(async (_) => { await Task.CompletedTask; return "ok"; }, name: "s1"); + await context.StepAsync(async (_, _) => { await Task.CompletedTask; return "ok"; }, name: "s1"); return new OrderResult { Status = "done" }; } @@ -757,7 +757,7 @@ public async Task WrapAsync_ReplayDeterminism_CallbackIdStableAcrossInvocations( private static async Task MyWorkflow(OrderEvent input, IDurableContext context) { var validation = await context.StepAsync( - async (_) => { await Task.CompletedTask; return new ValidationResult { IsValid = true }; }, + async (_, _) => { await Task.CompletedTask; return new ValidationResult { IsValid = true }; }, name: "validate"); await context.WaitAsync(TimeSpan.FromSeconds(30), name: "delay"); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/InvokeOperationTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/InvokeOperationTests.cs index eb8b7a757..daf933cb5 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/InvokeOperationTests.cs +++ 
b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/InvokeOperationTests.cs @@ -27,7 +27,7 @@ private static (DurableContext context, RecordingBatcher recorder, TerminationMa var lambdaContext = new TestLambdaContext { Serializer = new DefaultLambdaJsonSerializer() }; #pragma warning restore AWSLAMBDA001 var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); return (context, recorder, tm, state); } @@ -189,7 +189,7 @@ public async Task InvokeAsync_NoSerializerRegistered_ThrowsInvalidOperationExcep var idGen = new OperationIdGenerator(); var lambdaContext = new TestLambdaContext(); // no serializer! var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); await Assert.ThrowsAsync(() => context.InvokeAsync(FunctionArn, "x", name: "no_serializer")); @@ -479,16 +479,16 @@ public async Task EndToEnd_StepInvokeStep_FirstInvocation_SuspendsOnInvoke() var lambdaContext = new TestLambdaContext { Serializer = new DefaultLambdaJsonSerializer() }; #pragma warning restore AWSLAMBDA001 var batcher = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, batcher.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, batcher.Batcher); var result = await DurableExecutionHandler.RunAsync( state, tm, async () => { - await context.StepAsync(async (_) => { await Task.CompletedTask; return "validated"; }, name: "validate"); + await context.StepAsync(async (_, _) => { await Task.CompletedTask; return "validated"; }, name: "validate"); var paymentId = await 
context.InvokeAsync( FunctionArn, "validated", name: "process_payment"); - return await context.StepAsync(async (_) => { await Task.CompletedTask; return paymentId + "-done"; }, name: "finalize"); + return await context.StepAsync(async (_, _) => { await Task.CompletedTask; return paymentId + "-done"; }, name: "finalize"); }); Assert.Equal(InvocationStatus.Pending, result.Status); @@ -530,21 +530,21 @@ public async Task EndToEnd_StepInvokeStep_SecondInvocation_ResumesAndCompletes() #pragma warning disable AWSLAMBDA001 var lambdaContext = new TestLambdaContext { Serializer = new DefaultLambdaJsonSerializer() }; #pragma warning restore AWSLAMBDA001 - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext); var finalizeRan = false; var result = await DurableExecutionHandler.RunAsync( state, tm, async () => { - var validated = await context.StepAsync(async (_) => { await Task.CompletedTask; return "fresh-validated"; }, name: "validate"); + var validated = await context.StepAsync(async (_, _) => { await Task.CompletedTask; return "fresh-validated"; }, name: "validate"); Assert.Equal("validated", validated); // cached var paymentId = await context.InvokeAsync( FunctionArn, validated, name: "process_payment"); Assert.Equal("pmt-42", paymentId); // cached - return await context.StepAsync(async (_) => + return await context.StepAsync(async (_, _) => { finalizeRan = true; await Task.CompletedTask; diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForCallbackTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForCallbackTests.cs index 430df41c5..3596eb856 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForCallbackTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForCallbackTests.cs @@ -32,7 +32,7 @@ private static (DurableContext context, RecordingBatcher recorder, 
TerminationMa var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(); var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); return (context, recorder, tm, state); } @@ -69,7 +69,7 @@ public async Task WaitForCallbackAsync_FreshExecution_RunsSubmitterAndSuspendsFo string? receivedCallbackId = null; var resultTask = context.WaitForCallbackAsync( - async (callbackId, ctx) => + async (callbackId, ctx, _) => { receivedCallbackId = callbackId; Assert.NotNull(ctx.Logger); @@ -106,7 +106,7 @@ public async Task WaitForCallbackAsync_FreshExecution_KebabSuffixedSubOpNames() WireServiceCallbackIdAllocation(recorder, state, "cb-1"); var resultTask = context.WaitForCallbackAsync( - async (_, _) => await Task.CompletedTask, + async (_, _, _) => await Task.CompletedTask, name: "approval"); await Task.WhenAny(resultTask, tm.TerminationTask); @@ -129,7 +129,7 @@ public async Task WaitForCallbackAsync_FreshExecution_NullParentName_LeavesSubOp WireServiceCallbackIdAllocation(recorder, state, "cb-1"); var resultTask = context.WaitForCallbackAsync( - async (_, _) => await Task.CompletedTask); + async (_, _, _) => await Task.CompletedTask); await Task.WhenAny(resultTask, tm.TerminationTask); await recorder.Batcher.DrainAsync(); @@ -150,7 +150,7 @@ public async Task WaitForCallbackAsync_ChildOperationIdsDeterministic() WireServiceCallbackIdAllocation(recorder, state, "cb-1"); var resultTask = context.WaitForCallbackAsync( - async (_, _) => await Task.CompletedTask, + async (_, _, _) => await Task.CompletedTask, name: "approval"); await Task.WhenAny(resultTask, tm.TerminationTask); @@ -177,7 +177,7 @@ public async Task WaitForCallbackAsync_CallbackTimeoutInheritsFromConfig() WireServiceCallbackIdAllocation(recorder, state, "cb-1"); var resultTask = 
context.WaitForCallbackAsync( - async (_, _) => await Task.CompletedTask, + async (_, _, _) => await Task.CompletedTask, name: "approval", config: new WaitForCallbackConfig { @@ -219,7 +219,7 @@ public async Task WaitForCallbackAsync_ReplayWithCallbackSucceeded_ReturnsResult var executed = false; var result = await context.WaitForCallbackAsync( - async (_, _) => { executed = true; await Task.CompletedTask; }, + async (_, _, _) => { executed = true; await Task.CompletedTask; }, name: "approval"); Assert.False(executed); // Replay returns cached without re-running submitter. @@ -278,7 +278,7 @@ public async Task WaitForCallbackAsync_ReplayCallbackTimedOut_ThrowsCallbackTime var ex = await Assert.ThrowsAsync(() => context.WaitForCallbackAsync( - async (_, _) => await Task.CompletedTask, + async (_, _, _) => await Task.CompletedTask, name: "approval")); Assert.Equal("callback timed out", ex.Message); @@ -335,7 +335,7 @@ public async Task WaitForCallbackAsync_ReplayCallbackFailed_ThrowsCallbackFailed var ex = await Assert.ThrowsAsync(() => context.WaitForCallbackAsync( - async (_, _) => await Task.CompletedTask, + async (_, _, _) => await Task.CompletedTask, name: "approval")); Assert.Equal("external rejected", ex.Message); @@ -374,7 +374,7 @@ public async Task WaitForCallbackAsync_SubmitterFails_ThrowsCallbackSubmitterExc var ex = await Assert.ThrowsAsync(() => context.WaitForCallbackAsync( - async (_, _) => await Task.CompletedTask, + async (_, _, _) => await Task.CompletedTask, name: "approval")); Assert.IsAssignableFrom(ex); @@ -421,7 +421,7 @@ public async Task WaitForCallbackAsync_ReplayParentContextFailedWithCallbackTime var ex = await Assert.ThrowsAsync(() => context.WaitForCallbackAsync( - async (_, _) => await Task.CompletedTask, + async (_, _, _) => await Task.CompletedTask, name: "approval")); // Concrete-type check: not just `is CallbackException` — must be the @@ -462,7 +462,7 @@ public async Task 
WaitForCallbackAsync_ReplayParentContextFailedWithCallbackFail var ex = await Assert.ThrowsAsync(() => context.WaitForCallbackAsync( - async (_, _) => await Task.CompletedTask, + async (_, _, _) => await Task.CompletedTask, name: "approval")); Assert.Equal(typeof(CallbackFailedException), ex.GetType()); @@ -479,7 +479,7 @@ public async Task WaitForCallbackAsync_RetryStrategyForwardedToSubmitterStep() var seenAttempts = new List(); var resultTask = context.WaitForCallbackAsync( - async (_, ctx) => + async (_, ctx, _) => { // The submitter receives an IWaitForCallbackContext (no AttemptNumber) // — but this test doesn't need to verify retry mechanics, only @@ -514,7 +514,7 @@ public async Task WaitForCallbackAsync_SubmitterContext_IsIWaitForCallbackContex Type? observedContextType = null; var resultTask = context.WaitForCallbackAsync( - async (_, ctx) => + async (_, ctx, _) => { observedContextType = ctx.GetType(); await Task.CompletedTask; diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForConditionOperationTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForConditionOperationTests.cs index 6d355c47a..50f7557b3 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForConditionOperationTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForConditionOperationTests.cs @@ -28,7 +28,7 @@ private static (DurableContext context, RecordingBatcher recorder, TerminationMa var idGen = new OperationIdGenerator(); var lambdaContext = CreateLambdaContext(serializer); var recorder = new RecordingBatcher(); - var context = new DurableContext(state, tm, idGen, "arn:test", lambdaContext, recorder.Batcher); + var context = new DurableContext(state, tm, new WorkflowCancellation(tm), idGen, "arn:test", lambdaContext, recorder.Batcher); return (context, recorder, tm, state); } @@ -44,7 +44,7 @@ public async Task FreshExecution_StrategyStopsImmediately_SucceedsWithFinalState // success path with no polling 
iterations. int checkInvocations = 0; var result = await context.WaitForConditionAsync( - check: async (state, ctx) => + check: async (state, ctx, _) => { checkInvocations++; Assert.Equal(checkInvocations, ctx.AttemptNumber); @@ -81,7 +81,7 @@ public async Task FreshExecution_StrategyContinues_EmitsRetryAndSuspends() // Strategy says continue → operation must emit RETRY and suspend. var task = context.WaitForConditionAsync( - check: async (state, _) => { await Task.CompletedTask; return state + 1; }, + check: async (state, _, _) => { await Task.CompletedTask; return state + 1; }, config: new WaitForConditionConfig { InitialState = 0, @@ -113,7 +113,7 @@ public async Task FreshExecution_UsesInitialStateOnFirstCall() int? observedInitial = null; await context.WaitForConditionAsync( - check: async (state, _) => + check: async (state, _, _) => { observedInitial ??= state; await Task.CompletedTask; @@ -136,7 +136,7 @@ public async Task FreshExecution_AttemptNumberIs1OnFirstCall() int observed = -1; await context.WaitForConditionAsync( - check: async (state, ctx) => + check: async (state, ctx, _) => { observed = ctx.AttemptNumber; await Task.CompletedTask; @@ -161,7 +161,7 @@ public async Task CheckContext_ExposesLogger() ILogger? 
observedLogger = null; await context.WaitForConditionAsync( - check: async (state, ctx) => + check: async (state, ctx, _) => { observedLogger = ctx.Logger; await Task.CompletedTask; @@ -199,7 +199,7 @@ public async Task Replay_Succeeded_ReturnsCachedAndSkipsCheck() var checkInvoked = false; var result = await context.WaitForConditionAsync( - check: async (_, _) => { checkInvoked = true; await Task.CompletedTask; return 0; }, + check: async (_, _, _) => { checkInvoked = true; await Task.CompletedTask; return 0; }, config: new WaitForConditionConfig { InitialState = 0, @@ -244,7 +244,7 @@ public async Task Replay_PendingTimerNotFired_ReSuspends() var checkInvoked = false; var task = context.WaitForConditionAsync( - check: async (_, _) => { checkInvoked = true; await Task.CompletedTask; return 0; }, + check: async (_, _, _) => { checkInvoked = true; await Task.CompletedTask; return 0; }, config: new WaitForConditionConfig { InitialState = 0, @@ -293,7 +293,7 @@ public async Task Replay_PendingTimerFired_ResumesWithCheckpointedState() int? observedState = null; int? observedAttempt = null; var result = await context.WaitForConditionAsync( - check: async (state, ctx) => + check: async (state, ctx, _) => { observedState = state; observedAttempt = ctx.AttemptNumber; @@ -346,7 +346,7 @@ public async Task Replay_Ready_ResumesWithCheckpointedState() int? observedState = null; int? observedAttempt = null; var result = await context.WaitForConditionAsync( - check: async (state, ctx) => + check: async (state, ctx, _) => { observedState = state; observedAttempt = ctx.AttemptNumber; @@ -392,7 +392,7 @@ public async Task Replay_Started_ResumesWithInitialState() int? observedState = null; int? 
observedAttempt = null; var result = await context.WaitForConditionAsync( - check: async (state, ctx) => + check: async (state, ctx, _) => { observedState = state; observedAttempt = ctx.AttemptNumber; @@ -444,7 +444,7 @@ public async Task Replay_Failed_FromCheckException_ThrowsStepException() var ex = await Assert.ThrowsAsync(() => context.WaitForConditionAsync( - check: async (_, _) => { await Task.CompletedTask; return 0; }, + check: async (_, _, _) => { await Task.CompletedTask; return 0; }, config: new WaitForConditionConfig { InitialState = 0, @@ -490,7 +490,7 @@ public async Task Replay_Failed_FromMaxAttempts_ThrowsWaitForConditionException( var ex = await Assert.ThrowsAsync(() => context.WaitForConditionAsync( - check: async (_, _) => { await Task.CompletedTask; return 0; }, + check: async (_, _, _) => { await Task.CompletedTask; return 0; }, config: new WaitForConditionConfig { InitialState = 0, @@ -513,7 +513,7 @@ public async Task Replay_Failed_FromMaxAttempts_LastState_MatchesLiveExecution() var liveEx = await Assert.ThrowsAsync(() => liveCtx.WaitForConditionAsync( - check: async (state, _) => { await Task.CompletedTask; return state + 1; }, + check: async (state, _, _) => { await Task.CompletedTask; return state + 1; }, config: new WaitForConditionConfig { InitialState = 5, @@ -555,7 +555,7 @@ public async Task Replay_Failed_FromMaxAttempts_LastState_MatchesLiveExecution() var replayEx = await Assert.ThrowsAsync(() => replayCtx.WaitForConditionAsync( - check: async (_, _) => { await Task.CompletedTask; return 0; }, + check: async (_, _, _) => { await Task.CompletedTask; return 0; }, config: new WaitForConditionConfig { InitialState = 0, @@ -601,7 +601,7 @@ public async Task Replay_Failed_FromMaxAttempts_NullPayload_LeavesLastStateNull( var ex = await Assert.ThrowsAsync(() => context.WaitForConditionAsync( - check: async (_, _) => { await Task.CompletedTask; return 0; }, + check: async (_, _, _) => { await Task.CompletedTask; return 0; }, config: new 
WaitForConditionConfig { InitialState = 0, @@ -625,7 +625,7 @@ public async Task MaxAttemptsExhausted_FreshExecution_ThrowsWaitForConditionExce // condition was met. Operation must throw, not SUCCEED. var ex = await Assert.ThrowsAsync(() => context.WaitForConditionAsync( - check: async (state, _) => { await Task.CompletedTask; return state + 1; }, + check: async (state, _, _) => { await Task.CompletedTask; return state + 1; }, config: new WaitForConditionConfig { InitialState = 5, @@ -658,7 +658,7 @@ public async Task MaxAttemptsExhausted_DistinguishesFromConditionMet() // The same maxAttempts=1 strategy WITH an isDone that's satisfied // should SUCCEED, not throw. var result = await context.WaitForConditionAsync( - check: async (_, _) => { await Task.CompletedTask; return 99; }, + check: async (_, _, _) => { await Task.CompletedTask; return 99; }, config: new WaitForConditionConfig { InitialState = 0, @@ -681,7 +681,7 @@ public async Task CheckThrows_CheckpointsFailAndThrows() var ex = await Assert.ThrowsAsync(() => context.WaitForConditionAsync( - check: async (_, _) => { await Task.CompletedTask; throw new InvalidOperationException("boom"); }, + check: async (_, _, _) => { await Task.CompletedTask; throw new InvalidOperationException("boom"); }, config: new WaitForConditionConfig { InitialState = 0, @@ -735,7 +735,7 @@ public async Task ReplayDeterminism_StateIsCarriedAcrossIterations() CounterState? observed = null; int? 
observedAttempt = null; var result = await context.WaitForConditionAsync( - check: async (state, ctx) => + check: async (state, ctx, _) => { observed = state; observedAttempt = ctx.AttemptNumber; @@ -786,7 +786,7 @@ public async Task ReplayDeterminism_RoundTripsThroughLambdaSerializer() serializer: serializer); var result = await context.WaitForConditionAsync( - check: async (_, _) => { await Task.CompletedTask; return new TestPerson { Name = "ignored", Age = 0 }; }, + check: async (_, _, _) => { await Task.CompletedTask; return new TestPerson { Name = "ignored", Age = 0 }; }, config: new WaitForConditionConfig { InitialState = new TestPerson { Name = "init", Age = 0 }, @@ -810,7 +810,7 @@ public async Task FreshExecution_FlushesStartBeforeSuspending() var (context, recorder, tm, _) = CreateContext(); var task = context.WaitForConditionAsync( - check: async (state, _) => { await Task.CompletedTask; return state + 1; }, + check: async (state, _, _) => { await Task.CompletedTask; return state + 1; }, config: new WaitForConditionConfig { InitialState = 0, @@ -852,7 +852,7 @@ public async Task ReplayUnknownStatus_ThrowsNonDeterministicException() await Assert.ThrowsAsync(() => context.WaitForConditionAsync( - check: async (_, _) => { await Task.CompletedTask; return 0; }, + check: async (_, _, _) => { await Task.CompletedTask; return 0; }, config: new WaitForConditionConfig { InitialState = 0, @@ -882,7 +882,7 @@ public async Task ReplayTypeMismatch_ThrowsNonDeterministicException() var ex = await Assert.ThrowsAsync(() => context.WaitForConditionAsync( - check: async (_, _) => { await Task.CompletedTask; return 0; }, + check: async (_, _, _) => { await Task.CompletedTask; return 0; }, config: new WaitForConditionConfig { InitialState = 0, @@ -915,7 +915,7 @@ public async Task NullConfig_ThrowsArgumentNullException() var (context, _, _, _) = CreateContext(); await Assert.ThrowsAsync(() => context.WaitForConditionAsync( - check: async (_, _) => { await 
Task.CompletedTask; return 0; }, + check: async (_, _, _) => { await Task.CompletedTask; return 0; }, config: null!)); } @@ -948,11 +948,12 @@ public async Task DeserializeStateOrInitial_CorruptPayload_LogsWarningAndFallsBa var recorder = new RecordingBatcher(); var logger = new RecordingLogger(); + var tm = new TerminationManager(); var op = new WaitForConditionOperation( operationId: IdAt(1), name: "poll", parentId: null, - check: async (s, _) => { await Task.CompletedTask; return s; }, + check: async (s, _, _) => { await Task.CompletedTask; return s; }, config: new WaitForConditionConfig { InitialState = 999, @@ -961,7 +962,8 @@ public async Task DeserializeStateOrInitial_CorruptPayload_LogsWarningAndFallsBa serializer: new ThrowingLambdaSerializer(), logger: logger, state: state, - termination: new TerminationManager(), + termination: tm, + workflowCancellation: new WorkflowCancellation(tm), durableExecutionArn: "arn:test", batcher: recorder.Batcher); @@ -1008,11 +1010,12 @@ public async Task ReplayFailed_CorruptLastStatePayload_LogsWarningAndLastStateNu var recorder = new RecordingBatcher(); var logger = new RecordingLogger(); + var tm = new TerminationManager(); var op = new WaitForConditionOperation( operationId: IdAt(1), name: "poll", parentId: null, - check: async (s, _) => { await Task.CompletedTask; return s; }, + check: async (s, _, _) => { await Task.CompletedTask; return s; }, config: new WaitForConditionConfig { InitialState = 0, @@ -1021,7 +1024,8 @@ public async Task ReplayFailed_CorruptLastStatePayload_LogsWarningAndLastStateNu serializer: new ThrowingLambdaSerializer(), logger: logger, state: state, - termination: new TerminationManager(), + termination: tm, + workflowCancellation: new WorkflowCancellation(tm), durableExecutionArn: "arn:test", batcher: recorder.Batcher); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WorkflowCancellationTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WorkflowCancellationTests.cs 
new file mode 100644 index 000000000..06fbb35d9 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WorkflowCancellationTests.cs @@ -0,0 +1,267 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +using Amazon.Lambda.DurableExecution.Internal; +using Amazon.Lambda.Serialization.SystemTextJson; +using Amazon.Lambda.TestUtilities; +using Xunit; + +namespace Amazon.Lambda.DurableExecution.Tests; + +/// +/// Cancellation-flow tests for and the +/// linked-token contract surfaced through . +/// Companion to docs/design/cancellation-design.md. +/// +public class WorkflowCancellationTests +{ + private static TestLambdaContext CreateLambdaContext() => +#pragma warning disable AWSLAMBDA001 // TestLambdaContext.Serializer is experimental. + new() { Serializer = new DefaultLambdaJsonSerializer() }; +#pragma warning restore AWSLAMBDA001 + + private sealed record Harness( + DurableContext Context, + TerminationManager Termination, + WorkflowCancellation WorkflowCancellation, + RecordingBatcher Recorder); + + private static Harness CreateHarness() + { + var state = new ExecutionState(); + var tm = new TerminationManager(); + var wfc = new WorkflowCancellation(tm); + var idGen = new OperationIdGenerator(); + var recorder = new RecordingBatcher(); + var ctx = new DurableContext(state, tm, wfc, idGen, "arn:test", CreateLambdaContext(), recorder.Batcher); + return new Harness(ctx, tm, wfc, recorder); + } + + // ── WorkflowCancellation primitive ────────────────────────────────── + + [Fact] + public void Token_NotCancelled_BeforeTermination() + { + var tm = new TerminationManager(); + using var wfc = new WorkflowCancellation(tm); + + Assert.False(wfc.Token.IsCancellationRequested); + } + + [Fact] + public async Task Token_CancelledWhenTerminationFires() + { + var tm = new TerminationManager(); + using var wfc = new WorkflowCancellation(tm); + var observed = new TaskCompletionSource(); + 
wfc.Token.Register(() => observed.TrySetResult()); + + tm.Terminate(TerminationReason.WaitScheduled); + + await observed.Task.WaitAsync(TimeSpan.FromSeconds(2)); + Assert.True(wfc.Token.IsCancellationRequested); + } + + [Fact] + public void Dispose_AfterTermination_DoesNotThrow() + { + var tm = new TerminationManager(); + var wfc = new WorkflowCancellation(tm); + tm.Terminate(TerminationReason.WaitScheduled); + wfc.Dispose(); + } + + // ── StepAsync token plumbing ──────────────────────────────────────── + + [Fact] + public async Task StepAsync_CallerToken_PropagatesIntoFunc() + { + var harness = CreateHarness(); + using var caller = new CancellationTokenSource(); + CancellationToken seen = default; + + caller.Cancel(); + + await Assert.ThrowsAsync(() => + harness.Context.StepAsync(async (_, ct) => + { + seen = ct; + await Task.CompletedTask; + return 0; + }, name: "step", cancellationToken: caller.Token)); + } + + [Fact] + public async Task StepAsync_LinkedToken_FiresWhenWorkflowCancels() + { + var harness = CreateHarness(); + var enteredFunc = new TaskCompletionSource(); + CancellationToken stepToken = default; + + var task = harness.Context.StepAsync(async (_, ct) => + { + stepToken = ct; + enteredFunc.TrySetResult(); + await Task.Delay(Timeout.Infinite, ct); + return 0; + }, name: "step"); + + await enteredFunc.Task.WaitAsync(TimeSpan.FromSeconds(2)); + harness.Termination.Terminate(TerminationReason.WaitScheduled); + + await Assert.ThrowsAsync(() => task); + Assert.True(stepToken.IsCancellationRequested); + } + + [Fact] + public async Task StepAsync_UserThrownOCE_IsTreatedAsFailureAndRetried() + { + // A user-thrown OperationCanceledException unrelated to our linked token + // falls through the cancellation when-clause and is funneled through + // the retry strategy like any other exception. 
+ var harness = CreateHarness(); + var attempts = 0; + + var ex = await Assert.ThrowsAsync(() => + harness.Context.StepAsync(async (_, _) => + { + attempts++; + await Task.CompletedTask; + throw new OperationCanceledException("user-thrown, unrelated to SDK token"); + }, name: "step")); + + Assert.Equal(1, attempts); + Assert.Equal(typeof(OperationCanceledException).FullName, ex.ErrorType); + Assert.Contains(harness.Recorder.Flushed, + u => u.Action == OperationAction.FAIL && u.Type == OperationTypes.Step); + } + + [Fact] + public async Task StepAsync_CancellationViaLinkedToken_DoesNotCheckpointFailOrSucceed() + { + var harness = CreateHarness(); + var entered = new TaskCompletionSource(); + + var task = harness.Context.StepAsync(async (_, ct) => + { + entered.TrySetResult(); + await Task.Delay(Timeout.Infinite, ct); + return 0; + }, name: "step"); + + await entered.Task.WaitAsync(TimeSpan.FromSeconds(2)); + harness.Termination.Terminate(TerminationReason.WaitScheduled); + + await Assert.ThrowsAsync(() => task); + + // No FAIL/SUCCEED checkpoint emitted (only any START fire-and-forget that + // may have flushed under AtLeastOncePerRetry semantics). 
+ Assert.DoesNotContain(harness.Recorder.Flushed, u => u.Action == OperationAction.FAIL); + Assert.DoesNotContain(harness.Recorder.Flushed, u => u.Action == OperationAction.SUCCEED); + } + + // ── Child context propagation ─────────────────────────────────────── + + [Fact] + public async Task RunInChildContextAsync_LinkedToken_CancelsInnerStep() + { + var harness = CreateHarness(); + var entered = new TaskCompletionSource(); + CancellationToken childToken = default; + CancellationToken stepToken = default; + + var task = harness.Context.RunInChildContextAsync(async (childCtx, ct) => + { + childToken = ct; + return await childCtx.StepAsync(async (_, stepCt) => + { + stepToken = stepCt; + entered.TrySetResult(); + await Task.Delay(Timeout.Infinite, stepCt); + return 0; + }, name: "inner"); + }, name: "outer"); + + await entered.Task.WaitAsync(TimeSpan.FromSeconds(2)); + harness.Termination.Terminate(TerminationReason.WaitScheduled); + + await Assert.ThrowsAsync(() => task); + Assert.True(childToken.IsCancellationRequested); + Assert.True(stepToken.IsCancellationRequested); + } + + // ── WaitForConditionAsync ─────────────────────────────────────────── + + [Fact] + public async Task WaitForConditionAsync_CheckReceivesLinkedToken() + { + var harness = CreateHarness(); + var entered = new TaskCompletionSource(); + CancellationToken seen = default; + + var task = harness.Context.WaitForConditionAsync(async (state, _, ct) => + { + seen = ct; + entered.TrySetResult(); + await Task.Delay(Timeout.Infinite, ct); + return state; + }, + new WaitForConditionConfig + { + InitialState = 0, + WaitStrategy = WaitStrategy.Fixed(TimeSpan.FromSeconds(1)), + }, + name: "poll"); + + await entered.Task.WaitAsync(TimeSpan.FromSeconds(2)); + harness.Termination.Terminate(TerminationReason.WaitScheduled); + + await Assert.ThrowsAsync(() => task); + Assert.True(seen.IsCancellationRequested); + } + + // ── Replay short-circuit ──────────────────────────────────────────── + + [Fact] + public 
async Task StepAsync_Replay_DoesNotInvokeFunc_EvenWithCancelledToken() + { + // Cached SUCCESS replay must short-circuit without calling the user + // Func, regardless of token state — replay determinism is structural. + var operationId = OperationIdGenerator.HashOperationId("1"); + var state = new ExecutionState(); + state.LoadFromCheckpoint(new InitialExecutionState + { + Operations = new List + { + new() + { + Id = operationId, + Type = OperationTypes.Step, + Status = OperationStatuses.Succeeded, + Name = "step", + StepDetails = new StepDetails { Result = "42" } + } + } + }); + + var tm = new TerminationManager(); + var wfc = new WorkflowCancellation(tm); + tm.Terminate(TerminationReason.WaitScheduled); // cancel before invocation + await Task.Yield(); + Assert.True(wfc.Token.IsCancellationRequested); + + var idGen = new OperationIdGenerator(); + var ctx = new DurableContext(state, tm, wfc, idGen, "arn:test", CreateLambdaContext()); + var invoked = false; + + var result = await ctx.StepAsync(async (_, _) => + { + invoked = true; + await Task.CompletedTask; + return 99; + }, name: "step"); + + Assert.False(invoked); + Assert.Equal(42, result); + } +} From a92f8ef45d0ffac2ca3a80a1718249e8869dcd78 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Wed, 10 Jun 2026 14:33:14 -0400 Subject: [PATCH 2/6] cancellation token --- .../Program.cs | 2 +- .../TestFunctions/AtMostOnceCrashFunction/Function.cs | 2 +- .../TestFunctions/CallbackFailedFunction/Function.cs | 2 +- .../ChildContextFailsFunction/Function.cs | 4 ++-- .../TestFunctions/ChildContextFunction/Function.cs | 6 +++--- .../ChildContextRetryFailsFunction/Function.cs | 4 ++-- .../CreateCallbackHappyPathFunction/Function.cs | 2 +- .../InvokeChildTenantFunction/Function.cs | 2 +- .../InvokeFailureChildFunction/Function.cs | 2 +- .../InvokeHappyPathChildFunction/Function.cs | 2 +- .../InvokeReplayDeterminismChildFunction/Function.cs | 2 +- .../InvokeReplayDeterminismParentFunction/Function.cs | 4 ++-- 
.../TestFunctions/LongRetryChainFunction/Function.cs | 2 +- .../TestFunctions/LongerWaitFunction/Function.cs | 4 ++-- .../TestFunctions/MultipleStepsFunction/Function.cs | 10 +++++----- .../ReplayAwareLoggerFunction/Function.cs | 4 ++-- .../ReplayDeterminismFunction/Function.cs | 4 ++-- .../TestFunctions/RetryExhaustionFunction/Function.cs | 2 +- .../TestFunctions/RetryFunction/Function.cs | 2 +- .../TestFunctions/StepFailsFunction/Function.cs | 2 +- .../TestFunctions/StepWaitStepFunction/Function.cs | 4 ++-- .../WaitForCallbackHappyPathFunction/Function.cs | 2 +- .../WaitForCallbackSubmitterFailsFunction/Function.cs | 2 +- .../WaitForConditionExponentialFunction/Function.cs | 2 +- .../WaitForConditionHappyPathFunction/Function.cs | 2 +- .../WaitForConditionMaxAttemptsFunction/Function.cs | 2 +- .../Function.cs | 6 +++--- .../Function.cs | 2 +- 28 files changed, 43 insertions(+), 43 deletions(-) diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Program.cs b/Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Program.cs index 41404ca96..10d9dd3ca 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Program.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.AotPublishTest/Program.cs @@ -35,7 +35,7 @@ public static Task HandlerAsync( private static async Task WorkflowAsync(OrderEvent input, IDurableContext context) { var validation = await context.StepAsync( - async (_) => + async (_, _) => { await Task.CompletedTask; return new ValidationResult { IsValid = true }; diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/AtMostOnceCrashFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/AtMostOnceCrashFunction/Function.cs index 443d05b8a..bdc9a9d55 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/AtMostOnceCrashFunction/Function.cs +++ 
b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/AtMostOnceCrashFunction/Function.cs @@ -39,7 +39,7 @@ public Task Handler( private async Task Workflow(TestEvent input, IDurableContext context) { var result = await context.StepAsync( - async (ctx) => + async (ctx, _) => { await Task.CompletedTask; if (ctx.AttemptNumber == 1) diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/CallbackFailedFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/CallbackFailedFunction/Function.cs index 721302ed3..57876259d 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/CallbackFailedFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/CallbackFailedFunction/Function.cs @@ -40,7 +40,7 @@ private async Task Workflow(TestEvent input, IDurableContext context) var cb = await context.CreateCallbackAsync(name: "approve"); // Wrap the hand-off in a step so replays don't re-invoke the rejecter. 
- await context.StepAsync(async _ => + await context.StepAsync(async (_, _) => { var payload = $$"""{"callbackId":"{{cb.CallbackId}}","orderId":"{{input.OrderId}}"}"""; await LambdaClient.InvokeAsync(new InvokeRequest diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextFailsFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextFailsFunction/Function.cs index ae3134f24..d62207f74 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextFailsFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextFailsFunction/Function.cs @@ -29,10 +29,10 @@ private async Task Workflow(TestEvent input, IDurableContext context // service must record a ContextFailed event with the error details and // mark the workflow FAILED. await context.RunInChildContextAsync( - async (childCtx) => + async (childCtx, _) => { await childCtx.StepAsync( - async (_) => { await Task.CompletedTask; return $"prepared-{input.OrderId}"; }, + async (_, _) => { await Task.CompletedTask; return $"prepared-{input.OrderId}"; }, name: "prepare"); throw new InvalidOperationException("intentional child context failure for integration test"); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextFunction/Function.cs index 507f1df0f..e62cca8c0 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextFunction/Function.cs @@ -29,16 +29,16 @@ private async Task Workflow(TestEvent input, IDurableContext context // return value is checkpointed at the parent level as a CONTEXT // SUCCEED record, so 
on replay we'd see it returned from cache. var phaseResult = await context.RunInChildContextAsync( - async (childCtx) => + async (childCtx, _) => { var validated = await childCtx.StepAsync( - async (_) => { await Task.CompletedTask; return $"validated-{input.OrderId}"; }, + async (_, _) => { await Task.CompletedTask; return $"validated-{input.OrderId}"; }, name: "validate"); await childCtx.WaitAsync(TimeSpan.FromSeconds(2), name: "short_wait"); var processed = await childCtx.StepAsync( - async (_) => { await Task.CompletedTask; return $"processed-{validated}"; }, + async (_, _) => { await Task.CompletedTask; return $"processed-{validated}"; }, name: "process"); return processed; diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextRetryFailsFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextRetryFailsFunction/Function.cs index 521a7fa50..7c7dd4974 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextRetryFailsFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ChildContextRetryFailsFunction/Function.cs @@ -30,10 +30,10 @@ private async Task Workflow(TestEvent input, IDurableContext context // close as ContextFailed when retries are exhausted — proving the // child is a single retry/error boundary. 
await context.RunInChildContextAsync( - async (childCtx) => + async (childCtx, _) => { return await childCtx.StepAsync( - async (ctx) => + async (ctx, _) => { await Task.CompletedTask; throw new InvalidOperationException( diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/CreateCallbackHappyPathFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/CreateCallbackHappyPathFunction/Function.cs index e9712e6ea..87b92f32e 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/CreateCallbackHappyPathFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/CreateCallbackHappyPathFunction/Function.cs @@ -42,7 +42,7 @@ private async Task Workflow(TestEvent input, IDurableContext context) var cb = await context.CreateCallbackAsync(name: "approve"); // Wrap the hand-off in a step so replays don't re-invoke the approver. - await context.StepAsync(async _ => + await context.StepAsync(async (_, _) => { var payload = $$"""{"callbackId":"{{cb.CallbackId}}","orderId":"integ-test"}"""; await LambdaClient.InvokeAsync(new InvokeRequest diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeChildTenantFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeChildTenantFunction/Function.cs index 240565384..922e0b946 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeChildTenantFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeChildTenantFunction/Function.cs @@ -26,7 +26,7 @@ public Task Handler( private async Task Workflow(int input, IDurableContext context) { var formatted = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"tenant-aware-{input}"; }, + async (_, _) => { await Task.CompletedTask; return 
$"tenant-aware-{input}"; }, name: "tenant_step"); return formatted; } diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeFailureChildFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeFailureChildFunction/Function.cs index 7e96ff0c8..291afbf2a 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeFailureChildFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeFailureChildFunction/Function.cs @@ -30,7 +30,7 @@ private async Task Workflow(int input, IDurableContext context) // FAILED chained invocation and raises InvokeFailedException with the // step's error type (System.InvalidOperationException) attached. await context.StepAsync( - async (_) => + async (_, _) => { await Task.CompletedTask; throw new InvalidOperationException("intentional child failure"); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeHappyPathChildFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeHappyPathChildFunction/Function.cs index 898021cdd..bc3c0fc39 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeHappyPathChildFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeHappyPathChildFunction/Function.cs @@ -26,7 +26,7 @@ public Task Handler( private async Task Workflow(int input, IDurableContext context) { var prefixed = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"got-{input}"; }, + async (_, _) => { await Task.CompletedTask; return $"got-{input}"; }, name: "format"); return prefixed; } diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeReplayDeterminismChildFunction/Function.cs 
b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeReplayDeterminismChildFunction/Function.cs index 5115101e1..7d5e8b70c 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeReplayDeterminismChildFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeReplayDeterminismChildFunction/Function.cs @@ -26,7 +26,7 @@ public Task Handler( private async Task Workflow(string input, IDurableContext context) { var echoed = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"echoed:{input}"; }, + async (_, _) => { await Task.CompletedTask; return $"echoed:{input}"; }, name: "child_echo"); return echoed; } diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeReplayDeterminismParentFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeReplayDeterminismParentFunction/Function.cs index b00be9c95..4ba7dfcdc 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeReplayDeterminismParentFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/InvokeReplayDeterminismParentFunction/Function.cs @@ -32,7 +32,7 @@ private async Task Workflow(TestEvent input, IDurableContext context // checkpointed value — proves the SDK's deterministic operation IDs // line up with the service's view of the state. var generatedId = await context.StepAsync( - async (_) => { await Task.CompletedTask; return Guid.NewGuid().ToString(); }, + async (_, _) => { await Task.CompletedTask; return Guid.NewGuid().ToString(); }, name: "before_invoke"); // The chained invoke forces a suspend/resume cycle. 
After the resume, @@ -44,7 +44,7 @@ private async Task Workflow(TestEvent input, IDurableContext context name: "echo_invoke"); var afterInvoke = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"final:{invokeResult}"; }, + async (_, _) => { await Task.CompletedTask; return $"final:{invokeResult}"; }, name: "after_invoke"); return new TestResult { Status = "completed", Data = afterInvoke }; diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongRetryChainFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongRetryChainFunction/Function.cs index 7d3c0f0e1..bdbf811df 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongRetryChainFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongRetryChainFunction/Function.cs @@ -33,7 +33,7 @@ public Task Handler( private async Task Workflow(TestEvent input, IDurableContext context) { var result = await context.StepAsync( - async (ctx) => + async (ctx, _) => { await Task.CompletedTask; if (ctx.AttemptNumber < 6) diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Function.cs index 401066c0e..7d241a02f 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/LongerWaitFunction/Function.cs @@ -26,13 +26,13 @@ public Task Handler( private async Task Workflow(TestEvent input, IDurableContext context) { var step1 = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"started-{input.OrderId}"; }, + async (_, _) => { await Task.CompletedTask; return $"started-{input.OrderId}"; }, name: "before_wait"); 
await context.WaitAsync(TimeSpan.FromSeconds(15), name: "long_wait"); var step2 = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"after_wait-{step1}"; }, + async (_, _) => { await Task.CompletedTask; return $"after_wait-{step1}"; }, name: "after_wait"); return new TestResult { Status = "completed", Data = step2 }; diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Function.cs index cdf5992b6..986126a3f 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/MultipleStepsFunction/Function.cs @@ -26,23 +26,23 @@ public Task Handler( private async Task Workflow(TestEvent input, IDurableContext context) { var step1 = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"a-{input.OrderId}"; }, + async (_, _) => { await Task.CompletedTask; return $"a-{input.OrderId}"; }, name: "step_1"); var step2 = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"{step1}-b"; }, + async (_, _) => { await Task.CompletedTask; return $"{step1}-b"; }, name: "step_2"); var step3 = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"{step2}-c"; }, + async (_, _) => { await Task.CompletedTask; return $"{step2}-c"; }, name: "step_3"); var step4 = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"{step3}-d"; }, + async (_, _) => { await Task.CompletedTask; return $"{step3}-d"; }, name: "step_4"); var step5 = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"{step4}-e"; }, + async (_, _) => { await Task.CompletedTask; return $"{step4}-e"; }, name: "step_5"); return new TestResult { Status = "completed", Data = step5 }; 
diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayAwareLoggerFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayAwareLoggerFunction/Function.cs index dbbcc24a9..55b4a5be9 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayAwareLoggerFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayAwareLoggerFunction/Function.cs @@ -36,7 +36,7 @@ private async Task Workflow(TestEvent input, IDurableContext context Console.WriteLine($"LOG_REPLAY_CONTROL workflow_start order={input.OrderId}"); var step1 = await context.StepAsync( - async (_) => + async (_, _) => { // Emitted inside the step's BeginScope, so the line carries // both execution-level scope (durableExecutionArn, awsRequestId) @@ -57,7 +57,7 @@ private async Task Workflow(TestEvent input, IDurableContext context // Step 2 runs fresh on invocation 2 — its EnterExecutionMode flips the // logger from suppress to passthrough. The next LogInformation lands. var step2 = await context.StepAsync( - async (_) => + async (_, _) => { await Task.CompletedTask; return $"processed-{step1}"; diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Function.cs index 22f919900..688a8227c 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/ReplayDeterminismFunction/Function.cs @@ -27,7 +27,7 @@ private async Task Workflow(TestEvent input, IDurableContext context { // Step 1 generates a fresh GUID. On replay, this MUST return the cached value. 
var generatedId = await context.StepAsync( - async (_) => { await Task.CompletedTask; return Guid.NewGuid().ToString(); }, + async (_, _) => { await Task.CompletedTask; return Guid.NewGuid().ToString(); }, name: "generate_id"); // Force a suspend/resume cycle to trigger replay @@ -35,7 +35,7 @@ private async Task Workflow(TestEvent input, IDurableContext context // Step 2 echoes the GUID. After replay, it should see the SAME GUID from step 1. var echoed = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"echo:{generatedId}"; }, + async (_, _) => { await Task.CompletedTask; return $"echo:{generatedId}"; }, name: "echo_id"); return new TestResult { Status = "completed", Data = echoed }; diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/RetryExhaustionFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/RetryExhaustionFunction/Function.cs index 3e78ffd9d..97602186e 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/RetryExhaustionFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/RetryExhaustionFunction/Function.cs @@ -26,7 +26,7 @@ public Task Handler( private async Task Workflow(TestEvent input, IDurableContext context) { var result = await context.StepAsync( - async (ctx) => + async (ctx, _) => { await Task.CompletedTask; throw new InvalidOperationException($"always-fails attempt {ctx.AttemptNumber}"); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/RetryFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/RetryFunction/Function.cs index 800dc075f..5f81ca7dd 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/RetryFunction/Function.cs +++ 
b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/RetryFunction/Function.cs @@ -26,7 +26,7 @@ public Task Handler( private async Task Workflow(TestEvent input, IDurableContext context) { var result = await context.StepAsync( - async (ctx) => + async (ctx, _) => { await Task.CompletedTask; if (ctx.AttemptNumber < 3) diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Function.cs index de0246a50..293b83424 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepFailsFunction/Function.cs @@ -26,7 +26,7 @@ public Task Handler( private async Task Workflow(TestEvent input, IDurableContext context) { await context.StepAsync( - async (_) => + async (_, _) => { await Task.CompletedTask; throw new InvalidOperationException("intentional failure for integration test"); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Function.cs index 97f7edd51..7de143800 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/StepWaitStepFunction/Function.cs @@ -26,13 +26,13 @@ public Task Handler( private async Task Workflow(TestEvent input, IDurableContext context) { var step1 = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"validated-{input.OrderId}"; }, + async (_, _) => { await Task.CompletedTask; return $"validated-{input.OrderId}"; }, name: "validate"); await 
context.WaitAsync(TimeSpan.FromSeconds(3), name: "short_wait"); var step2 = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"processed-{step1}"; }, + async (_, _) => { await Task.CompletedTask; return $"processed-{step1}"; }, name: "process"); return new TestResult { Status = "completed", Data = step2 }; diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForCallbackHappyPathFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForCallbackHappyPathFunction/Function.cs index 129344d25..76e8ac3dd 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForCallbackHappyPathFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForCallbackHappyPathFunction/Function.cs @@ -41,7 +41,7 @@ private async Task Workflow(TestEvent input, IDurableContext context) ?? throw new InvalidOperationException("EXTERNAL_FUNCTION_NAME env var not set"); var result = await context.WaitForCallbackAsync( - submitter: async (callbackId, cbCtx) => + submitter: async (callbackId, cbCtx, _) => { var payload = $$"""{"callbackId":"{{callbackId}}","orderId":"{{input.OrderId}}"}"""; await LambdaClient.InvokeAsync(new InvokeRequest diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForCallbackSubmitterFailsFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForCallbackSubmitterFailsFunction/Function.cs index 19b60d567..b9851d5ea 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForCallbackSubmitterFailsFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForCallbackSubmitterFailsFunction/Function.cs @@ -30,7 +30,7 @@ private async Task Workflow(TestEvent input, IDurableContext context) // failure as 
CallbackSubmitterException. The workflow does not catch // it, so the durable execution surfaces FAILED with that exception. var result = await context.WaitForCallbackAsync( - submitter: async (callbackId, cbCtx) => + submitter: async (callbackId, cbCtx, _) => { await Task.CompletedTask; throw new InvalidOperationException("submitter intentional failure"); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionExponentialFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionExponentialFunction/Function.cs index d73161e60..f3aad3f52 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionExponentialFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionExponentialFunction/Function.cs @@ -27,7 +27,7 @@ private async Task Workflow(TestEvent input, IDurableContext context // backoffRate=1.5, maxDelay=4s, no jitter: delays are 1s, 1.5s // (which the SDK ceilings to 2s due to 1s timer granularity). 
var finalState = await context.WaitForConditionAsync( - check: async (state, ctx) => + check: async (state, ctx, _) => { await Task.CompletedTask; var done = ctx.AttemptNumber >= 3; diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionHappyPathFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionHappyPathFunction/Function.cs index 086eb6bba..00d68b4c3 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionHappyPathFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionHappyPathFunction/Function.cs @@ -26,7 +26,7 @@ private async Task Workflow(TestEvent input, IDurableContext context // Each poll iteration is a separate Lambda invocation; the state is // carried across iterations via the RETRY checkpoint payload. var finalState = await context.WaitForConditionAsync( - check: async (state, ctx) => + check: async (state, ctx, _) => { await Task.CompletedTask; return new State(state.Counter + 1, ctx.AttemptNumber); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionMaxAttemptsFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionMaxAttemptsFunction/Function.cs index 8f631fe86..8bdda540a 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionMaxAttemptsFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionMaxAttemptsFunction/Function.cs @@ -30,7 +30,7 @@ private async Task Workflow(TestEvent input, IDurableContext context try { await context.WaitForConditionAsync( - check: async (state, _) => + check: async (state, _, _) => { await Task.CompletedTask; return state + 1; diff --git 
a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionReplayDeterminismFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionReplayDeterminismFunction/Function.cs index 6300bb6fe..940b75ff1 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionReplayDeterminismFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionReplayDeterminismFunction/Function.cs @@ -25,14 +25,14 @@ private async Task Workflow(TestEvent input, IDurableContext context // Step 1: capture a fresh value. On replay this MUST return the // checkpointed value rather than re-executing. var generatedId = await context.StepAsync( - async (_) => { await Task.CompletedTask; return Guid.NewGuid().ToString(); }, + async (_, _) => { await Task.CompletedTask; return Guid.NewGuid().ToString(); }, name: "before_poll"); // Wait-for-condition with 3 polls. Each poll iteration is a separate // invocation, and the operation's deterministic ID + RETRY-payload // state must round-trip across re-invocations. var pollResult = await context.WaitForConditionAsync( - check: async (state, ctx) => + check: async (state, ctx, _) => { await Task.CompletedTask; return new Counter(state.Count + 1); @@ -50,7 +50,7 @@ private async Task Workflow(TestEvent input, IDurableContext context // Step 2: echo the generated ID. After replay, this should see the // SAME GUID from step 1 — proves replay returned the cached value. 
var echoed = await context.StepAsync( - async (_) => { await Task.CompletedTask; return $"echo:{generatedId}:{pollResult.Count}"; }, + async (_, _) => { await Task.CompletedTask; return $"echo:{generatedId}:{pollResult.Count}"; }, name: "after_poll"); return new TestResult { Status = "completed", Data = echoed }; diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionUserCheckThrowsFunction/Function.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionUserCheckThrowsFunction/Function.cs index 404114dc4..3e18594ab 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionUserCheckThrowsFunction/Function.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/TestFunctions/WaitForConditionUserCheckThrowsFunction/Function.cs @@ -31,7 +31,7 @@ private async Task Workflow(TestEvent input, IDurableContext context try { await context.WaitForConditionAsync( - check: async (state, ctx) => + check: async (state, ctx, _) => { await Task.CompletedTask; if (ctx.AttemptNumber == 2) From d288f96c7f20627cddd9637dc496e74b7254d306 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Wed, 10 Jun 2026 14:36:24 -0400 Subject: [PATCH 3/6] cancellation token --- .../docs/core/cancellation.md | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/docs/core/cancellation.md diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/cancellation.md b/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/cancellation.md new file mode 100644 index 000000000..1abc75858 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/docs/core/cancellation.md @@ -0,0 +1,34 @@ +# Cancellation + +Every user `Func` accepted by `IDurableContext` (`StepAsync`, `RunInChildContextAsync`, `WaitForCallbackAsync`, `WaitForConditionAsync`) receives a `CancellationToken` 
parameter. Pass it to cancellation-aware APIs inside the body so the workflow can tear down cleanly. + +## What the token observes + +The token is a linked source combining: + +1. The `CancellationToken` you passed to the `IDurableContext` method (so the caller's cancel intent reaches the body). +2. An SDK-owned workflow-shutdown signal that fires when the workflow is being torn down (a sibling operation suspended, a checkpoint failed, or a future parallel branch aborted). + +```csharp +var user = await ctx.StepAsync( + async (_, ct) => await httpClient.GetAsync(url, ct), + name: "fetch"); +``` + +When either trigger fires, the token transitions to `IsCancellationRequested = true` and `await`s on cancellation-aware APIs unwind via `OperationCanceledException`. + +## Semantics + +- **`OperationCanceledException` thrown out of a step body via the token** (i.e. `linked.IsCancellationRequested` is true) is treated as cancellation: no FAIL checkpoint is written, no retry is consulted. The exception propagates up. +- **`OperationCanceledException` thrown by user code for unrelated reasons** (token never fired) is treated as a normal step failure: FAIL checkpoint, retry per the configured `RetryStrategy`. +- **The SDK's own writes (checkpoints, batcher flush, the runtime API response)** never observe the workflow-shutdown signal. Successful work is never lost to teardown. + +## Guidance + +- **Do** pass `ct` into every cancellation-aware call inside the step body (`HttpClient.SendAsync(ct)`, `Task.Delay(ct)`, AWS SDK calls). This is what makes caller-cancel and shutdown-cancel actually unwind. +- **Don't** branch workflow logic on `IsCancellationRequested`. It is a runtime concern, not a workflow concern; branching on it makes the workflow non-deterministic across replays. +- **Don't** `catch (OperationCanceledException)` and continue. Either don't catch, or catch and rethrow. + +## Replay + +Cached operations short-circuit before the user `Func` is invoked. 
A `SUCCESS` checkpoint replays its serialized result; the token is never built or observed. Replay determinism is structural — cancellation cannot affect it. From a3fe3a512861209d29b868162172ef0ec204f284 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Wed, 10 Jun 2026 15:02:01 -0400 Subject: [PATCH 4/6] copilot comments --- .../DurableContext.cs | 22 +- .../design/annotations-integration-plan.md | 378 ++++++++++++++++++ .../docs/design/cancellation-design.md | 341 ++++++++++++++++ .../WorkflowCancellationTests.cs | 26 +- 4 files changed, 751 insertions(+), 16 deletions(-) create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/docs/design/annotations-integration-plan.md create mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/docs/design/cancellation-design.md diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs b/Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs index 2a2ec3bb4..1064e6965 100644 --- a/Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs +++ b/Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs @@ -242,16 +242,24 @@ private Task RunWaitForCallback( // Delegate to RunInChildContextAsync; the inner CreateCallbackAsync and // StepAsync calls each pull the registered ILambdaSerializer from // ILambdaContext.Serializer, so AOT and reflection-based scenarios share - // the same code path. The token threaded into childCtx and the inner - // submitter step is the linked workflow+caller token forwarded by the - // child context machinery. + // the same code path. + // + // Pass the OUTER cancellationToken (not childCtx's linked token) into the + // inner operations. Each inner operation will re-link the caller's token + // with the workflow-shutdown CTS itself when it invokes its user Func, so + // the submitter still observes both signals. 
Threading the already-linked + // childToken through here would propagate the workflow-shutdown signal + // into the inner operations' checkpoint writes (EnqueueAsync uses the + // cancellationToken parameter directly), which would risk lost START / + // SUCCEED checkpoints when termination fires mid-flush. See §7 of + // docs/design/cancellation-design.md. return RunInChildContextAsync( - async (childCtx, childToken) => + async (childCtx, _) => { var callback = await childCtx.CreateCallbackAsync( name: callbackName, config: callbackConfig, - cancellationToken: childToken); + cancellationToken: cancellationToken); await childCtx.StepAsync( async (stepCtx, stepToken) => @@ -261,9 +269,9 @@ await childCtx.StepAsync( }, name: submitterName, config: stepConfig, - cancellationToken: childToken); + cancellationToken: cancellationToken); - return await callback.GetResultAsync(childToken); + return await callback.GetResultAsync(cancellationToken); }, name, new ChildContextConfig diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/annotations-integration-plan.md b/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/annotations-integration-plan.md new file mode 100644 index 000000000..a45478bdf --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/annotations-integration-plan.md @@ -0,0 +1,378 @@ +# Implementation Plan: Integrating `[DurableExecution]` with the Amazon.Lambda.Annotations Source Generator + +> Status: **Ready with must-fixes.** This plan folds in every adversarial-reviewer blocker. Items that depend on undefined infrastructure (the runtime string, the IAM-shape decision) are flagged inline and gated behind explicit pre-merge confirmations rather than buried. + +## Verified ground truth + +All load-bearing claims confirmed against the codebase: + +- IAM action names `lambda:CheckpointDurableExecution` and `lambda:GetDurableExecutionState` verified in the reference template (lines 52-53). 
Note the reference uses an inline `PolicyName: DurableExecutionPolicy` role-attached policy, not a SAM `Policies` array entry — relevant to the IAM section. +- README line 35 states the executable-only constraint is a preview limitation pending RuntimeSupport changes (resolves the "temporary vs permanent" contradiction). +- README line 37 shows `dotnet10` in its example, but durable functions run on **either `dotnet8` or `dotnet10`** (user-confirmed 2026-06-08) — the generator does not force a runtime. Line 53 confirms the `HandlerWrapper.GetHandlerWrapper` typed contract. +- Package multi-targets net8.0 + net10.0. + +--- + +## 1. Goal & Scope + +### Goal +Let a developer annotate a method with `[DurableExecution]` (alongside `[LambdaFunction]`) and have the Amazon.Lambda.Annotations source generator emit: +1. A **typed-envelope handler wrapper** that delegates to `Amazon.Lambda.DurableExecution.DurableFunction.WrapAsync`. +2. A `serverless.template` resource carrying durable-specific config (`DurableConfig`) and the IAM permissions the function needs to call the checkpoint APIs. + +### In scope +- New public attribute `Amazon.Lambda.Annotations.DurableExecutionAttribute` (in the Annotations package). +- Source-generator recognition (TypeFullNames, EventType, builders). +- Generated wrapper shape (typed in/typed out). +- CloudFormation/SAM `DurableConfig` + inline checkpoint IAM policy emission with orphan removal. +- Diagnostics, snapshot tests, change file, docs. + +### Out of scope +- Changes to `Amazon.Lambda.DurableExecution` runtime behavior (`DurableFunction`, `DurableContext`, the wire format). These ship independently; this work consumes them. +- Scoped (least-privilege) checkpoint ARNs — deferred until the service publishes a scopable ARN format (see Risks). 
+ +### The executable-only constraint (VERIFIED, and its sharp edge) +`Amazon.Lambda.DurableExecution/README.md` line 35 states the preview **only supports the executable programming model** — the function is an executable assembly hosting its own bootstrap loop and passing the serializer to the runtime in code. Class-library/managed-runtime support lands only after RuntimeSupport changes are deployed. So the constraint is **temporary-but-real for preview**: `[DurableExecution]` requires `OutputType=Exe` today. + +**MUST-FIX (reviewer blocker — enforcement is post-hoc, not preventive).** `LambdaFunctionModelBuilder.BuildAndValidate` (verified line 17) receives `isExecutable` as a **caller-supplied parameter** from the generator driver; it is not derived from the attribute. A diagnostic can *report* `[DurableExecution]` on a non-executable project, but the framework does not abort generation on diagnostic severity alone. Therefore the plan must make `IsValid=false` the gate: +- `LambdaFunctionValidator.ValidateFunction` (called at line 26) returns the model's `IsValid`. When `[DurableExecution]` is present and `isExecutable == false`, emit `DurableExecutionRequiresExecutable` (Error) **and** force `IsValid=false` so no wrapper is generated. This is the only mechanism in the existing framework that actually halts emission for a function. + +--- + +## 2. The `[DurableExecution]` Attribute Design + +**Placement (REVISED 2026-06-08): `Amazon.Lambda.Annotations` package, top-level namespace `Amazon.Lambda.Annotations`** — file `Libraries/src/Amazon.Lambda.Annotations/DurableExecutionAttribute.cs`. This matches every other annotation attribute (`LambdaFunctionAttribute`, `ScheduleEventAttribute`, …) and lets the generator use the standard strongly-typed `AttributeModel` pattern (the generator already references `Amazon.Lambda.Annotations` and reaches its internals via `InternalsVisibleTo`, so it can call `Validate()`/`IsXxxSet` directly). 
The attribute holds only `int` values, so this adds no dependency from `Amazon.Lambda.Annotations` onto the DurableExecution SDK. + +> **Superseded earlier design:** an initial draft placed the attribute in the `Amazon.Lambda.DurableExecution` package. That was wrong — the generator must target `netstandard2.0` and cannot reference that package (net8/net10 + AWSSDK.Lambda), which made the generic `AttributeModel<T>` pattern impossible and forced an awkward string-keyed POCO workaround. Moving the attribute to `Amazon.Lambda.Annotations` removes the problem entirely. The matching style follows `LambdaFunctionAttribute` (block namespace, no nullable), not the file-scoped style of the DurableExecution package. + +Implemented shape (matches `LambdaFunctionAttribute`'s block-namespace, non-nullable style): + +```csharp +using System; +using System.Collections.Generic; + +namespace Amazon.Lambda.Annotations +{ + [AttributeUsage(AttributeTargets.Method, AllowMultiple = false)] + public class DurableExecutionAttribute : Attribute + { + private int _retentionPeriodInDays; + public int RetentionPeriodInDays + { + get => _retentionPeriodInDays; + set { _retentionPeriodInDays = value; IsRetentionPeriodInDaysSet = true; } + } + internal bool IsRetentionPeriodInDaysSet { get; private set; } + + private int _executionTimeout; // seconds + public int ExecutionTimeout + { + get => _executionTimeout; + set { _executionTimeout = value; IsExecutionTimeoutSet = true; } + } + internal bool IsExecutionTimeoutSet { get; private set; } + + internal List<string> Validate() + { + var validationErrors = new List<string>(); + if (IsRetentionPeriodInDaysSet && RetentionPeriodInDays <= 0) + validationErrors.Add($"{nameof(RetentionPeriodInDays)} = {RetentionPeriodInDays}. It must be a positive integer."); + if (IsExecutionTimeoutSet && ExecutionTimeout <= 0) + validationErrors.Add($"{nameof(ExecutionTimeout)} = {ExecutionTimeout}.
It must be a positive integer."); + return validationErrors; + } + } +} +``` + +Design notes: +- **Parameterless** — `[DurableExecution]` with no args is valid (unlike `[SQSEvent]`'s required queue arg). +- **`IsXxxSet` flags are `internal`** (consumed by the generator via `InternalsVisibleTo`), following the `ScheduleEventAttribute` convention so unset values are omitted from CFN. +- **No `WorkflowName`/`Input`/`ResourceName` argument.** Input is carried by the durable envelope (the EXECUTION op — verified in `DurableFunction.ExtractUserPayload`, lines 200-221); the function name derives from `[LambdaFunction]`. A second name source would create a duplicate-key hazard. +- **No signature change** to the user method. The user method stays `(TInput, IDurableContext) -> Task<TOutput>` or `(TInput, IDurableContext) -> Task`, enforced by `DurableExecutionInvalidSignature`. +- Validate rejects `<= 0` now; exact upper bounds are a follow-up once service limits are confirmed. + +--- + +## 3. Source-Generator Recognition (Models, TypeFullNames) + +**MUST-FIX (reviewer): exact namespace match or silent skip.** The string below must match the attribute's real namespace exactly, or `EventTypeBuilder`/`AttributeModelBuilder` silently skip it and the method routes to `NoEventMethodBody`. A dedicated test (Component H) covers discovery. + +1. **`TypeFullNames.cs`** — add four constants (note the attribute is now in the Annotations namespace; the invocation envelopes + `DurableFunction` remain in the SDK namespace because the **user's** compilation references them and the generator only matches them by string): + - `DurableExecutionAttribute = "Amazon.Lambda.Annotations.DurableExecutionAttribute"` + - `DurableExecutionInvocationInput = "Amazon.Lambda.DurableExecution.DurableExecutionInvocationInput"` + - `DurableExecutionInvocationOutput = "Amazon.Lambda.DurableExecution.DurableExecutionInvocationOutput"` + - `DurableFunction = "Amazon.Lambda.DurableExecution.DurableFunction"` + +2.
**`Models/EventType.cs`** — add `DurableExecution` enum member. + +3. **`Models/EventTypeBuilder.cs`** — add `else if (attribute.AttributeClass.ToDisplayString() == TypeFullNames.DurableExecutionAttribute) events.Add(EventType.DurableExecution);`. + +4. **`Models/Attributes/AttributeModelBuilder.cs`** (IMPLEMENTED) — add an `else if` case (`SymbolEqualityComparer` against `GetTypeByMetadataName(TypeFullNames.DurableExecutionAttribute)`) constructing the standard strongly-typed `AttributeModel<DurableExecutionAttribute>` via `DurableExecutionAttributeBuilder.Build`. Because the attribute now lives in `Amazon.Lambda.Annotations` (which the generator references), this is the same generic pattern every other attribute uses — no workaround needed. + +5. **`Models/Attributes/DurableExecutionAttributeBuilder.cs` (NEW, IMPLEMENTED):** returns a real `DurableExecutionAttribute`, reading `att.NamedArguments` by `nameof` (`RetentionPeriodInDays` / `ExecutionTimeout`); assigning each property also flips its `IsXxxSet` flag (so unset values are omitted from the template). Mirrors `ScheduleEventAttributeBuilder` but with no constructor args (the attribute is parameterless). + +6. **`Models/GeneratedMethodModelBuilder.cs`** — early branches gated on `Events.Contains(EventType.DurableExecution)`, placed **BEFORE** the API/HttpApi/ALB branches: + - `BuildParameters` → exactly `[ __request__ : DurableExecutionInvocationInput, __context__ : ILambdaContext ]` + - `BuildResponseType` → `Task<DurableExecutionInvocationOutput>` (auto-async) + - `BuildUsings` → conditionally add `Amazon.Lambda.DurableExecution`. + - The wrapper DOES need `TInput`/`TOutput` to emit **explicit** generic arguments (see Section 4 correction) — read from `LambdaMethod.Parameters[0].Type.FullName` and `LambdaMethod.ReturnType.TaskTypeArgument`. No new model fields are required; the existing model already carries these. + +**Branch-ordering is load-bearing** (reviewer): if these run after the API/ALB checks, a method routes to the wrong template.
A test must assert a file containing both a durable and an API method produces the durable wrapper for the durable method. + +--- + +## 4. Generated Handler Wrapper + +The wrapper is a **typed-envelope** method (matches README line 53's `HandlerWrapper.GetHandlerWrapper<DurableExecutionInvocationInput, DurableExecutionInvocationOutput>` contract), **NOT** Stream→Stream. + +**Why typed, not Stream→Stream (VERIFIED dual-serializer hazard):** `DurableFunction.WrapAsyncCore` (verified line 79) reads the serializer off the **context** via `LambdaSerializerHelper.GetRequired(lambdaContext)`, not off any wrapper field. A Stream→Stream wrapper that deserialized with its own `serializer` field (a different instance than the one the bootstrap attaches to the context) would be a real bug. So the wrapper does typed in/typed out and lets the runtime `HandlerWrapper` do envelope (de)serialization. + +**Generated signature:** +```csharp +public async Task<Amazon.Lambda.DurableExecution.DurableExecutionInvocationOutput> <MethodName>( + Amazon.Lambda.DurableExecution.DurableExecutionInvocationInput __request__, + ILambdaContext __context__) +``` + +**Generated body (single delegation, bound method-group):** +```csharp +return await Amazon.Lambda.DurableExecution.DurableFunction.WrapAsync<TInput, TOutput>( + <instance>.<Method>, __request__, __context__); +``` +- `<instance>` = the `containingType` field (non-DI) or `scope.ServiceProvider.GetRequiredService<ContainingType>()` (DI). Both resolution paths already exist in `FieldsAndConstructor`. +- **Which overload (VERIFIED, four exist — DurableFunction.cs lines 36-71):** the wrapper uses the **three-argument** (no explicit client) overloads — `WrapAsync<TInput, TOutput>(Func<TInput, IDurableContext, Task<TOutput>>, …)` for a typed-returning method or `WrapAsync<TInput>(Func<TInput, IDurableContext, Task>, …)` for a void method. The lazy `_cachedLambdaClient` (line 30) backs the no-client path — correct for the generated case.
+- **CORRECTION (2026-06-08, found by Component H): the wrapper MUST emit EXPLICIT generic type arguments.** The original plan said to emit none and rely on overload resolution — that is **wrong** and produces `CS0411` ("type arguments cannot be inferred"): C# cannot infer `TInput`/`TOutput` from a **method-group** argument bound to a `Func<,,>` parameter. Every real call site confirms this — README line 61 (`WrapAsync<TInput, TOutput>(Workflow, …)`) and all `DurableFunctionTests` use explicit generics. The generated wrapper therefore emits `WrapAsync<TInput, TOutput>(instance.Method, …)` for typed workflows and `WrapAsync<TInput>(instance.Method, …)` for void (`Task`) workflows, where `TInput` = the user method's first parameter type and `TOutput` = the `Task<TOutput>` type argument. Verified by a compile test that the explicit-generic call binds and the form that relies on type inference fails with `CS0411`. +- The wrapper does **not** deserialize a Stream, does **not** touch its own `serializer` field, and does **not** reconstruct `[FromX]` params. + +**MUST-FIX (reviewer): signature constraint must be validated.** Method-group overload resolution assumes `Task<TOutput>` or `Task`. A `ValueTask`-returning or wrong-shape user method produces a C# compile error in generated code. `LambdaFunctionValidator.ValidateFunction` must add a durable-specific check: the user method must be exactly `(TInput, IDurableContext) -> Task<TOutput>` or `-> Task`; otherwise emit `DurableExecutionInvalidSignature` (Error) and set `IsValid=false`. + +**MUST-FIX (reviewer): runtime serializer contract.** `WrapAsyncCore` calls `LambdaSerializerHelper.GetRequired(__context__)` and throws if no serializer is on the context. The generated wrapper assumes the bootstrap populated `ILambdaContext.Serializer`. This is a runtime contract not exercisable in generator snapshot tests; the `DurableExecutionInvoke.tt` template must carry a code comment stating the serializer is expected from the context, and Component A must include a serializer round-trip unit test (Section 8).
+ +**Build note (IMPORTANT, discovered during Component C):** there is **no command-line T4 step**. The `TextTemplatingFilePreprocessor` entries are VS-design-time only; `dotnet build` compiles the **committed** `.cs` partials, not the `.tt`. So every template requires THREE checked-in files kept in sync: `X.tt` (source of truth), `X.cs` (the T4-style transform output — `TransformText()` + the generated boilerplate base class), and `XCode.cs` (the constructor partial holding `_model`). The durable body is a single delegation line, authored across all three for `DurableExecutionInvoke`. + +**Template wiring (IMPLEMENTED):** +- `LambdaFunctionTemplate.tt` **and** `LambdaFunctionTemplate.cs` — durable branch placed **FIRST** in the dispatch chain (`if (Events.Contains(EventType.DurableExecution)) Write(new DurableExecutionInvoke(_model)...)`), before Authorizer/API/ALB/else. Both files edited (the `.cs` is what compiles). +- `DurableExecutionInvoke.tt` + `.cs` + `Code.cs` (NEW) — emits `return await Amazon.Lambda.DurableExecution.DurableFunction.WrapAsync<…>(<instance>.<Method>, __request__, __context__);` with **explicit** generic arguments (see Section 4 correction — `WrapAsync<TInput, TOutput>` for typed, `WrapAsync<TInput>` for void). `<instance>` is the camel-cased containing-type field (non-DI) or the DI-resolved local that `LambdaFunctionTemplate`'s shared prologue already sets up. csproj registered the new `.tt`/`.cs` pair like its siblings. +- `GeneratedMethodModelBuilder` (IMPLEMENTED) — durable branches in `BuildResponseType` (→ `Task<DurableExecutionInvocationOutput>`), `BuildParameters` (→ `DurableExecutionInvocationInput __request__, ILambdaContext __context__`), and `BuildUsings` (adds `Amazon.Lambda.DurableExecution`). The durable check is placed before the API/Authorizer/ALB checks in each.
+ +**Original "separate template" wiring notes (superseded by the above):** +- `Templates/LambdaFunctionTemplate.tt` — add `else if (_model.LambdaMethod.Events.Contains(EventType.DurableExecution)) { Write(new DurableExecutionInvoke(_model).TransformText()); }` placed **FIRST**, before the Authorizer/API/ALB branches. The signature line already renders the forced params/return from `GeneratedMethod`, with `async` emitted because the return is a generic `Task`. +- `Templates/DurableExecutionInvoke.tt` (NEW, + checked-in `.cs` partial if the existing template convention requires one) — emits the single `WrapAsync` delegation, handling DI (`scope.ServiceProvider`) and non-DI (`containingType` field) resolution. **MUST-FIX: this template must be authored before snapshots can be produced.** +- `ExecutableAssembly.tt` — **no change.** Verified: it already emits `Func<{p.Type.FullName}, {ReturnType.FullName}>` generically and calls `LambdaBootstrapBuilder.Create(handler, new SerializerName())`. A regression test asserts no change is needed for durable return types. + +**DI lifetime (reviewer gap):** the DI scope is **per-invocation**, matching existing API-Gateway scope semantics — the scope is created and disposed around a single Lambda invocation, NOT held open across a multi-hour suspended workflow (the service re-invokes; each invocation gets a fresh scope). Document this in the template comment. + +--- + +## 5. CloudFormation / SAM Template Changes + +`DurableConfig` is a function **Properties** block (not a SAM `Events` entry), tracked via a `Metadata` marker, modeled exactly on the verified `SyncedFunctionUrlConfig` pattern (`CloudFormationWriter.cs` lines 245-249 write the marker; lines 267+ do orphan removal). 
+ +In `ProcessLambdaFunctionEventAttributes` (verified switch at lines 220-262), add: +```csharp +case AttributeModel<DurableExecutionAttribute> durableModel: + ProcessDurableExecutionAttribute(lambdaFunction, durableModel.Data); // Data is DurableExecutionAttribute + hasDurableExecution = true; // initialized = false near line 218 + break; // do NOT add to currentSyncedEvents — durable is not an event +``` + +`ProcessDurableExecutionAttribute` writes (only when the corresponding `IsXxxSet` flag is true): +- `Resources.<ResourceName>.Properties.DurableConfig.RetentionPeriodInDays` +- `Resources.<ResourceName>.Properties.DurableConfig.ExecutionTimeout` +- marker `Resources.<ResourceName>.Metadata.SyncedDurableConfig = true` + +**Expected JSON shape** (snapshot expectation, resolving the reviewer's ambiguity): +```json +"Properties": { + "DurableConfig": { "RetentionPeriodInDays": 7, "ExecutionTimeout": 300 } +} +``` +YAML equivalent under `Properties: DurableConfig:`. + +**Orphan removal** (mirroring the `FunctionUrl` block at lines 267+): when `!hasDurableExecution`, if `Metadata.SyncedDurableConfig` is true, `RemoveToken Properties.DurableConfig`, remove the injected checkpoint policy (Section 6), and remove the markers. + +**Runtime:** NOT set here, and NOT forced anywhere else — per the Component D decision (Section 7), the caller-supplied/default runtime (`dotnet8` or `dotnet10`) flows through unchanged like every other function. Writer-side injection would not survive anyway: `ProcessPackageTypeProperty` line 185 (`SetToken …Runtime = lambdaFunction.Runtime`) would clobber it in the Zip branch. + +**PackageType:** durable functions are Zip/executable only. The `Image` branch (verified lines 190-196) strips `Handler`/`Runtime`, so `PackageType.Image` is structurally unsupported → `DurableExecutionZipOnly` (Error, `IsValid=false`) at model-build. **MUST-FIX: this diagnostic must be Error and gate `IsValid`, not a warning** — otherwise the Image branch silently produces a broken template. + +**Tool guard:** the existing `Metadata.Tool = Amazon.Lambda.Annotations` guard is preserved (DurableConfig only written/refreshed for generator-owned functions). + +--- + +## 6.
IAM Policy Statements for Checkpoint APIs + +**Action names (VERIFIED against the reference template, 2026-06-08):** attested snapshot from `C:\dev\repos\aws-durable-execution-sdk-python\packages\aws-durable-execution-sdk-python-examples\template.yaml` (the file is JSON despite the `.yaml` extension), `DurableFunctionRole.Properties.Policies[0]`, lines 43-60: +```json +"Policies": [ + { + "PolicyName": "DurableExecutionPolicy", + "PolicyDocument": { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "lambda:CheckpointDurableExecution", + "lambda:GetDurableExecutionState" + ], + "Resource": "*" + } + ] + } + } +] +``` +So the two checkpoint actions are confirmed: `lambda:CheckpointDurableExecution`, `lambda:GetDurableExecutionState`. + +**FLAGGED — the reference IAM pattern diverges MORE than first assumed (corrected 2026-06-08 after reading the full reference template):** +- The reference does **not** put any IAM on the function resources at all. It defines a **single shared standalone `AWS::IAM::Role`** (`DurableFunctionRole`, lines 25-62) carrying `ManagedPolicyArns: [AWSLambdaBasicExecutionRole]` **plus** the inline `PolicyName: DurableExecutionPolicy` above, and **every `AWS::Serverless::Function` sets `Role: {Fn::GetAtt: [DurableFunctionRole, Arn]}`** (e.g. lines 69-74) — no function uses a SAM `Policies` array. +- **Consequence for this plan's design:** under the plan's own rule "when `lambdaFunction.Role` IS set, do NOT touch IAM," the reference pattern would never trigger the plan's injection — because in the reference, every function *does* set `Role`. The generator's auto-IAM path (no explicit `Role` → emit a SAM `Policies`-array inline statement) is therefore **a distinct, generator-idiomatic adaptation, not a reproduction of the reference**. 
The SAM transform expands a per-function `Policies` array into a generated per-function role, so it is functionally equivalent (each function gets the two actions), but the resulting template shape (N generated roles vs. one shared role) differs from the reference. +- **DECISION MADE (2026-06-08): Option 1 — per-function SAM `Policies` array.** Rationale (user): follow the same mechanism the generator already uses for IAM (it appends to the per-function `Policies` list it already manages for `AWSLambdaBasicExecutionRole`), rather than introducing standalone-role emission the writer does not do today. The two options considered were: + 1. **Per-function SAM `Policies` array** (CHOSEN): idiomatic to how the generator already emits `AWSLambdaBasicExecutionRole`; produces one role per function via the SAM transform. Mixed string/object array — see the round-trip risk below. + 2. ~~Shared standalone role (matches reference exactly): generator emits one `DurableFunctionRole` resource and points every durable function's `Role` at it. Larger change to the writer (it does not emit standalone roles today) and interacts with user-specified `Role`.~~ Not chosen. +- `Resource: "*"` is used because the DurableExecutionArn is allocated at runtime and is not knowable at template-synth time (matches the reference, line 55). Whether a scopable ARN will ever exist is **undefined** — flagged as a follow-up, not promised. + +When `[DurableExecution]` is present AND `lambdaFunction.Role` is NOT set, after `ProcessLambdaFunctionProperties` has run (so the `Policies` array exists from the line 161-166 split), read-modify-write `Properties.Policies` via `GetToken`/`SetToken(TokenType.List)`, appending one inline statement object: +```json +{ + "Statement": [ + { + "Effect": "Allow", + "Action": ["lambda:CheckpointDurableExecution", "lambda:GetDurableExecutionState"], + "Resource": "*" + } + ] +} +``` +Producing a mixed string/object array, e.g. 
`["AWSLambdaBasicExecutionRole", { "Statement": [ … ] }]`. Track via `Metadata.SyncedDurablePolicy = true` for idempotent regeneration; remove the injected statement + marker on orphan removal. + +**When `lambdaFunction.Role` IS set** (Role/Policies mutually exclusive — verified lines 155-166): do NOT touch IAM. Emit `DurableExecutionExplicitRoleNeedsCheckpointPolicy` (Info) instructing the user to attach the two actions manually. The diagnostic fires whenever both `[DurableExecution]` and `Role` are present at generation time. + +**MUST-FIX (highest regression risk):** the mixed string/object `Policies` array must round-trip through both `JsonWriter` and `YamlWriter`. A dedicated JSON+YAML round-trip snapshot test is mandatory (Section 8, Test G). If `SetToken(TokenType.List)` cannot preserve heterogeneous types, this approach is not viable and must be revisited before merge. + +--- + +## 7. Component-by-Component Implementation Steps (real file paths) + +All paths are absolute. `.tt` template changes require updating the corresponding checked-in `.cs` files in the same change (see the Build note in Section 4: there is no command-line T4 step, and `dotnet build` compiles the committed `.cs`, not the `.tt`). + +### Component A — `DurableExecutionAttribute` (public API) +- **NEW** `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations\DurableExecutionAttribute.cs` — the attribute from Section 2 (it lives in the Annotations package, not the DurableExecution package). +- Add a serializer round-trip unit test (Section 8). + +### Component B — Attribute discovery + model wiring +- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\TypeFullNames.cs` — four constants. +- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\EventType.cs` — `DurableExecution` member. +- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\EventTypeBuilder.cs` — mapping `else if`.
+- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\Attributes\AttributeModelBuilder.cs` — `SymbolEqualityComparer` case + `using Amazon.Lambda.DurableExecution`. +- **NEW** `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\Attributes\DurableExecutionAttributeBuilder.cs` — copied from `ScheduleEventAttributeBuilder.cs`. + +### Component C — Generated wrapper shape +- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\GeneratedMethodModelBuilder.cs` — early `BuildParameters`/`BuildResponseType`/`BuildUsings` branches, ordered before API/ALB. +- **NEW** `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Templates\DurableExecutionInvoke.tt` (+ generated `.cs`). +- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Templates\LambdaFunctionTemplate.tt` — durable branch placed FIRST. +- Verify `ExecutableAssembly.tt` needs no change (regression test). + +### Component D — Package/model validation +- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\LambdaFunctionModelBuilder.cs`. + +**Runtime: NO forcing (DECISION 2026-06-08).** Durable functions run on **either `dotnet8` or `dotnet10`**, so the generator does **not** force or override the runtime — the caller-supplied/default `runtime` flows through unchanged exactly like every other function. No `DurableRuntime` constant, no `model.Runtime` override. (This removes the former "MUST-FIX runtime contradiction" and BLOCKING risk #1 entirely.) + +- Run the durable validation pass (executable-only, Zip-only, exclusive-event, signature) and force `IsValid=false` on any Error-severity finding. This is the substance of Component D now that runtime forcing is gone. 
+ +**IMPLEMENTED (2026-06-08, Components D+E):** added a `ValidateDurableExecution` method to `LambdaFunctionValidator` (called alongside the other `ValidateXxxEvents`), which adds Error diagnostics to the list — `ReportDiagnostics` already returns `IsValid=false` whenever any Error is present, so no separate gating wiring is needed. Checks: `OutputKind != ConsoleApplication` → 0140; `PackageType == Image` → 0141; signature (param count, second param `== IDurableContext`, return classified via the model's existing `ReturnsVoidOrGenericTask`) → 0142; explicit `Role` set → 0143 (Info). Added `TypeFullNames.IDurableContext`. Two build-system findings: (1) **RS1032** — a `messageFormat` ending in a `{0}` placeholder must use `: {0}` not `. {0}` (trailing-period rule); (2) the SourceGenerators.Tests project **cannot reference the DurableExecution package** (its AWSSDK.Core 4.x downgrades the test project's pinned 3.7.x → NU1605), so diagnostic tests supply minimal durable **stub types as source** (`IDurableContext` / the two envelopes) — the generator only needs them resolvable by metadata name. Diagnostic tests use the `VerifyCS.Test` harness with exact `WithSpan`/`WithArguments` (the framework demands precise locations and prints the expected `DiagnosticResult` on mismatch). + +### Component E — Diagnostics set +- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Diagnostics\DiagnosticDescriptors.cs`. + +**RESOLVED (2026-06-08): concrete IDs allocated.** Verified against `DiagnosticDescriptors.cs`: the highest allocated id is `AWSLambda0139` (`InvalidScheduleEventAttribute`). (Note: `AWSLambda0126` is skipped in the existing file — 0125 jumps to 0127 — but the durable IDs continue cleanly from the top.) All descriptors use `category: "AWSLambdaCSharpGenerator"` and `isEnabledByDefault: true`, matching the file's convention. 
+ +**REVISED (2026-06-08): only FOUR new descriptors (three Errors + one Info) — `DurableExecutionExclusiveEvent` dropped (redundant).** Code verification: `LambdaFunctionValidator.ValidateFunction` (line 58) already emits `MultipleEventsNotSupported` (AWSLambda0102) and returns early with `IsValid=false` whenever `Events.Count > 1`. Component B added `DurableExecutionAttribute` to `TypeFullNames.Events` and `EventType.DurableExecution`, so `[DurableExecution] + [RestApi]` already produces `Events.Count == 2` → fires AWSLambda0102 → halts generation. No new exclusive-event diagnostic is needed; just add a **test** asserting the combination triggers AWSLambda0102 (locks in the dispatch-order behavior). The durable descriptors take **`AWSLambda0140`–`AWSLambda0143`**: + +| Name | Id | Severity | Gates generation? | Message (summary) | +|---|---|---|---|---| +| `DurableExecutionRequiresExecutable` | `AWSLambda0140` | Error | Yes (`IsValid=false`) | `[DurableExecution]` requires an executable (OutputType=Exe) project; class-library handlers are not supported in preview. | +| `DurableExecutionZipOnly` | `AWSLambda0141` | Error | Yes | `[DurableExecution]` requires PackageType=Zip; Image packaging is not supported. | +| `DurableExecutionInvalidSignature` | `AWSLambda0142` | Error | Yes | A `[DurableExecution]` method must be `(TInput, IDurableContext) -> Task<TOutput>` or `-> Task`. | +| `DurableExecutionExplicitRoleNeedsCheckpointPolicy` | `AWSLambda0143` | Info | No | Function uses an explicit Role; attach `lambda:CheckpointDurableExecution` and `lambda:GetDurableExecutionState` manually. | + +**Exclusive-event enforcement (RESOLVED):** handled by the existing `MultipleEventsNotSupported` (AWSLambda0102) — see above. No new diagnostic. + +**Executable detection (RESOLVED 2026-06-08 — gate kept, but key off `OutputKind`):** the generator's `isExecutable` flag (Generator.cs:129) is derived from the `GenerateMain` named arg on `[assembly: LambdaGlobalProperties]` — i.e.
"generator should synthesize `Main`." That is the WRONG signal for the durable gate, because the README's quick-start uses the **manual** bootstrap model (`GenerateMain` is false, user writes their own `Main` + `LambdaBootstrap`) yet is still a valid executable. `DurableExecutionRequiresExecutable` must therefore gate on **`context.Compilation.Options.OutputKind != OutputKind.ConsoleApplication`** ("is this an executable project at all"), NOT on `isExecutable`. This correctly allows both the manual-bootstrap model (today) and a future generated-`Main` model, and only rejects true class-library projects. + +### Component F — CFN `DurableConfig` writer (IMPLEMENTED 2026-06-08) +- `CloudFormationWriter.cs` — added a `case AttributeModel` to the event-attribute switch that calls `ProcessDurableExecutionAttribute` and sets `hasDurableExecution = true` (and does NOT add to `currentSyncedEvents` — durable is a Properties/IAM concern, not an event). `ProcessDurableExecutionAttribute` clears any prior `DurableConfig`, re-emits `RetentionPeriodInDays`/`ExecutionTimeout` only when their `IsXxxSet` flags are true (creating an empty `DurableConfig` object via `TokenType.Object` when neither is set so the function is still marked durable), and sets the `Metadata.SyncedDurableConfig` marker. Orphan removal mirrors the verified `FunctionUrl` block. + +### Component G — CFN checkpoint IAM writer (IMPLEMENTED 2026-06-08) +- `CloudFormationWriter.cs` — kept inline (no separate writer class), matching `ProcessFunctionUrlAttribute` style. When `Role` is empty, `AddDurableCheckpointPolicy` reads the existing `Policies` via `GetToken>`, appends one inline statement object (`{Statement:[{Effect,Action:[2 actions],Resource:"*"}]}` built as nested `Dictionary`/`List`), and re-sets with `TokenType.List` — producing the mixed string/object array (`["AWSLambdaBasicExecutionRole", {Statement…}]`). 
Idempotency + orphan removal use `IsDurableCheckpointStatement` (recognizes the statement by its action names via JSON serialization). When `Role` is set, IAM is left untouched and `AWSLambda0143` (Info) is emitted in the validator. +- **HIGHEST-RISK ITEM RESOLVED:** the mixed string/object `Policies` array round-trips cleanly through **both** `JsonWriter` (JSON.NET `JToken`) and `YamlWriter` (`TokenType.List` → `YamlSequenceNode`). Verified by `DurableExecution_InjectsCheckpointPolicy_AsMixedArray` (JSON + YAML) plus idempotency and orphan-removal tests. `SetToken(TokenType.List)` handles heterogeneous types fine — the approach is viable. + +### Component H — End-to-end / compile tests (IMPLEMENTED 2026-06-08) +- `DurableExecutionWrapperCompilesTests.cs` — compiles the exact generated wrapper shape against realistic `WrapAsync` overloads. **This layer found a real bug:** the planned no-explicit-generics call fails with `CS0411` (see Section 4 correction). Tests assert the typed (`WrapAsync`) and void (`WrapAsync`) forms bind, and a guard test asserts the inference-free form fails with `CS0411`. +- Note on approach: a full `Microsoft.CodeAnalysis.Testing` snapshot E2E (committed `.g.cs` + `Program.g.cs` + RuntimeSupport sources) was attempted but is high-friction here (exact `AWSLambda0103` content match + the AWSSDK.Core 3.7.x/4.x conflict that blocks referencing the durable package). The compile-test approach covers the unique remaining risk (overload binding) without that friction; the wrapper *text* is pinned by Component C's template tests and the *template* output by F/G's writer tests. + +### Component I — Change file + docs (IMPLEMENTED 2026-06-08) +- `.autover/changes/durable-execution-annotations-integration.json` — single `Amazon.Lambda.Annotations` Minor entry (that autover project spans both the attributes csproj and the SourceGenerator csproj, so it covers everything added here). 
+- `Amazon.Lambda.DurableExecution/README.md` — added a "Using Lambda Annotations" subsection showing the `[LambdaFunction]` + `[DurableExecution]` model that removes the manual handler/`WrapAsync` boilerplate. +- **NEW** `C:\dev\repos\aws-lambda-dotnet\.autover\changes\.json` — increment **Minor**, projects `Amazon.Lambda.Annotations.SourceGenerator` + `Amazon.Lambda.DurableExecution`. Create via `autover change`. +- Update `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.DurableExecution\README.md` to note that `[DurableExecution]` generates the bootstrap wiring for the executable model. + +--- + +## 8. Test Strategy (snapshot tests) + +Snapshot harness: `CSharpGeneratorDriver` against files in `Libraries\test\Amazon.Lambda.Annotations.SourceGenerators.Tests\Snapshots\`. CFN writer tests mirror `WriterTests\FunctionUrlTests.cs`, parameterized `[InlineData(CloudFormationTemplateFormat.Json)]` / `[InlineData(CloudFormationTemplateFormat.Yaml)]`. + +**Unit (Component A)** — `Libraries\test\Amazon.Lambda.DurableExecution.Tests\` (or the existing durable test project): constructor defaults, `IsXxxSet` tracking, `Validate()` rejects `<= 0`. **Serializer round-trip:** the default `ILambdaSerializer` deserializes `DurableExecutionInvocationInput` and serializes `DurableExecutionInvocationOutput` including `UpperSnakeCaseEnumConverter` on `InvocationStatus` (Succeeded/Failed/Pending), and a nested `InitialExecutionState`/`Operations` round-trips without loss. This must pass before the typed-envelope wrapper is relied upon. + +**Generated-wrapper snapshots (Component C):** +- A. Non-DI typed-output method → verify signature (`DurableExecutionInvocationInput`/`ILambdaContext` params, `Task` return) and single `WrapAsync(containingType.Method, __request__, __context__)` delegation, no Stream deserialization. +- B. DI variant → `scope.ServiceProvider.GetRequiredService()` resolution. +- C. 
Void user method (`Task` return) → confirms overload resolution compiles without explicit generic args. +- D. **Branch-ordering test:** one file with both a durable method and a `[RestApi]` method → durable method gets the durable wrapper. +- E. `ExecutableAssembly.tt` regression → executable assembly snapshot unchanged in shape for durable return types. + +**Diagnostics (Component E):** one test each for `DurableExecutionRequiresExecutable` (non-exe / class library), `DurableExecutionZipOnly` (Image), `DurableExecutionInvalidSignature` (ValueTask / wrong params), and `DurableExecutionExplicitRoleNeedsCheckpointPolicy` (explicit Role). Plus a test that `[DurableExecution]` + `[RestApi]` triggers the **existing** `MultipleEventsNotSupported` (AWSLambda0102). For the three durable Errors (and AWSLambda0102), also assert no wrapper is generated (`IsValid=false`). + +**CFN (Components F/G)** — `Libraries\test\Amazon.Lambda.Annotations.SourceGenerators.Tests\WriterTests\DurableExecutionTests.cs` (NEW): +- F1. `DurableConfig` with both props set (JSON + YAML); `Metadata.SyncedDurableConfig == true`. +- F2. Partial emit — only `RetentionPeriodInDays` set → `ExecutionTimeout` absent. +- F3. Orphan removal — attribute dropped → `DurableConfig` + marker removed. +- G. **Highest-risk:** mixed string/object `Policies` array round-trip (JSON + YAML), asserting `["AWSLambdaBasicExecutionRole", { "Statement": [ … checkpoint … ] }]` order preserved after write and re-parse. +- G2. Idempotency — regeneration does not duplicate the policy statement. +- G3. Role suppression — `Role` set → `Policies` untouched, Info diagnostic emitted. + +Snapshot fixtures with the exact JSON/YAML shapes (Sections 5 and 6) must be authored as part of this work, not deferred. + +--- + +## 9. Risks, Open Questions, and Must-Fix-First Items + +### BLOCKING (resolve before implementation starts) +1. 
~~**Runtime string is undefined infra.**~~ **DROPPED (2026-06-08): not an issue.** Durable functions run on either `dotnet8` or `dotnet10`, so the generator does **not** force a runtime — it lets the user's normal runtime selection flow through. No `DurableRuntime` constant, no override. (Component D no longer touches runtime at all.) +2. ~~**IAM emission shape — role shape still a DECISION.**~~ **RESOLVED (2026-06-08): Option 1 — per-function SAM `Policies`-array inline statement**, matching how the generator already emits `AWSLambdaBasicExecutionRole`. Action names verified against the reference snapshot (`lambda:CheckpointDurableExecution`, `lambda:GetDurableExecutionState`; lines 51-54). The remaining risk here is purely mechanical — the mixed string/object `Policies` array round-trip (see item 7), not a shape decision. +3. ~~**Diagnostic IDs.**~~ **RESOLVED (2026-06-08): `AWSLambda0140`–`AWSLambda0143`** (highest existing is `AWSLambda0139`; `0126` is skipped in the file but the durable IDs continue cleanly from the top). Four new descriptors (three Errors plus one Info) — the exclusive-event case reuses the existing `AWSLambda0102`. See the Section 7 / Component E table. + +### REQUIRED-BEFORE-CODING (artifacts that gate the rest) +4. Author `DurableExecutionInvoke.tt` first — snapshots cannot exist without it. +5. Create `DurableExecutionAttributeBuilder.cs` by copying the real `ScheduleEventAttributeBuilder.cs`, not from prose. +6. Author the exact JSON/YAML snapshot fixtures for `DurableConfig` and the mixed `Policies` array. + +### Highest regression risk +7. **Mixed string/object `Policies` array** round-trip via `SetToken(TokenType.List)` through both `JsonWriter` and `YamlWriter`. Dedicated round-trip test mandatory; if `SetToken` cannot preserve heterogeneous types, the inline-policy approach is not viable and must be reconsidered. + +### Correctness gates (enforced via `IsValid=false`, not severity alone) +8.
**Validation gates must set `IsValid=false`** (diagnostic severity alone does not halt generation). Applies to `DurableExecutionRequiresExecutable` (gate on `OutputKind != ConsoleApplication`), `DurableExecutionZipOnly`, and `DurableExecutionInvalidSignature`. The exclusive-event case is already handled by the existing `MultipleEventsNotSupported` (AWSLambda0102), which returns early with `IsValid=false`. +9. **Branch ordering** is load-bearing in two files (`GeneratedMethodModelBuilder` and `LambdaFunctionTemplate.tt`) — durable must be checked before API/HttpApi/ALB. Covered by Test D. +10. **Signature constraint** — `ValueTask`/non-`(TInput, IDurableContext)` returns produce generated-code compile errors. `ValidateFunction` must reject them. +11. **Runtime serializer contract** — `WrapAsyncCore` reads the serializer off `__context__` (verified line 79); the generated wrapper assumes the bootstrap populated it. Not testable in snapshots; covered by Component A's round-trip unit test + a template comment. + +### Accepted-for-preview (documented follow-ups, not promises) +12. `Resource: "*"` on the checkpoint statement is broad. Acceptable for preview per the reference; tightening depends on the service defining a scopable durable-execution ARN — **existence of such an ARN is undefined**, so this is flagged, not committed. +13. **Executable-only is a sharp edge** until managed-runtime support lands in RuntimeSupport (README line 35). Temporary-for-preview, not architectural. +14. **TypeFullNames must exactly match** `Amazon.Lambda.DurableExecution.DurableExecutionAttribute` or the attribute is silently skipped → routed to `NoEventMethodBody`. Covered by the discovery test. + +### Open questions deferred (non-blocking) +15. Upper bounds for `RetentionPeriodInDays`/`ExecutionTimeout` — `Validate()` only rejects `<= 0` now; tighten once service limits are published. +16. 
Whether, when a user adds an explicit `Role` to a function that previously had an auto-injected checkpoint policy, the old policy should be actively removed. The Role/Policies mutual-exclusivity (lines 155-166) clears `Policies` automatically in `ProcessLambdaFunctionProperties`, so the stale statement is removed as a side effect; verify this in the Role-suppression test and document it. diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/cancellation-design.md b/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/cancellation-design.md new file mode 100644 index 000000000..0e12a32e3 --- /dev/null +++ b/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/cancellation-design.md @@ -0,0 +1,341 @@ +# Cancellation in Amazon.Lambda.DurableExecution — Design + +> Status: design (2026-06-10). Targets the preview window before GA so the breaking delegate-shape change lands once. + +Thread a `CancellationToken` into every user `Func` accepted by `IDurableContext`. Internally the SDK owns a workflow-scoped `CancellationTokenSource` linked with the caller's token, so user code observes cancel for both upstream caller intent and SDK-driven workflow teardown. + +## Table of contents + +1. [Motivation](#1-motivation) +2. [Goals and non-goals](#2-goals-and-non-goals) +3. [Public API changes](#3-public-api-changes) +4. [Internal scaffold](#4-internal-scaffold) +5. [Cancellation semantics](#5-cancellation-semantics) +6. [Replay and determinism](#6-replay-and-determinism) +7. [What is NOT cancellable](#7-what-is-not-cancellable) +8. [User-facing guidance](#8-user-facing-guidance) +9. [Phased plan](#9-phased-plan) +10. [Open questions](#10-open-questions) +11. [Out of scope](#11-out-of-scope) + +--- + +## 1. 
Motivation + +`IDurableContext` methods that take a user `Func` (`StepAsync`, `RunInChildContextAsync`, `WaitForCallbackAsync`, `WaitForConditionAsync`) accept a `CancellationToken` parameter that the SDK observes only in its *own* machinery — waiting on the result `Task`, retry-backoff `Task.Delay`s, checkpoint writes. The token never reaches the user-supplied `Func` body. Two consequences: + +1. **Caller intent is silently dropped.** A user lambda invoked by an ASP.NET request handler or a host shutdown sequence has no way to forward its caller's `CancellationToken` into the step body. The token is accepted on the public API, then ignored. +2. **No clean teardown of inflight user code.** When the SDK decides to suspend (wait, callback pending, retry scheduled), the `Task.WhenAny` race in `DurableExecutionHandler.RunAsync` returns Pending and abandons the user `Task`. The abandoned `Task` keeps running on the threadpool until either it finishes naturally or Lambda freezes the process. During the window between `WhenAny` resolving and the freeze (checkpoint flush, response serialization, runtime API write), abandoned `HttpClient` calls and other side effects can still complete, and on the next invocation those orphaned operations may resume during a warm thaw. + +In the single-threaded model shipping today, the second point is small — `TerminationManager.Terminate()` fires *after* the relevant operation's user code has already resolved. The first point is a real bug on the public surface. We are landing the change now so the breaking delegate signature ships in preview, before GA, and so the same hook is in place when parallel/child-context cancellation needs it. + +## 2. Goals and non-goals + +### Goals + +- Every user `Func` accepted by `IDurableContext` receives a `CancellationToken` parameter. +- The token observes both the caller's `CancellationToken` (passed on each method) and an SDK-owned workflow `CancellationTokenSource`. 
+- The SDK's workflow CTS fires when `TerminationManager.Terminate()` resolves, so abandoned step bodies unwind via `OperationCanceledException` rather than running to completion in the background. +- Replay determinism is preserved: cached operations short-circuit before the user `Func` is invoked, so a cancelled or already-cancelled token cannot cause divergent re-execution. +- SDK-internal work (checkpoint serialization, runtime API writes, batcher flush) does **not** observe the workflow token — successful work is never lost to teardown. + +### Non-goals + +- No deadline timer. Lambda's own timeout is the deadline backstop; an SDK background timer that pre-emptively cancels user code adds magic-margin tuning we are not buying. +- No cancellation of `WaitAsync`, `CreateCallbackAsync`, or `InvokeAsync` user-side bodies — those operations do not run user `Func`s. +- No source-generator changes in this design. The `Amazon.Lambda.Annotations` source generator that emits the durable function entry point is updated separately. +- No support for resuming a cancelled workflow. Cancellation is workflow-fatal at the top level; the workflow either suspends per the standard termination flow or fails. +- No changes to the wire format, checkpoint shape, or `ExecutionState`. + +## 3. Public API changes + +Six `IDurableContext` methods change shape — each gains a `CancellationToken` parameter on its user-supplied `Func`. The trailing `CancellationToken cancellationToken = default` parameter on the method itself is unchanged. + +```csharp +// Before +Task<T> StepAsync<T>( + Func<IStepContext, Task<T>> func, + string? name = null, + StepConfig? config = null, + CancellationToken cancellationToken = default); + +// After +Task<T> StepAsync<T>( + Func<IStepContext, CancellationToken, Task<T>> func, + string? name = null, + StepConfig?
config = null, + CancellationToken cancellationToken = default); +``` + +The same shape change applies to: + +- `StepAsync` (void overload) — `Func` +- `RunInChildContextAsync` — `Func>` +- `RunInChildContextAsync` (void overload) — `Func` +- `WaitForCallbackAsync` — `Func` +- `WaitForConditionAsync` — `Func>` + +`WaitAsync`, `CreateCallbackAsync`, `InvokeAsync` and the `ConfigureLogger` / property surface are unchanged. + +This is a **breaking** change to public delegate signatures. Every existing user lambda must add the parameter (or `_`). It is a major version bump per the change-file rules. + +### Why a parameter, not a context property + +A property on `IStepContext` (`step.CancellationToken`) is non-breaking and was considered. The Func-parameter shape was chosen because: + +1. It is far more discoverable. The signature itself tells the user the token exists; a property requires reading docs. +2. It is consistent with .NET conventions for `Func` overloads that accept cancellation (e.g. `Channel.Reader.ReadAllAsync`, `Parallel.ForEachAsync`). +3. We are still in preview. The cost of changing it later, post-GA, is far higher than the cost of changing it now. + +The trade-off: every existing test, doc example, and customer preview lambda needs the parameter added. That is paid once. + +## 4. Internal scaffold + +### New type — `WorkflowCancellation` + +```csharp +// Libraries/src/Amazon.Lambda.DurableExecution/Internal/WorkflowCancellation.cs +internal sealed class WorkflowCancellation : IDisposable +{ + private readonly CancellationTokenSource _cts = new(); + + public CancellationToken Token => _cts.Token; + + public WorkflowCancellation(TerminationManager terminationManager) + { + // When the SDK decides to suspend or abort the workflow, cancel. + // Abandoned user Tasks (the WhenAny loser in DurableExecutionHandler) + // unwind via OperationCanceledException instead of running to + // completion on the threadpool while Lambda is mid-response. 
+ terminationManager.TerminationTask.ContinueWith( + _ => { try { _cts.Cancel(); } catch (ObjectDisposedException) { } }, + CancellationToken.None, + TaskContinuationOptions.ExecuteSynchronously, + TaskScheduler.Default); + } + + public void Dispose() => _cts.Dispose(); +} +``` + +One instance per durable function invocation. Lives alongside `TerminationManager`; constructed in the same place that constructs the `TerminationManager` (the entry point that calls `DurableExecutionHandler.RunAsync`). + +### `DurableExecutionHandler.RunAsync` — owns the lifecycle + +```csharp +internal static async Task> RunAsync( + ExecutionState executionState, + TerminationManager terminationManager, + WorkflowCancellation workflowCancellation, + Func> userHandler) { /* race unchanged */ } +``` + +The caller constructs `WorkflowCancellation(terminationManager)` and disposes it after `RunAsync` returns. The `Task.Run(userHandler)` race is unchanged. + +### `DurableContext` — accepts the workflow CTS, exposes a linker + +```csharp +internal sealed class DurableContext : IDurableContext +{ + private readonly WorkflowCancellation _workflowCancellation; + // ... existing fields ... + + public DurableContext( + ExecutionState state, + TerminationManager terminationManager, + WorkflowCancellation workflowCancellation, // new + OperationIdGenerator idGenerator, + string durableExecutionArn, + ILambdaContext lambdaContext, + CheckpointBatcher? batcher = null) { ... } +} +``` + +Each operation construction passes `_workflowCancellation` down to the operation class; the operation class is responsible for building the linked CTS at the point of user-`Func` invocation (see below). `DurableContext` itself does not build linked CTSes — it only forwards the `WorkflowCancellation`. + +The child-context factory passes the **same** `WorkflowCancellation` to the child `DurableContext`. A child does not get an independent cancellation scope; cancelling the workflow cancels the child too. 
+ +### Operation classes — link at the user-`Func` boundary + +`StepOperation` is the canonical pattern. The same shape applies to `ChildContextOperation`, `WaitForConditionOperation`, and the inline submitter `Step` invocation inside `WaitForCallbackAsync`. `CallbackOperation` does not invoke a user `Func` and is unchanged. + +```csharp +// inside StepOperation.ExecuteAsync(callerToken) +using var linked = CancellationTokenSource.CreateLinkedTokenSource( + callerToken, + _workflowCancellation.Token); + +// ... replay-cache short-circuit (returns cached SUCCESS without invoking _func) ... +// ... retry-loop unchanged ... + +var stepCtx = new StepContext(operationId, attempt, scopedLogger); +try +{ + var result = await _func(stepCtx, linked.Token).ConfigureAwait(false); + // checkpoint SUCCESS (uses CancellationToken.None — see §7) + return result; +} +catch (OperationCanceledException oce) when (linked.IsCancellationRequested) +{ + // Cancellation: do NOT checkpoint FAIL, do NOT retry. Re-throw so the + // termination signal owns the suspend/abort decision. + throw; +} +catch (Exception ex) +{ + // Non-cancellation failure — existing path: checkpoint FAIL, apply + // retry strategy, etc. Unchanged. +} +``` + +Two semantic points encoded above: + +1. **`when (linked.IsCancellationRequested)` distinguishes our cancellation from a stray `OperationCanceledException` the user threw for unrelated reasons.** A user OCE thrown without our token cancelling falls through to the generic `catch` and is treated as a normal step failure (FAIL checkpoint + retry). +2. **A cancelled step is not checkpointed.** The next invocation will replay the operation from scratch (no SUCCESS, no FAIL) and either re-execute or, if the workflow is itself terminating, never reach this point. + +### Void overload wrappers + +`StepAsync(Func)` and the void `RunInChildContextAsync` already wrap the user `Func` to return a synthetic `null`. 
The wrapper threads the token through: + +```csharp +public async Task StepAsync( + Func func, + string? name = null, + StepConfig? config = null, + CancellationToken cancellationToken = default) +{ + await RunStep( + async (ctx, ct) => { await func(ctx, ct); return null; }, + name, config, cancellationToken); +} +``` + +### `WaitForCallbackAsync` — composed submitter + +`WaitForCallbackAsync` composes a child context that runs `CreateCallbackAsync` + `StepAsync(submitter)` + `callback.GetResultAsync`. The submitter call propagates the token: + +```csharp +await childCtx.StepAsync( + async (stepCtx, ct) => + { + var submitterCtx = new WaitForCallbackContext(stepCtx.Logger); + await submitter(callback.CallbackId, submitterCtx, ct); + }, + name: submitterName, + config: stepConfig, + cancellationToken: cancellationToken); +``` + +## 5. Cancellation semantics + +The decision tree for an `OperationCanceledException` thrown out of a user `Func`: + +| Workflow CTS fired? | Caller token fired? | Step body threw OCE? | Result | +|---|---|---|---| +| no | no | yes (user-thrown OCE, unrelated) | Treated as a normal step failure: FAIL checkpoint, retry per `RetryStrategy`. | +| no | yes | yes | Step is abandoned: no checkpoint written, OCE propagates up, the workflow's user-handler `Task` faults. The `WhenAny` race in `RunAsync` returns FAILED with the OCE as the cause. | +| yes | either | yes | Step is abandoned: no checkpoint written, OCE propagates up. The termination signal that cancelled the workflow CTS has already resolved `TerminationTask`, so `WhenAny` returns Pending (or Failed if termination carried an exception). The user OCE is observed by `userTask` but never reaches the handler result — the termination outcome wins. | + +Implementation: `catch (OperationCanceledException) when (linked.IsCancellationRequested)` separates "our cancellation" from "user-thrown OCE." The latter falls through to the generic `catch (Exception)` path. 
+ +### Behavior of the workflow CTS over a workflow's lifetime + +- Constructed at workflow-entry time, before `Task.Run(userHandler)`. +- Cancels exactly once, when `TerminationManager.TerminationTask` resolves (any reason). Termination's reason set today: `WaitScheduled`, `RetryScheduled`, `CallbackPending`, `InvokePending`, `CheckpointFailed`. The CTS does not distinguish reasons; user code observing cancel only knows "the workflow is being torn down." +- Disposed after `RunAsync` returns, in the same scope as the `TerminationManager`. + +### Why "always cancel on termination" rather than "only on hard-abort reasons" + +`TerminationManager.Terminate()` fires for both resumable suspensions (wait, callback pending, retry scheduled) and hard aborts (checkpoint failed). In every case the user `Task` is being abandoned — the operation that caused termination has already resolved its own result, and any other in-flight user code in the same `Task.Run` lineage is now dead weight. Cancelling them all gives: + +- Cleaner threadpool: abandoned `HttpClient` calls release connections promptly. +- Less risk of orphaned side effects landing during the freeze window. +- Simpler model: one signal, one meaning. + +The cost is small in the single-threaded model: today, `Terminate()` fires only after the user `Func` for the relevant operation has already returned, so there is rarely user code mid-await to cancel. The mechanism becomes load-bearing once parallel branches exist. + +## 6. Replay and determinism + +The cancellation token does not interact with replay state, by design. Specifics: + +1. **Cached operations short-circuit before the user `Func` is invoked.** Each `*Operation.ExecuteAsync` checks `ExecutionState` for a SUCCESS checkpoint matching the deterministic operation ID and returns the cached result without ever building the linked CTS or calling `_func`. A cancelled token cannot cause divergent re-execution because the user code never runs. +2. 
**The workflow CTS is per-invocation and fresh on replay.** Invocation N's CTS state is not reconstructed on invocation N+1. User code that branches on `IsCancellationRequested` could in principle observe different values across replays of the same logical step. This is a misuse — see §8 — and is documented, not engineered around. +3. **Termination fires after, not during, user-`Func` execution in single-threaded mode.** Today, the termination signal that cancels the workflow CTS is raised by an operation that has already resolved its own result. In single-threaded code, the `Task.Run` user task is not concurrently awaiting anything else when termination fires. So in single-threaded code, the workflow CTS rarely interrupts an in-progress user `Func` body — the linked token's main observable effect is propagating the **caller's** `CancellationToken` into user code. The mechanism becomes load-bearing once parallel branches exist. +4. **A cancelled step does not produce a checkpoint.** No SUCCESS, no FAIL. The next invocation replays the operation from scratch — either re-executes the body, or never reaches the operation because the workflow itself is terminating. + +## 7. What is NOT cancellable + +The workflow CTS is for **user-side I/O only**. The following code paths must complete even when the workflow is being torn down, and therefore must **not** observe the workflow token: + +- Checkpoint serialization and the runtime API write (the SDK's call to record SUCCESS/FAIL after a user step body resolves). +- `CheckpointBatcher` flush. +- Construction of the response payload returned to RuntimeSupport. +- Any `LambdaSerializerHelper` invocation that serializes a step result before checkpointing. + +Implementation rule: code on these paths uses `CancellationToken.None` for any cancellation parameter, never the workflow token. A test verifies that a step that succeeds and is then cancelled (workflow CTS fires after `_func` returns successfully) still has its SUCCESS checkpoint persisted. + +## 8. 
User-facing guidance + +The following guidance is documented in `docs/core/steps.md`, `child-contexts.md`, `callbacks.md`, and `wait-for-condition.md`: + +- **Do not branch workflow logic on `IsCancellationRequested`.** It is a runtime concern, not a workflow concern. Branching on it makes the workflow non-deterministic across replays. +- **Do not catch `OperationCanceledException` thrown on the workflow token and continue.** If the workflow is being torn down, continued work is wasted. If the caller cancelled, the user expects unwind. Either catch-and-rethrow (for example, to run cleanup), or do not catch. +- **Do pass the `CancellationToken` handed to the step body (`ct`) into every cancellation-aware API call inside the step body** (`HttpClient.SendAsync(ct)`, `Task.Delay(ct)`, AWS SDK calls). This is what makes deadline propagation and caller-token propagation actually work. + +## 9. Phased plan + +### Phase 1 — internal plumbing (no public API changes) + +1. Add `WorkflowCancellation` (Internal/). +2. Construct `WorkflowCancellation(terminationManager)` in the entry point that today constructs `TerminationManager`. Add as a new parameter to `DurableExecutionHandler.RunAsync`. +3. Add `WorkflowCancellation` to the `DurableContext` constructor. Forward to operation classes (no behavior change yet — operations ignore it). +4. Unit test: `WorkflowCancellation.Token.IsCancellationRequested` becomes `true` after `terminationManager.Terminate(...)` resolves; remains `false` until then. + +### Phase 2 — operation classes link and pass through + +5. Each operation class that invokes a user `Func` (`StepOperation`, `ChildContextOperation`, `WaitForConditionOperation`, the inline submitter step in `WaitForCallbackAsync`) accepts `WorkflowCancellation` via its constructor. +6. Inside `ExecuteAsync`, build `using var linked = CancellationTokenSource.CreateLinkedTokenSource(callerToken, _workflowCancellation.Token);` and pass `linked.Token` into the user `Func`. +7. 
Add the cancellation-aware exception path: `catch (OperationCanceledException) when (linked.IsCancellationRequested) { throw; }` — no FAIL checkpoint, no retry. +8. Verify SDK-internal paths (checkpoint write, batcher flush, response build) continue to use `CancellationToken.None` and never the linked or workflow token. + +### Phase 3 — public Func signatures + +9. Update `IDurableContext` (six methods) to accept the new Func shape. +10. Update `DurableContext` to match. The internal `RunStep`, `RunChildContext`, `RunWaitForCallback` glue threads the new parameter into the operation classes. +11. Update the void-step and void-child-context wrappers to forward the token. +12. Update `WaitForCallbackAsync`'s composed submitter call to pass the token. + +### Phase 4 — tests + +13. Update every existing test that passes a `Func` body — add `_` or `ct`. +14. New tests: + - Caller's token fires → user `Func` observes cancel via `linked.Token`. + - `terminationManager.Terminate(WaitScheduled)` while user `Func` is mid-await → user `Func` observes cancel. + - User-thrown `OperationCanceledException` (without our token cancelling) is treated as a normal step failure and retried per the `RetryStrategy`. + - Cancelled step writes no checkpoint (neither SUCCESS nor FAIL). + - Successful step that races with workflow cancel still writes its SUCCESS checkpoint (the §7 invariant). + - Replay path: cached step result returns without invoking the user `Func` even when the workflow token is already cancelled. + - Child context propagates the workflow CTS to its inner `IDurableContext`; cancelling the workflow cancels in-flight child operations. + - `WaitForConditionAsync` check function receives the linked token. + - `WaitForCallbackAsync` submitter receives the linked token. + +### Phase 5 — docs and change file + +15. Update XML doc on every changed `IDurableContext` Func parameter to describe the linked-token contract (caller token + SDK shutdown signal). +16. 
Update `docs/core/steps.md`, `child-contexts.md`, `callbacks.md`, `wait-for-condition.md` examples to take and forward the token. +17. Add a new short doc `docs/core/cancellation.md` covering the §8 guidance. +18. `autover change` — major increment for `Amazon.Lambda.DurableExecution`. Changelog message names the breaking delegate-signature change explicitly so preview users see it. + +## 10. Open questions + +1. **Should termination always cancel, or only for hard-abort reasons?** Current decision: always (see §5). Worth flagging to reviewers in case the parallel design wants to distinguish "we're suspending, sibling branches should stop" from "we're aborting, sibling branches must stop." +2. **Should `DurableContext` expose `WorkflowCancellation.Token` as a property on `IDurableContext`** (e.g. `IDurableContext.CancellationToken`) for advanced users who want to observe workflow-wide cancel without being inside an operation? Defer until a concrete use case appears; adding it later is non-breaking. +3. **`InvokeAsync` and the workflow CTS.** `InvokeAsync` does not accept a user `Func`, but it does fire an outbound durable-service call. §7 says runtime API writes do not observe the workflow token — the same rule should apply here so an in-flight invoke is not torn down mid-call once we have already committed an INVOKE START checkpoint. The caller's `cancellationToken` parameter is honored as today (synchronous `ThrowIfCancellationRequested` before the call); the workflow CTS is not linked. Confirm at implementation time. + +## 11. Out of scope + +- **The `Amazon.Lambda.Annotations` source generator.** Once Phases 1–3 land, the generator's emitted entry-point wrapper passes a workflow `CancellationToken` into the user's top-level handler. That is a separate change and design. +- **Parallel branches and map operations.** Their cancellation rides on the same `WorkflowCancellation`, but the semantic decisions (one branch failure cancels siblings? 
failure modes carried in error-aggregate?) are owned by the parallel design. +- **Lambda deadline timer.** Considered and rejected (see §2 non-goals). If we later decide deadline-aware cancel is worth it, it will be added as an explicit `Terminate` reason raised by code that owns the deadline policy, not as a generic background timer in `WorkflowCancellation`. +- **A way to resume a cancelled workflow.** Cancellation is workflow-fatal at the top level. +- **Wire format, checkpoint shape, or `ExecutionState` changes.** None. diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WorkflowCancellationTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WorkflowCancellationTests.cs index 06fbb35d9..4b1f04cdc 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WorkflowCancellationTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WorkflowCancellationTests.cs @@ -11,7 +11,8 @@ namespace Amazon.Lambda.DurableExecution.Tests; /// /// Cancellation-flow tests for and the /// linked-token contract surfaced through . -/// Companion to docs/design/cancellation-design.md. +/// Companion to +/// Libraries/src/Amazon.Lambda.DurableExecution/docs/design/cancellation-design.md. /// public class WorkflowCancellationTests { @@ -76,19 +77,26 @@ public void Dispose_AfterTermination_DoesNotThrow() [Fact] public async Task StepAsync_CallerToken_PropagatesIntoFunc() { + // Cancel AFTER the func has started — pre-cancellation would short-circuit + // in StepOperation.ExecuteFunc's ThrowIfCancellationRequested before the + // user body runs and we'd never observe the propagation. var harness = CreateHarness(); using var caller = new CancellationTokenSource(); - CancellationToken seen = default; + var entered = new TaskCompletionSource(); + var task = harness.Context.StepAsync(async (_, ct) => + { + entered.TrySetResult(); + // Block on the linked token; if the caller's cancel propagates into + // ct via the linked CTS, this throws. 
+ await Task.Delay(Timeout.Infinite, ct); + return 0; + }, name: "step", cancellationToken: caller.Token); + + await entered.Task.WaitAsync(TimeSpan.FromSeconds(2)); caller.Cancel(); - await Assert.ThrowsAsync(() => - harness.Context.StepAsync(async (_, ct) => - { - seen = ct; - await Task.CompletedTask; - return 0; - }, name: "step", cancellationToken: caller.Token)); + await Assert.ThrowsAsync(() => task); } [Fact] From a0fc71d03e9879d63c0c78773c4d133fac202b05 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Wed, 10 Jun 2026 15:02:39 -0400 Subject: [PATCH 5/6] Delete Libraries/src/Amazon.Lambda.DurableExecution/docs/design/annotations-integration-plan.md --- .../design/annotations-integration-plan.md | 378 ------------------ 1 file changed, 378 deletions(-) delete mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/docs/design/annotations-integration-plan.md diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/annotations-integration-plan.md b/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/annotations-integration-plan.md deleted file mode 100644 index a45478bdf..000000000 --- a/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/annotations-integration-plan.md +++ /dev/null @@ -1,378 +0,0 @@ -# Implementation Plan: Integrating `[DurableExecution]` with the Amazon.Lambda.Annotations Source Generator - -> Status: **Ready with must-fixes.** This plan folds in every adversarial-reviewer blocker. Items that depend on undefined infrastructure (the runtime string, the IAM-shape decision) are flagged inline and gated behind explicit pre-merge confirmations rather than buried. - -## Verified ground truth - -All load-bearing claims confirmed against the codebase: - -- IAM action names `lambda:CheckpointDurableExecution` and `lambda:GetDurableExecutionState` verified in the reference template (lines 52-53). 
Note the reference uses an inline `PolicyName: DurableExecutionPolicy` role-attached policy, not a SAM `Policies` array entry — relevant to the IAM section. -- README line 35 states the executable-only constraint is a preview limitation pending RuntimeSupport changes (resolves the "temporary vs permanent" contradiction). -- README line 37 shows `dotnet10` in its example, but durable functions run on **either `dotnet8` or `dotnet10`** (user-confirmed 2026-06-08) — the generator does not force a runtime. Line 53 confirms the `HandlerWrapper.GetHandlerWrapper` typed contract. -- Package multi-targets net8.0 + net10.0. - ---- - -## 1. Goal & Scope - -### Goal -Let a developer annotate a method with `[DurableExecution]` (alongside `[LambdaFunction]`) and have the Amazon.Lambda.Annotations source generator emit: -1. A **typed-envelope handler wrapper** that delegates to `Amazon.Lambda.DurableExecution.DurableFunction.WrapAsync`. -2. A `serverless.template` resource carrying durable-specific config (`DurableConfig`) and the IAM permissions the function needs to call the checkpoint APIs. - -### In scope -- New public attribute `Amazon.Lambda.Annotations.DurableExecutionAttribute` (in the Annotations package). -- Source-generator recognition (TypeFullNames, EventType, builders). -- Generated wrapper shape (typed in/typed out). -- CloudFormation/SAM `DurableConfig` + inline checkpoint IAM policy emission with orphan removal. -- Diagnostics, snapshot tests, change file, docs. - -### Out of scope -- Changes to `Amazon.Lambda.DurableExecution` runtime behavior (`DurableFunction`, `DurableContext`, the wire format). These ship independently; this work consumes them. -- Scoped (least-privilege) checkpoint ARNs — deferred until the service publishes a scopable ARN format (see Risks). 
- -### The executable-only constraint (VERIFIED, and its sharp edge) -`Amazon.Lambda.DurableExecution/README.md` line 35 states the preview **only supports the executable programming model** — the function is an executable assembly hosting its own bootstrap loop and passing the serializer to the runtime in code. Class-library/managed-runtime support lands only after RuntimeSupport changes are deployed. So the constraint is **temporary-but-real for preview**: `[DurableExecution]` requires `OutputType=Exe` today. - -**MUST-FIX (reviewer blocker — enforcement is post-hoc, not preventive).** `LambdaFunctionModelBuilder.BuildAndValidate` (verified line 17) receives `isExecutable` as a **caller-supplied parameter** from the generator driver; it is not derived from the attribute. A diagnostic can *report* `[DurableExecution]` on a non-executable project, but the framework does not abort generation on diagnostic severity alone. Therefore the plan must make `IsValid=false` the gate: -- `LambdaFunctionValidator.ValidateFunction` (called at line 26) returns the model's `IsValid`. When `[DurableExecution]` is present and `isExecutable == false`, emit `DurableExecutionRequiresExecutable` (Error) **and** force `IsValid=false` so no wrapper is generated. This is the only mechanism in the existing framework that actually halts emission for a function. - ---- - -## 2. The `[DurableExecution]` Attribute Design - -**Placement (REVISED 2026-06-08): `Amazon.Lambda.Annotations` package, top-level namespace `Amazon.Lambda.Annotations`** — file `Libraries/src/Amazon.Lambda.Annotations/DurableExecutionAttribute.cs`. This matches every other annotation attribute (`LambdaFunctionAttribute`, `ScheduleEventAttribute`, …) and lets the generator use the standard strongly-typed `AttributeModel` pattern (the generator already references `Amazon.Lambda.Annotations` and reaches its internals via `InternalsVisibleTo`, so it can call `Validate()`/`IsXxxSet` directly). 
The attribute holds only `int` values, so this adds no dependency from `Amazon.Lambda.Annotations` onto the DurableExecution SDK. - -> **Superseded earlier design:** an initial draft placed the attribute in the `Amazon.Lambda.DurableExecution` package. That was wrong — the generator must target `netstandard2.0` and cannot reference that package (net8/net10 + AWSSDK.Lambda), which made the generic `AttributeModel` pattern impossible and forced an awkward string-keyed POCO workaround. Moving the attribute to `Amazon.Lambda.Annotations` removes the problem entirely. The matching style follows `LambdaFunctionAttribute` (block namespace, no nullable), not the file-scoped style of the DurableExecution package. - -Implemented shape (matches `LambdaFunctionAttribute`'s block-namespace, non-nullable style): - -```csharp -using System; -using System.Collections.Generic; - -namespace Amazon.Lambda.Annotations -{ - [AttributeUsage(AttributeTargets.Method, AllowMultiple = false)] - public class DurableExecutionAttribute : Attribute - { - private int _retentionPeriodInDays; - public int RetentionPeriodInDays - { - get => _retentionPeriodInDays; - set { _retentionPeriodInDays = value; IsRetentionPeriodInDaysSet = true; } - } - internal bool IsRetentionPeriodInDaysSet { get; private set; } - - private int _executionTimeout; // seconds - public int ExecutionTimeout - { - get => _executionTimeout; - set { _executionTimeout = value; IsExecutionTimeoutSet = true; } - } - internal bool IsExecutionTimeoutSet { get; private set; } - - internal List Validate() - { - var validationErrors = new List(); - if (IsRetentionPeriodInDaysSet && RetentionPeriodInDays <= 0) - validationErrors.Add($"{nameof(RetentionPeriodInDays)} = {RetentionPeriodInDays}. It must be a positive integer."); - if (IsExecutionTimeoutSet && ExecutionTimeout <= 0) - validationErrors.Add($"{nameof(ExecutionTimeout)} = {ExecutionTimeout}. 
It must be a positive integer."); - return validationErrors; - } - } -} -``` - -Design notes: -- **Parameterless** — `[DurableExecution]` with no args is valid (unlike `[SQSEvent]`'s required queue arg). -- **`IsXxxSet` flags are `internal`** (consumed by the generator via `InternalsVisibleTo`), following the `ScheduleEventAttribute` convention so unset values are omitted from CFN. -- **No `WorkflowName`/`Input`/`ResourceName` argument.** Input is carried by the durable envelope (the EXECUTION op — verified in `DurableFunction.ExtractUserPayload`, lines 200-221); the function name derives from `[LambdaFunction]`. A second name source would create a duplicate-key hazard. -- **No signature change** to the user method. The user method stays `(TInput, IDurableContext) -> Task` or `(TInput, IDurableContext) -> Task`, enforced by `DurableExecutionInvalidSignature`. -- Validate rejects `<= 0` now; exact upper bounds are a follow-up once service limits are confirmed. - ---- - -## 3. Source-Generator Recognition (Models, TypeFullNames) - -**MUST-FIX (reviewer): exact namespace match or silent skip.** The string below must match the attribute's real namespace exactly, or `EventTypeBuilder`/`AttributeModelBuilder` silently skip it and the method routes to `NoEventMethodBody`. A dedicated test (Component H) covers discovery. - -1. **`TypeFullNames.cs`** — add four constants (note the attribute is now in the Annotations namespace; the invocation envelopes + `DurableFunction` remain in the SDK namespace because the **user's** compilation references them and the generator only matches them by string): - - `DurableExecutionAttribute = "Amazon.Lambda.Annotations.DurableExecutionAttribute"` - - `DurableExecutionInvocationInput = "Amazon.Lambda.DurableExecution.DurableExecutionInvocationInput"` - - `DurableExecutionInvocationOutput = "Amazon.Lambda.DurableExecution.DurableExecutionInvocationOutput"` - - `DurableFunction = "Amazon.Lambda.DurableExecution.DurableFunction"` - -2. 
**`Models/EventType.cs`** — add `DurableExecution` enum member. - -3. **`Models/EventTypeBuilder.cs`** — add `else if (attribute.AttributeClass.ToDisplayString() == TypeFullNames.DurableExecutionAttribute) events.Add(EventType.DurableExecution);`. - -4. **`Models/Attributes/AttributeModelBuilder.cs`** (IMPLEMENTED) — add an `else if` case (`SymbolEqualityComparer` against `GetTypeByMetadataName(TypeFullNames.DurableExecutionAttribute)`) constructing the standard strongly-typed `AttributeModel` via `DurableExecutionAttributeBuilder.Build`. Because the attribute now lives in `Amazon.Lambda.Annotations` (which the generator references), this is the same generic pattern every other attribute uses — no workaround needed. - -5. **`Models/Attributes/DurableExecutionAttributeBuilder.cs` (NEW, IMPLEMENTED):** returns a real `DurableExecutionAttribute`, reading `att.NamedArguments` by `nameof` (`RetentionPeriodInDays` / `ExecutionTimeout`); assigning each property also flips its `IsXxxSet` flag (so unset values are omitted from the template). Mirrors `ScheduleEventAttributeBuilder` but with no constructor args (the attribute is parameterless). - -6. **`Models/GeneratedMethodModelBuilder.cs`** — early branches gated on `Events.Contains(EventType.DurableExecution)`, placed **BEFORE** the API/HttpApi/ALB branches: - - `BuildParameters` → exactly `[ __request__ : DurableExecutionInvocationInput, __context__ : ILambdaContext ]` - - `BuildResponseType` → `Task` (auto-async) - - `BuildUsings` → conditionally add `Amazon.Lambda.DurableExecution`. - - The wrapper DOES need `TInput`/`TOutput` to emit **explicit** generic arguments (see Section 4 correction) — read from `LambdaMethod.Parameters[0].Type.FullName` and `LambdaMethod.ReturnType.TaskTypeArgument`. No new model fields are required; the existing model already carries these. - -**Branch-ordering is load-bearing** (reviewer): if these run after the API/ALB checks, a method routes to the wrong template. 
A test must assert a file containing both a durable and an API method produces the durable wrapper for the durable method. - ---- - -## 4. Generated Handler Wrapper - -The wrapper is a **typed-envelope** method (matches README line 53's `HandlerWrapper.GetHandlerWrapper` contract), **NOT** Stream→Stream. - -**Why typed, not Stream→Stream (VERIFIED dual-serializer hazard):** `DurableFunction.WrapAsyncCore` (verified line 79) reads the serializer off the **context** via `LambdaSerializerHelper.GetRequired(lambdaContext)`, not off any wrapper field. A Stream→Stream wrapper that deserialized with its own `serializer` field (a different instance than the one the bootstrap attaches to the context) would be a real bug. So the wrapper does typed in/typed out and lets the runtime `HandlerWrapper` do envelope (de)serialization. - -**Generated signature:** -```csharp -public async Task ( - Amazon.Lambda.DurableExecution.DurableExecutionInvocationInput __request__, - ILambdaContext __context__) -``` - -**Generated body (single delegation, bound method-group):** -```csharp -return await Amazon.Lambda.DurableExecution.DurableFunction.WrapAsync( - ., __request__, __context__); -``` -- `` = the `containingType` field (non-DI) or `scope.ServiceProvider.GetRequiredService()` (DI). Both resolution paths already exist in `FieldsAndConstructor`. -- **Which overload (VERIFIED, four exist — DurableFunction.cs lines 36-71):** the wrapper uses the **three-argument** (no explicit client) overloads — `WrapAsync(Func>, …)` for a typed-returning method or `WrapAsync(Func, …)` for a void method. The lazy `_cachedLambdaClient` (line 30) backs the no-client path — correct for the generated case. 
-- **CORRECTION (2026-06-08, found by Component H): the wrapper MUST emit EXPLICIT generic type arguments.** The original plan said to emit none and rely on overload resolution — that is **wrong** and produces `CS0411` ("type arguments cannot be inferred"): C# cannot infer `TInput`/`TOutput` from a **method-group** argument bound to a `Func<,,>` parameter. Every real call site confirms this — README line 61 (`WrapAsync(Workflow, …)`) and all `DurableFunctionTests` use explicit generics. The generated wrapper therefore emits `WrapAsync(instance.Method, …)` for typed workflows and `WrapAsync(instance.Method, …)` for void (`Task`) workflows, where `TInput` = the user method's first parameter type and `TOutput` = the `Task` argument. Verified by a compile test that the explicit-generic call binds and the inference-free form fails with `CS0411`. -- The wrapper does **not** deserialize a Stream, does **not** touch its own `serializer` field, and does **not** reconstruct `[FromX]` params. - -**MUST-FIX (reviewer): signature constraint must be validated.** Method-group overload resolution assumes `Task` or `Task`. A `ValueTask`-returning or wrong-shape user method produces a C# compile error in generated code. `LambdaFunctionValidator.ValidateFunction` must add a durable-specific check: the user method must be exactly `(TInput, IDurableContext) -> Task` or `-> Task`; otherwise emit `DurableExecutionInvalidSignature` (Error) and set `IsValid=false`. - -**MUST-FIX (reviewer): runtime serializer contract.** `WrapAsyncCore` calls `LambdaSerializerHelper.GetRequired(__context__)` and throws if no serializer is on the context. The generated wrapper assumes the bootstrap populated `ILambdaContext.Serializer`. This is a runtime contract not exercisable in generator snapshot tests; the `DurableExecutionInvoke.tt` template must carry a code comment stating the serializer is expected from the context, and Component A must include a serializer round-trip unit test (Section 8). 
- -**Build note (IMPORTANT, discovered during Component C):** there is **no command-line T4 step**. The `TextTemplatingFilePreprocessor` entries are VS-design-time only; `dotnet build` compiles the **committed** `.cs` partials, not the `.tt`. So every template requires THREE checked-in files kept in sync: `X.tt` (source of truth), `X.cs` (the T4-style transform output — `TransformText()` + the generated boilerplate base class), and `XCode.cs` (the constructor partial holding `_model`). The durable body is a single delegation line, authored across all three for `DurableExecutionInvoke`. - -**Template wiring (IMPLEMENTED):** -- `LambdaFunctionTemplate.tt` **and** `LambdaFunctionTemplate.cs` — durable branch placed **FIRST** in the dispatch chain (`if (Events.Contains(EventType.DurableExecution)) Write(new DurableExecutionInvoke(_model)...)`), before Authorizer/API/ALB/else. Both files edited (the `.cs` is what compiles). -- `DurableExecutionInvoke.tt` + `.cs` + `Code.cs` (NEW) — emits `return await Amazon.Lambda.DurableExecution.DurableFunction.WrapAsync(., __request__, __context__);` with **explicit** generic arguments (see Section 4 correction — `WrapAsync` for typed, `WrapAsync` for void). `` is the camel-cased containing-type field (non-DI) or the DI-resolved local that `LambdaFunctionTemplate`'s shared prologue already sets up. csproj registered the new `.tt`/`.cs` pair like its siblings. -- `GeneratedMethodModelBuilder` (IMPLEMENTED) — durable branches in `BuildResponseType` (→ `Task`), `BuildParameters` (→ `DurableExecutionInvocationInput __request__, ILambdaContext __context__`), and `BuildUsings` (adds `Amazon.Lambda.DurableExecution`). The durable check is placed before the API/Authorizer/ALB checks in each. 
- -**Original "separate template" wiring notes (superseded by the above):** -- `Templates/LambdaFunctionTemplate.tt` — add `else if (_model.LambdaMethod.Events.Contains(EventType.DurableExecution)) { Write(new DurableExecutionInvoke(_model).TransformText()); }` placed **FIRST**, before the Authorizer/API/ALB branches. The signature line already renders the forced params/return from `GeneratedMethod`, with `async` emitted because the return is a generic `Task`. -- `Templates/DurableExecutionInvoke.tt` (NEW, + checked-in `.cs` partial if the existing template convention requires one) — emits the single `WrapAsync` delegation, handling DI (`scope.ServiceProvider`) and non-DI (`containingType` field) resolution. **MUST-FIX: this template must be authored before snapshots can be produced.** -- `ExecutableAssembly.tt` — **no change.** Verified: it already emits `Func<{p.Type.FullName}, {ReturnType.FullName}>` generically and calls `LambdaBootstrapBuilder.Create(handler, new SerializerName())`. A regression test asserts no change is needed for durable return types. - -**DI lifetime (reviewer gap):** the DI scope is **per-invocation**, matching existing API-Gateway scope semantics — the scope is created and disposed around a single Lambda invocation, NOT held open across a multi-hour suspended workflow (the service re-invokes; each invocation gets a fresh scope). Document this in the template comment. - ---- - -## 5. CloudFormation / SAM Template Changes - -`DurableConfig` is a function **Properties** block (not a SAM `Events` entry), tracked via a `Metadata` marker, modeled exactly on the verified `SyncedFunctionUrlConfig` pattern (`CloudFormationWriter.cs` lines 245-249 write the marker; lines 267+ do orphan removal). 
- -In `ProcessLambdaFunctionEventAttributes` (verified switch at lines 220-262), add: -```csharp -case AttributeModel durableModel: - ProcessDurableExecutionAttribute(lambdaFunction, durableModel.Data); // Data is DurableExecutionAttribute - hasDurableExecution = true; // initialized = false near line 218 - break; // do NOT add to currentSyncedEvents — durable is not an event -``` - -`ProcessDurableExecutionAttribute` writes (only when the corresponding `IsXxxSet` flag is true): -- `Resources..Properties.DurableConfig.RetentionPeriodInDays` -- `Resources..Properties.DurableConfig.ExecutionTimeout` -- marker `Resources..Metadata.SyncedDurableConfig = true` - -**Expected JSON shape** (snapshot expectation, resolving the reviewer's ambiguity): -```json -"Properties": { - "DurableConfig": { "RetentionPeriodInDays": 7, "ExecutionTimeout": 300 } -} -``` -YAML equivalent under `Properties: DurableConfig:`. - -**Orphan removal** (mirroring the `FunctionUrl` block at lines 267+): when `!hasDurableExecution`, if `Metadata.SyncedDurableConfig` is true, `RemoveToken Properties.DurableConfig`, remove the injected checkpoint policy (Section 6), and remove the markers. - -**Runtime:** NOT set here. Forced at model-build time (Section 7), because `ProcessPackageTypeProperty` line 185 (`SetToken …Runtime = lambdaFunction.Runtime`) would clobber any writer-side injection in the Zip branch. - -**PackageType:** durable functions are Zip/executable only. The `Image` branch (verified lines 190-196) strips `Handler`/`Runtime`, so `PackageType.Image` is structurally unsupported → `DurableExecutionZipOnly` (Error, `IsValid=false`) at model-build. **MUST-FIX: this diagnostic must be Error and gate `IsValid`, not a warning** — otherwise the Image branch silently produces a broken template. - -**Tool guard:** the existing `Metadata.Tool = Amazon.Lambda.Annotations` guard is preserved (DurableConfig only written/refreshed for generator-owned functions). - ---- - -## 6. 
IAM Policy Statements for Checkpoint APIs - -**Action names (VERIFIED against the reference template, 2026-06-08):** attested snapshot from `C:\dev\repos\aws-durable-execution-sdk-python\packages\aws-durable-execution-sdk-python-examples\template.yaml` (the file is JSON despite the `.yaml` extension), `DurableFunctionRole.Properties.Policies[0]`, lines 43-60: -```json -"Policies": [ - { - "PolicyName": "DurableExecutionPolicy", - "PolicyDocument": { - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "lambda:CheckpointDurableExecution", - "lambda:GetDurableExecutionState" - ], - "Resource": "*" - } - ] - } - } -] -``` -So the two checkpoint actions are confirmed: `lambda:CheckpointDurableExecution`, `lambda:GetDurableExecutionState`. - -**FLAGGED — the reference IAM pattern diverges MORE than first assumed (corrected 2026-06-08 after reading the full reference template):** -- The reference does **not** put any IAM on the function resources at all. It defines a **single shared standalone `AWS::IAM::Role`** (`DurableFunctionRole`, lines 25-62) carrying `ManagedPolicyArns: [AWSLambdaBasicExecutionRole]` **plus** the inline `PolicyName: DurableExecutionPolicy` above, and **every `AWS::Serverless::Function` sets `Role: {Fn::GetAtt: [DurableFunctionRole, Arn]}`** (e.g. lines 69-74) — no function uses a SAM `Policies` array. -- **Consequence for this plan's design:** under the plan's own rule "when `lambdaFunction.Role` IS set, do NOT touch IAM," the reference pattern would never trigger the plan's injection — because in the reference, every function *does* set `Role`. The generator's auto-IAM path (no explicit `Role` → emit a SAM `Policies`-array inline statement) is therefore **a distinct, generator-idiomatic adaptation, not a reproduction of the reference**. 
The SAM transform expands a per-function `Policies` array into a generated per-function role, so it is functionally equivalent (each function gets the two actions), but the resulting template shape (N generated roles vs. one shared role) differs from the reference. -- **DECISION MADE (2026-06-08): Option 1 — per-function SAM `Policies` array.** Rationale (user): follow the same mechanism the generator already uses for IAM (it appends to the per-function `Policies` list it already manages for `AWSLambdaBasicExecutionRole`), rather than introducing standalone-role emission the writer does not do today. The two options considered were: - 1. **Per-function SAM `Policies` array** (CHOSEN): idiomatic to how the generator already emits `AWSLambdaBasicExecutionRole`; produces one role per function via the SAM transform. Mixed string/object array — see the round-trip risk below. - 2. ~~Shared standalone role (matches reference exactly): generator emits one `DurableFunctionRole` resource and points every durable function's `Role` at it. Larger change to the writer (it does not emit standalone roles today) and interacts with user-specified `Role`.~~ Not chosen. -- `Resource: "*"` is used because the DurableExecutionArn is allocated at runtime and is not knowable at template-synth time (matches the reference, line 55). Whether a scopable ARN will ever exist is **undefined** — flagged as a follow-up, not promised. - -When `[DurableExecution]` is present AND `lambdaFunction.Role` is NOT set, after `ProcessLambdaFunctionProperties` has run (so the `Policies` array exists from the line 161-166 split), read-modify-write `Properties.Policies` via `GetToken`/`SetToken(TokenType.List)`, appending one inline statement object: -```json -{ - "Statement": [ - { - "Effect": "Allow", - "Action": ["lambda:CheckpointDurableExecution", "lambda:GetDurableExecutionState"], - "Resource": "*" - } - ] -} -``` -Producing a mixed string/object array, e.g. 
`["AWSLambdaBasicExecutionRole", { "Statement": [ … ] }]`. Track via `Metadata.SyncedDurablePolicy = true` for idempotent regeneration; remove the injected statement + marker on orphan removal. - -**When `lambdaFunction.Role` IS set** (Role/Policies mutually exclusive — verified lines 155-166): do NOT touch IAM. Emit `DurableExecutionExplicitRoleNeedsCheckpointPolicy` (Info) instructing the user to attach the two actions manually. The diagnostic fires whenever both `[DurableExecution]` and `Role` are present at generation time. - -**MUST-FIX (highest regression risk):** the mixed string/object `Policies` array must round-trip through both `JsonWriter` and `YamlWriter`. A dedicated JSON+YAML round-trip snapshot test is mandatory (Section 8, Test G). If `SetToken(TokenType.List)` cannot preserve heterogeneous types, this approach is not viable and must be revisited before merge. - ---- - -## 7. Component-by-Component Implementation Steps (real file paths) - -All paths are absolute. `.tt` template changes require regenerating the corresponding `.cs` via the project's T4 step. - -### Component A — `DurableExecutionAttribute` (public API) -- **NEW** `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.DurableExecution\DurableExecutionAttribute.cs` — the attribute from Section 2. -- Add a serializer round-trip unit test (Section 8). - -### Component B — Attribute discovery + model wiring -- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\TypeFullNames.cs` — four constants. -- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\EventType.cs` — `DurableExecution` member. -- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\EventTypeBuilder.cs` — mapping `else if`. 
-- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\Attributes\AttributeModelBuilder.cs` — `SymbolEqualityComparer` case + `using Amazon.Lambda.DurableExecution`. -- **NEW** `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\Attributes\DurableExecutionAttributeBuilder.cs` — copied from `ScheduleEventAttributeBuilder.cs`. - -### Component C — Generated wrapper shape -- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\GeneratedMethodModelBuilder.cs` — early `BuildParameters`/`BuildResponseType`/`BuildUsings` branches, ordered before API/ALB. -- **NEW** `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Templates\DurableExecutionInvoke.tt` (+ generated `.cs`). -- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Templates\LambdaFunctionTemplate.tt` — durable branch placed FIRST. -- Verify `ExecutableAssembly.tt` needs no change (regression test). - -### Component D — Package/model validation -- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Models\LambdaFunctionModelBuilder.cs`. - -**Runtime: NO forcing (DECISION 2026-06-08).** Durable functions run on **either `dotnet8` or `dotnet10`**, so the generator does **not** force or override the runtime — the caller-supplied/default `runtime` flows through unchanged exactly like every other function. No `DurableRuntime` constant, no `model.Runtime` override. (This removes the former "MUST-FIX runtime contradiction" and BLOCKING risk #1 entirely.) - -- Run the durable validation pass (executable-only, Zip-only, exclusive-event, signature) and force `IsValid=false` on any Error-severity finding. This is the substance of Component D now that runtime forcing is gone. 
- -**IMPLEMENTED (2026-06-08, Components D+E):** added a `ValidateDurableExecution` method to `LambdaFunctionValidator` (called alongside the other `ValidateXxxEvents`), which adds Error diagnostics to the list — `ReportDiagnostics` already returns `IsValid=false` whenever any Error is present, so no separate gating wiring is needed. Checks: `OutputKind != ConsoleApplication` → 0140; `PackageType == Image` → 0141; signature (param count, second param `== IDurableContext`, return classified via the model's existing `ReturnsVoidOrGenericTask`) → 0142; explicit `Role` set → 0143 (Info). Added `TypeFullNames.IDurableContext`. Two build-system findings: (1) **RS1032** — a `messageFormat` ending in a `{0}` placeholder must use `: {0}` not `. {0}` (trailing-period rule); (2) the SourceGenerators.Tests project **cannot reference the DurableExecution package** (its AWSSDK.Core 4.x downgrades the test project's pinned 3.7.x → NU1605), so diagnostic tests supply minimal durable **stub types as source** (`IDurableContext` / the two envelopes) — the generator only needs them resolvable by metadata name. Diagnostic tests use the `VerifyCS.Test` harness with exact `WithSpan`/`WithArguments` (the framework demands precise locations and prints the expected `DiagnosticResult` on mismatch). - -### Component E — Diagnostics set -- `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.Annotations.SourceGenerator\Diagnostics\DiagnosticDescriptors.cs`. - -**RESOLVED (2026-06-08): concrete IDs allocated.** Verified against `DiagnosticDescriptors.cs`: the highest allocated id is `AWSLambda0139` (`InvalidScheduleEventAttribute`). (Note: `AWSLambda0126` is skipped in the existing file — 0125 jumps to 0127 — but the durable IDs continue cleanly from the top.) All descriptors use `category: "AWSLambdaCSharpGenerator"` and `isEnabledByDefault: true`, matching the file's convention. 
- -**REVISED (2026-06-08): only FOUR new descriptors (three Errors + one Info) — `DurableExecutionExclusiveEvent` dropped (redundant).** Code verification: `LambdaFunctionValidator.ValidateFunction` (line 58) already emits `MultipleEventsNotSupported` (AWSLambda0102) and returns early with `IsValid=false` whenever `Events.Count > 1`. Component B added `DurableExecutionAttribute` to `TypeFullNames.Events` and `EventType.DurableExecution`, so `[DurableExecution] + [RestApi]` already produces `Events.Count == 2` → fires AWSLambda0102 → halts generation. No new exclusive-event diagnostic is needed; just add a **test** asserting the combination triggers AWSLambda0102 (locks in the dispatch-order behavior). The durable descriptors take **`AWSLambda0140`–`AWSLambda0143`**: - -| Name | Id | Severity | Gates generation? | Message (summary) | -|---|---|---|---|---| -| `DurableExecutionRequiresExecutable` | `AWSLambda0140` | Error | Yes (`IsValid=false`) | `[DurableExecution]` requires an executable (OutputType=Exe) project; class-library handlers are not supported in preview. | -| `DurableExecutionZipOnly` | `AWSLambda0141` | Error | Yes | `[DurableExecution]` requires PackageType=Zip; Image packaging is not supported. | -| `DurableExecutionInvalidSignature` | `AWSLambda0142` | Error | Yes | A `[DurableExecution]` method must be `(TInput, IDurableContext) -> Task` or `-> Task`. | -| `DurableExecutionExplicitRoleNeedsCheckpointPolicy` | `AWSLambda0143` | Info | No | Function uses an explicit Role; attach `lambda:CheckpointDurableExecution` and `lambda:GetDurableExecutionState` manually. | - -**Exclusive-event enforcement (RESOLVED):** handled by the existing `MultipleEventsNotSupported` (AWSLambda0102) — see above. No new diagnostic. - -**Executable detection (RESOLVED 2026-06-08 — gate kept, but key off `OutputKind`):** the generator's `isExecutable` flag (Generator.cs:129) is derived from the `GenerateMain` named arg on `[assembly: LambdaGlobalProperties]` — i.e. 
"generator should synthesize `Main`." That is the WRONG signal for the durable gate, because the README's quick-start uses the **manual** bootstrap model (`GenerateMain` is false, user writes their own `Main` + `LambdaBootstrap`) yet is still a valid executable. `DurableExecutionRequiresExecutable` must therefore gate on **`context.Compilation.Options.OutputKind != OutputKind.ConsoleApplication`** ("is this an executable project at all"), NOT on `isExecutable`. This correctly allows both the manual-bootstrap model (today) and a future generated-`Main` model, and only rejects true class-library projects. - -### Component F — CFN `DurableConfig` writer (IMPLEMENTED 2026-06-08) -- `CloudFormationWriter.cs` — added a `case AttributeModel` to the event-attribute switch that calls `ProcessDurableExecutionAttribute` and sets `hasDurableExecution = true` (and does NOT add to `currentSyncedEvents` — durable is a Properties/IAM concern, not an event). `ProcessDurableExecutionAttribute` clears any prior `DurableConfig`, re-emits `RetentionPeriodInDays`/`ExecutionTimeout` only when their `IsXxxSet` flags are true (creating an empty `DurableConfig` object via `TokenType.Object` when neither is set so the function is still marked durable), and sets the `Metadata.SyncedDurableConfig` marker. Orphan removal mirrors the verified `FunctionUrl` block. - -### Component G — CFN checkpoint IAM writer (IMPLEMENTED 2026-06-08) -- `CloudFormationWriter.cs` — kept inline (no separate writer class), matching `ProcessFunctionUrlAttribute` style. When `Role` is empty, `AddDurableCheckpointPolicy` reads the existing `Policies` via `GetToken>`, appends one inline statement object (`{Statement:[{Effect,Action:[2 actions],Resource:"*"}]}` built as nested `Dictionary`/`List`), and re-sets with `TokenType.List` — producing the mixed string/object array (`["AWSLambdaBasicExecutionRole", {Statement…}]`). 
Idempotency + orphan removal use `IsDurableCheckpointStatement` (recognizes the statement by its action names via JSON serialization). When `Role` is set, IAM is left untouched and `AWSLambda0143` (Info) is emitted in the validator. -- **HIGHEST-RISK ITEM RESOLVED:** the mixed string/object `Policies` array round-trips cleanly through **both** `JsonWriter` (JSON.NET `JToken`) and `YamlWriter` (`TokenType.List` → `YamlSequenceNode`). Verified by `DurableExecution_InjectsCheckpointPolicy_AsMixedArray` (JSON + YAML) plus idempotency and orphan-removal tests. `SetToken(TokenType.List)` handles heterogeneous types fine — the approach is viable. - -### Component H — End-to-end / compile tests (IMPLEMENTED 2026-06-08) -- `DurableExecutionWrapperCompilesTests.cs` — compiles the exact generated wrapper shape against realistic `WrapAsync` overloads. **This layer found a real bug:** the planned no-explicit-generics call fails with `CS0411` (see Section 4 correction). Tests assert the typed (`WrapAsync`) and void (`WrapAsync`) forms bind, and a guard test asserts the inference-free form fails with `CS0411`. -- Note on approach: a full `Microsoft.CodeAnalysis.Testing` snapshot E2E (committed `.g.cs` + `Program.g.cs` + RuntimeSupport sources) was attempted but is high-friction here (exact `AWSLambda0103` content match + the AWSSDK.Core 3.7.x/4.x conflict that blocks referencing the durable package). The compile-test approach covers the unique remaining risk (overload binding) without that friction; the wrapper *text* is pinned by Component C's template tests and the *template* output by F/G's writer tests. - -### Component I — Change file + docs (IMPLEMENTED 2026-06-08) -- `.autover/changes/durable-execution-annotations-integration.json` — single `Amazon.Lambda.Annotations` Minor entry (that autover project spans both the attributes csproj and the SourceGenerator csproj, so it covers everything added here). 
-- `Amazon.Lambda.DurableExecution/README.md` — added a "Using Lambda Annotations" subsection showing the `[LambdaFunction]` + `[DurableExecution]` model that removes the manual handler/`WrapAsync` boilerplate. -- **NEW** `C:\dev\repos\aws-lambda-dotnet\.autover\changes\.json` — increment **Minor**, projects `Amazon.Lambda.Annotations.SourceGenerator` + `Amazon.Lambda.DurableExecution`. Create via `autover change`. -- Update `C:\dev\repos\aws-lambda-dotnet\Libraries\src\Amazon.Lambda.DurableExecution\README.md` to note that `[DurableExecution]` generates the bootstrap wiring for the executable model. - ---- - -## 8. Test Strategy (snapshot tests) - -Snapshot harness: `CSharpGeneratorDriver` against files in `Libraries\test\Amazon.Lambda.Annotations.SourceGenerators.Tests\Snapshots\`. CFN writer tests mirror `WriterTests\FunctionUrlTests.cs`, parameterized `[InlineData(CloudFormationTemplateFormat.Json)]` / `[InlineData(CloudFormationTemplateFormat.Yaml)]`. - -**Unit (Component A)** — `Libraries\test\Amazon.Lambda.DurableExecution.Tests\` (or the existing durable test project): constructor defaults, `IsXxxSet` tracking, `Validate()` rejects `<= 0`. **Serializer round-trip:** the default `ILambdaSerializer` deserializes `DurableExecutionInvocationInput` and serializes `DurableExecutionInvocationOutput` including `UpperSnakeCaseEnumConverter` on `InvocationStatus` (Succeeded/Failed/Pending), and a nested `InitialExecutionState`/`Operations` round-trips without loss. This must pass before the typed-envelope wrapper is relied upon. - -**Generated-wrapper snapshots (Component C):** -- A. Non-DI typed-output method → verify signature (`DurableExecutionInvocationInput`/`ILambdaContext` params, `Task` return) and single `WrapAsync(containingType.Method, __request__, __context__)` delegation, no Stream deserialization. -- B. DI variant → `scope.ServiceProvider.GetRequiredService()` resolution. -- C. 
Void user method (`Task` return) → confirms overload resolution compiles without explicit generic args. -- D. **Branch-ordering test:** one file with both a durable method and a `[RestApi]` method → durable method gets the durable wrapper. -- E. `ExecutableAssembly.tt` regression → executable assembly snapshot unchanged in shape for durable return types. - -**Diagnostics (Component E):** one test each for `DurableExecutionRequiresExecutable` (non-exe / class library), `DurableExecutionZipOnly` (Image), `DurableExecutionInvalidSignature` (ValueTask / wrong params), and `DurableExecutionExplicitRoleNeedsCheckpointPolicy` (explicit Role). Plus a test that `[DurableExecution]` + `[RestApi]` triggers the **existing** `MultipleEventsNotSupported` (AWSLambda0102). For the three durable Errors (and AWSLambda0102), also assert no wrapper is generated (`IsValid=false`). - -**CFN (Components F/G)** — `Libraries\test\Amazon.Lambda.Annotations.SourceGenerators.Tests\WriterTests\DurableExecutionTests.cs` (NEW): -- F1. `DurableConfig` with both props set (JSON + YAML); `Metadata.SyncedDurableConfig == true`. -- F2. Partial emit — only `RetentionPeriodInDays` set → `ExecutionTimeout` absent. -- F3. Orphan removal — attribute dropped → `DurableConfig` + marker removed. -- G. **Highest-risk:** mixed string/object `Policies` array round-trip (JSON + YAML), asserting `["AWSLambdaBasicExecutionRole", { "Statement": [ … checkpoint … ] }]` order preserved after write and re-parse. -- G2. Idempotency — regeneration does not duplicate the policy statement. -- G3. Role suppression — `Role` set → `Policies` untouched, Info diagnostic emitted. - -Snapshot fixtures with the exact JSON/YAML shapes (Sections 5 and 6) must be authored as part of this work, not deferred. - ---- - -## 9. Risks, Open Questions, and Must-Fix-First Items - -### BLOCKING (resolve before implementation starts) -1. 
~~**Runtime string is undefined infra.**~~ **DROPPED (2026-06-08): not an issue.** Durable functions run on either `dotnet8` or `dotnet10`, so the generator does **not** force a runtime — it lets the user's normal runtime selection flow through. No `DurableRuntime` constant, no override. (Component D no longer touches runtime at all.) -2. ~~**IAM emission shape — role shape still a DECISION.**~~ **RESOLVED (2026-06-08): Option 1 — per-function SAM `Policies`-array inline statement**, matching how the generator already emits `AWSLambdaBasicExecutionRole`. Action names verified against the reference snapshot (`lambda:CheckpointDurableExecution`, `lambda:GetDurableExecutionState`; lines 51-54). The remaining risk here is purely mechanical — the mixed string/object `Policies` array round-trip (see item 7), not a shape decision. -3. ~~**Diagnostic IDs.**~~ **RESOLVED (2026-06-08): `AWSLambda0140`–`AWSLambda0143`** (highest existing is `AWSLambda0139`; `0126` is skipped in the file but the durable IDs continue cleanly from the top). Four new descriptors (three Errors + one Info) — the exclusive-event case reuses the existing `AWSLambda0102` instead of adding a fifth. See the Section 7 / Component E table. - -### REQUIRED-BEFORE-CODING (artifacts that gate the rest) -4. Author `DurableExecutionInvoke.tt` first — snapshots cannot exist without it. -5. Create `DurableExecutionAttributeBuilder.cs` by copying the real `ScheduleEventAttributeBuilder.cs`, not from prose. -6. Author the exact JSON/YAML snapshot fixtures for `DurableConfig` and the mixed `Policies` array. - -### Highest regression risk -7. **Mixed string/object `Policies` array** round-trip via `SetToken(TokenType.List)` through both `JsonWriter` and `YamlWriter`. Dedicated round-trip test mandatory; if `SetToken` cannot preserve heterogeneous types, the inline-policy approach is not viable and must be reconsidered. - -### Correctness gates (enforced via `IsValid=false`, not severity alone) -8. 
**Validation gates must set `IsValid=false`** (diagnostic severity alone does not halt generation). Applies to `DurableExecutionRequiresExecutable` (gate on `OutputKind != ConsoleApplication`), `DurableExecutionZipOnly`, and `DurableExecutionInvalidSignature`. The exclusive-event case is already handled by the existing `MultipleEventsNotSupported` (AWSLambda0102), which returns early with `IsValid=false`. -9. **Branch ordering** is load-bearing in two files (`GeneratedMethodModelBuilder` and `LambdaFunctionTemplate.tt`) — durable must be checked before API/HttpApi/ALB. Covered by Test D. -10. **Signature constraint** — a `ValueTask` return type, or a parameter list other than `(TInput, IDurableContext)`, produces generated-code compile errors. `ValidateFunction` must reject both. -11. **Runtime serializer contract** — `WrapAsyncCore` reads the serializer off `__context__` (verified line 79); the generated wrapper assumes the bootstrap populated it. Not testable in snapshots; covered by Component A's round-trip unit test + a template comment. - -### Accepted-for-preview (documented follow-ups, not promises) -12. `Resource: "*"` on the checkpoint statement is broad. Acceptable for preview per the reference; tightening depends on the service defining a scopable durable-execution ARN — **existence of such an ARN is undefined**, so this is flagged, not committed. -13. **Executable-only is a sharp edge** until managed-runtime support lands in RuntimeSupport (README line 35). Temporary-for-preview, not architectural. -14. **TypeFullNames must exactly match** `Amazon.Lambda.DurableExecution.DurableExecutionAttribute` or the attribute is silently skipped → routed to `NoEventMethodBody`. Covered by the discovery test. - -### Open questions deferred (non-blocking) -15. Upper bounds for `RetentionPeriodInDays`/`ExecutionTimeout` — `Validate()` only rejects `<= 0` now; tighten once service limits are published. -16. 
Whether, when a user adds an explicit `Role` to a function that previously had an auto-injected checkpoint policy, the old policy should be actively removed. The Role/Policies mutual-exclusivity (lines 155-166) clears `Policies` automatically in `ProcessLambdaFunctionProperties`, so the stale statement is removed as a side effect; verify this in the Role-suppression test and document it. From 69f2918ed2fae413571d55a09f1a05c9097b3334 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Wed, 10 Jun 2026 15:02:50 -0400 Subject: [PATCH 6/6] Delete Libraries/src/Amazon.Lambda.DurableExecution/docs/design/cancellation-design.md --- .../docs/design/cancellation-design.md | 341 ------------------ 1 file changed, 341 deletions(-) delete mode 100644 Libraries/src/Amazon.Lambda.DurableExecution/docs/design/cancellation-design.md diff --git a/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/cancellation-design.md b/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/cancellation-design.md deleted file mode 100644 index 0e12a32e3..000000000 --- a/Libraries/src/Amazon.Lambda.DurableExecution/docs/design/cancellation-design.md +++ /dev/null @@ -1,341 +0,0 @@ -# Cancellation in Amazon.Lambda.DurableExecution — Design - -> Status: design (2026-06-10). Targets the preview window before GA so the breaking delegate-shape change lands once. - -Thread a `CancellationToken` into every user `Func` accepted by `IDurableContext`. Internally the SDK owns a workflow-scoped `CancellationTokenSource` linked with the caller's token, so user code observes cancel for both upstream caller intent and SDK-driven workflow teardown. - -## Table of contents - -1. [Motivation](#1-motivation) -2. [Goals and non-goals](#2-goals-and-non-goals) -3. [Public API changes](#3-public-api-changes) -4. [Internal scaffold](#4-internal-scaffold) -5. [Cancellation semantics](#5-cancellation-semantics) -6. [Replay and determinism](#6-replay-and-determinism) -7. 
[What is NOT cancellable](#7-what-is-not-cancellable) -8. [User-facing guidance](#8-user-facing-guidance) -9. [Phased plan](#9-phased-plan) -10. [Open questions](#10-open-questions) -11. [Out of scope](#11-out-of-scope) - ---- - -## 1. Motivation - -`IDurableContext` methods that take a user `Func` (`StepAsync`, `RunInChildContextAsync`, `WaitForCallbackAsync`, `WaitForConditionAsync`) accept a `CancellationToken` parameter that the SDK observes only in its *own* machinery — waiting on the result `Task`, retry-backoff `Task.Delay`s, checkpoint writes. The token never reaches the user-supplied `Func` body. Two consequences: - -1. **Caller intent is silently dropped.** A user lambda invoked by an ASP.NET request handler or a host shutdown sequence has no way to forward its caller's `CancellationToken` into the step body. The token is accepted on the public API, then ignored. -2. **No clean teardown of inflight user code.** When the SDK decides to suspend (wait, callback pending, retry scheduled), the `Task.WhenAny` race in `DurableExecutionHandler.RunAsync` returns Pending and abandons the user `Task`. The abandoned `Task` keeps running on the threadpool until either it finishes naturally or Lambda freezes the process. During the window between `WhenAny` resolving and the freeze (checkpoint flush, response serialization, runtime API write), abandoned `HttpClient` calls and other side effects can still complete, and on the next invocation those orphaned operations may resume during a warm thaw. - -In the single-threaded model shipping today, the second point is small — `TerminationManager.Terminate()` fires *after* the relevant operation's user code has already resolved. The first point is a real bug on the public surface. We are landing the change now so the breaking delegate signature ships in preview, before GA, and so the same hook is in place when parallel/child-context cancellation needs it. - -## 2. 
Goals and non-goals - -### Goals - -- Every user `Func` accepted by `IDurableContext` receives a `CancellationToken` parameter. -- The token observes both the caller's `CancellationToken` (passed on each method) and an SDK-owned workflow `CancellationTokenSource`. -- The SDK's workflow CTS fires when `TerminationManager.Terminate()` resolves, so abandoned step bodies unwind via `OperationCanceledException` rather than running to completion in the background. -- Replay determinism is preserved: cached operations short-circuit before the user `Func` is invoked, so a cancelled or already-cancelled token cannot cause divergent re-execution. -- SDK-internal work (checkpoint serialization, runtime API writes, batcher flush) does **not** observe the workflow token — successful work is never lost to teardown. - -### Non-goals - -- No deadline timer. Lambda's own timeout is the deadline backstop; an SDK background timer that pre-emptively cancels user code adds magic-margin tuning we are not buying. -- No cancellation of `WaitAsync`, `CreateCallbackAsync`, or `InvokeAsync` user-side bodies — those operations do not run user `Func`s. -- No source-generator changes in this design. The `Amazon.Lambda.Annotations` source generator that emits the durable function entry point is updated separately. -- No support for resuming a cancelled workflow. Cancellation is workflow-fatal at the top level; the workflow either suspends per the standard termination flow or fails. -- No changes to the wire format, checkpoint shape, or `ExecutionState`. - -## 3. Public API changes - -Six `IDurableContext` methods change shape — each gains a `CancellationToken` parameter on its user-supplied `Func`. The trailing `CancellationToken cancellationToken = default` parameter on the method itself is unchanged. - -```csharp -// Before -Task StepAsync( - Func> func, - string? name = null, - StepConfig? 
config = null, - CancellationToken cancellationToken = default); - -// After -Task StepAsync( - Func> func, - string? name = null, - StepConfig? config = null, - CancellationToken cancellationToken = default); -``` - -The same shape change applies to: - -- `StepAsync` (void overload) — `Func` -- `RunInChildContextAsync` — `Func>` -- `RunInChildContextAsync` (void overload) — `Func` -- `WaitForCallbackAsync` — `Func` -- `WaitForConditionAsync` — `Func>` - -`WaitAsync`, `CreateCallbackAsync`, `InvokeAsync` and the `ConfigureLogger` / property surface are unchanged. - -This is a **breaking** change to public delegate signatures. Every existing user lambda must add the parameter (or `_`). It is a major version bump per the change-file rules. - -### Why a parameter, not a context property - -A property on `IStepContext` (`step.CancellationToken`) is non-breaking and was considered. The Func-parameter shape was chosen because: - -1. It is far more discoverable. The signature itself tells the user the token exists; a property requires reading docs. -2. It is consistent with .NET conventions for `Func` overloads that accept cancellation (e.g. `Channel.Reader.ReadAllAsync`, `Parallel.ForEachAsync`). -3. We are still in preview. The cost of changing it later, post-GA, is far higher than the cost of changing it now. - -The trade-off: every existing test, doc example, and customer preview lambda needs the parameter added. That is paid once. - -## 4. Internal scaffold - -### New type — `WorkflowCancellation` - -```csharp -// Libraries/src/Amazon.Lambda.DurableExecution/Internal/WorkflowCancellation.cs -internal sealed class WorkflowCancellation : IDisposable -{ - private readonly CancellationTokenSource _cts = new(); - - public CancellationToken Token => _cts.Token; - - public WorkflowCancellation(TerminationManager terminationManager) - { - // When the SDK decides to suspend or abort the workflow, cancel. 
- // Abandoned user Tasks (the WhenAny loser in DurableExecutionHandler) - // unwind via OperationCanceledException instead of running to - // completion on the threadpool while Lambda is mid-response. - terminationManager.TerminationTask.ContinueWith( - _ => { try { _cts.Cancel(); } catch (ObjectDisposedException) { } }, - CancellationToken.None, - TaskContinuationOptions.ExecuteSynchronously, - TaskScheduler.Default); - } - - public void Dispose() => _cts.Dispose(); -} -``` - -One instance per durable function invocation. Lives alongside `TerminationManager`; constructed in the same place that constructs the `TerminationManager` (the entry point that calls `DurableExecutionHandler.RunAsync`). - -### `DurableExecutionHandler.RunAsync` — owns the lifecycle - -```csharp -internal static async Task> RunAsync( - ExecutionState executionState, - TerminationManager terminationManager, - WorkflowCancellation workflowCancellation, - Func> userHandler) { /* race unchanged */ } -``` - -The caller constructs `WorkflowCancellation(terminationManager)` and disposes it after `RunAsync` returns. The `Task.Run(userHandler)` race is unchanged. - -### `DurableContext` — accepts the workflow CTS, exposes a linker - -```csharp -internal sealed class DurableContext : IDurableContext -{ - private readonly WorkflowCancellation _workflowCancellation; - // ... existing fields ... - - public DurableContext( - ExecutionState state, - TerminationManager terminationManager, - WorkflowCancellation workflowCancellation, // new - OperationIdGenerator idGenerator, - string durableExecutionArn, - ILambdaContext lambdaContext, - CheckpointBatcher? batcher = null) { ... } -} -``` - -Each operation construction passes `_workflowCancellation` down to the operation class; the operation class is responsible for building the linked CTS at the point of user-`Func` invocation (see below). `DurableContext` itself does not build linked CTSes — it only forwards the `WorkflowCancellation`. 
- -The child-context factory passes the **same** `WorkflowCancellation` to the child `DurableContext`. A child does not get an independent cancellation scope; cancelling the workflow cancels the child too. - -### Operation classes — link at the user-`Func` boundary - -`StepOperation` is the canonical pattern. The same shape applies to `ChildContextOperation`, `WaitForConditionOperation`, and the inline submitter `Step` invocation inside `WaitForCallbackAsync`. `CallbackOperation` does not invoke a user `Func` and is unchanged. - -```csharp -// inside StepOperation.ExecuteAsync(callerToken) -using var linked = CancellationTokenSource.CreateLinkedTokenSource( - callerToken, - _workflowCancellation.Token); - -// ... replay-cache short-circuit (returns cached SUCCESS without invoking _func) ... -// ... retry-loop unchanged ... - -var stepCtx = new StepContext(operationId, attempt, scopedLogger); -try -{ - var result = await _func(stepCtx, linked.Token).ConfigureAwait(false); - // checkpoint SUCCESS (uses CancellationToken.None — see §7) - return result; -} -catch (OperationCanceledException oce) when (linked.IsCancellationRequested) -{ - // Cancellation: do NOT checkpoint FAIL, do NOT retry. Re-throw so the - // termination signal owns the suspend/abort decision. - throw; -} -catch (Exception ex) -{ - // Non-cancellation failure — existing path: checkpoint FAIL, apply - // retry strategy, etc. Unchanged. -} -``` - -Two semantic points encoded above: - -1. **`when (linked.IsCancellationRequested)` distinguishes our cancellation from a stray `OperationCanceledException` the user threw for unrelated reasons.** A user OCE thrown without our token cancelling falls through to the generic `catch` and is treated as a normal step failure (FAIL checkpoint + retry). -2. **A cancelled step is not checkpointed.** The next invocation will replay the operation from scratch (no SUCCESS, no FAIL) and either re-execute or, if the workflow is itself terminating, never reach this point. 
- -### Void overload wrappers - -`StepAsync(Func)` and the void `RunInChildContextAsync` already wrap the user `Func` to return a synthetic `null`. The wrapper threads the token through: - -```csharp -public async Task StepAsync( - Func func, - string? name = null, - StepConfig? config = null, - CancellationToken cancellationToken = default) -{ - await RunStep( - async (ctx, ct) => { await func(ctx, ct); return null; }, - name, config, cancellationToken); -} -``` - -### `WaitForCallbackAsync` — composed submitter - -`WaitForCallbackAsync` composes a child context that runs `CreateCallbackAsync` + `StepAsync(submitter)` + `callback.GetResultAsync`. The submitter call propagates the token: - -```csharp -await childCtx.StepAsync( - async (stepCtx, ct) => - { - var submitterCtx = new WaitForCallbackContext(stepCtx.Logger); - await submitter(callback.CallbackId, submitterCtx, ct); - }, - name: submitterName, - config: stepConfig, - cancellationToken: cancellationToken); -``` - -## 5. Cancellation semantics - -The decision tree for an `OperationCanceledException` thrown out of a user `Func`: - -| Workflow CTS fired? | Caller token fired? | Step body threw OCE? | Result | -|---|---|---|---| -| no | no | yes (user-thrown OCE, unrelated) | Treated as a normal step failure: FAIL checkpoint, retry per `RetryStrategy`. | -| no | yes | yes | Step is abandoned: no checkpoint written, OCE propagates up, the workflow's user-handler `Task` faults. The `WhenAny` race in `RunAsync` returns FAILED with the OCE as the cause. | -| yes | either | yes | Step is abandoned: no checkpoint written, OCE propagates up. The termination signal that cancelled the workflow CTS has already resolved `TerminationTask`, so `WhenAny` returns Pending (or Failed if termination carried an exception). The user OCE is observed by `userTask` but never reaches the handler result — the termination outcome wins. 
| - -Implementation: `catch (OperationCanceledException) when (linked.IsCancellationRequested)` separates "our cancellation" from "user-thrown OCE." The latter falls through to the generic `catch (Exception)` path. - -### Behavior of the workflow CTS over a workflow's lifetime - -- Constructed at workflow-entry time, before `Task.Run(userHandler)`. -- Cancels exactly once, when `TerminationManager.TerminationTask` resolves (any reason). Termination's reason set today: `WaitScheduled`, `RetryScheduled`, `CallbackPending`, `InvokePending`, `CheckpointFailed`. The CTS does not distinguish reasons; user code observing cancel only knows "the workflow is being torn down." -- Disposed after `RunAsync` returns, in the same scope as the `TerminationManager`. - -### Why "always cancel on termination" rather than "only on hard-abort reasons" - -`TerminationManager.Terminate()` fires for both resumable suspensions (wait, callback pending, retry scheduled) and hard aborts (checkpoint failed). In every case the user `Task` is being abandoned — the operation that caused termination has already resolved its own result, and any other in-flight user code in the same `Task.Run` lineage is now dead weight. Cancelling them all gives: - -- Cleaner threadpool: abandoned `HttpClient` calls release connections promptly. -- Less risk of orphaned side effects landing during the freeze window. -- Simpler model: one signal, one meaning. - -The cost is small in the single-threaded model: today, `Terminate()` fires only after the user `Func` for the relevant operation has already returned, so there is rarely user code mid-await to cancel. The mechanism becomes load-bearing once parallel branches exist. - -## 6. Replay and determinism - -The cancellation token does not interact with replay state, by design. Specifics: - -1. 
**Cached operations short-circuit before the user `Func` is invoked.** Each `*Operation.ExecuteAsync` checks `ExecutionState` for a SUCCESS checkpoint matching the deterministic operation ID and returns the cached result without ever building the linked CTS or calling `_func`. A cancelled token cannot cause divergent re-execution because the user code never runs. -2. **The workflow CTS is per-invocation and fresh on replay.** Invocation N's CTS state is not reconstructed on N+1. User code that branches on `IsCancellationRequested` could in principle observe different values across replays of the same logical step. This is a misuse — see §8 — and is documented, not engineered around. -3. **Termination fires after, not during, user-`Func` execution in single-threaded mode.** Today, the termination signal that cancels the workflow CTS is raised by an operation that has already resolved its own result. In single-threaded code, the `Task.Run` user task is not concurrently awaiting anything else when termination fires. So in the single-threaded model, the workflow CTS rarely interrupts an in-progress user `Func` body — its observable effect is propagating the **caller's** `CancellationToken` into user code. The mechanism becomes load-bearing once parallel branches exist. -4. **A cancelled step does not produce a checkpoint.** No SUCCESS, no FAIL. The next invocation replays the operation from scratch — either re-executes the body, or never reaches the operation because the workflow itself is terminating. - -## 7. What is NOT cancellable - -The workflow CTS is for **user-side I/O only**. The following code paths must complete even when the workflow is being torn down, and therefore must **not** observe the workflow token: - -- Checkpoint serialization and the runtime API write (the SDK's call to record SUCCESS/FAIL after a user step body resolves). -- `CheckpointBatcher` flush. -- Construction of the response payload returned to RuntimeSupport. 
-- Any `LambdaSerializerHelper` invocation that serializes a step result before checkpointing. - -Implementation rule: code on these paths uses `CancellationToken.None` for any cancellation parameter, never the workflow token. A test verifies that a step that succeeds and is then cancelled (workflow CTS fires after `_func` returns successfully) still has its SUCCESS checkpoint persisted. - -## 8. User-facing guidance - -The following are documented as misuses in `docs/core/steps.md`, `child-contexts.md`, `callbacks.md`, and `wait.md`: - -- **Do not branch workflow logic on `IsCancellationRequested`.** It is a runtime concern, not a workflow concern. Branching on it makes the workflow non-deterministic across replays. -- **Do not catch `OperationCanceledException` thrown on the workflow token and continue.** If the workflow is being torn down, continued work is wasted. If the caller cancelled, the user expects unwind. Either catch, clean up, and rethrow, or do not catch. -- **Do pass the step's `CancellationToken` parameter (`ct`) into every cancellation-aware API call inside the step body** (`HttpClient.SendAsync(ct)`, `Task.Delay(ct)`, AWS SDK calls). This is what makes deadline propagation and caller-token propagation actually work. - -## 9. Phased plan - -### Phase 1 — internal plumbing (no public API changes) - -1. Add `WorkflowCancellation` (Internal/). -2. Construct `WorkflowCancellation(terminationManager)` in the entry point that today constructs `TerminationManager`. Add as a new parameter to `DurableExecutionHandler.RunAsync`. -3. Add `WorkflowCancellation` to the `DurableContext` constructor. Forward to operation classes (no behavior change yet — operations ignore it). -4. Unit test: `WorkflowCancellation.Token.IsCancellationRequested` becomes `true` after `terminationManager.Terminate(...)` resolves; remains `false` until then. - -### Phase 2 — operation classes link and pass through - -5. 
Each operation class that invokes a user `Func` (`StepOperation`, `ChildContextOperation`, `WaitForConditionOperation`, the inline submitter step in `WaitForCallbackAsync`) accepts `WorkflowCancellation` via its constructor. -6. Inside `ExecuteAsync`, build `using var linked = CancellationTokenSource.CreateLinkedTokenSource(callerToken, _workflowCancellation.Token);` and pass `linked.Token` into the user `Func`. -7. Add the cancellation-aware exception path: `catch (OperationCanceledException) when (linked.IsCancellationRequested) { throw; }` — no FAIL checkpoint, no retry. -8. Verify SDK-internal paths (checkpoint write, batcher flush, response build) continue to use `CancellationToken.None` and never the linked or workflow token. - -### Phase 3 — public Func signatures - -9. Update `IDurableContext` (six methods) to accept the new Func shape. -10. Update `DurableContext` to match. The internal `RunStep`, `RunChildContext`, `RunWaitForCallback` glue threads the new parameter into the operation classes. -11. Update the void-step and void-child-context wrappers to forward the token. -12. Update `WaitForCallbackAsync`'s composed submitter call to pass the token. - -### Phase 4 — tests - -13. Update every existing test that passes a `Func` body — add `_` or `ct`. -14. New tests: - - Caller's token fires → user `Func` observes cancel via `linked.Token`. - - `terminationManager.Terminate(WaitScheduled)` while user `Func` is mid-await → user `Func` observes cancel. - - User-thrown `OperationCanceledException` (without our token cancelling) is treated as a normal step failure and retried per the `RetryStrategy`. - - Cancelled step writes no checkpoint (neither SUCCESS nor FAIL). - - Successful step that races with workflow cancel still writes its SUCCESS checkpoint (the §7 invariant). - - Replay path: cached step result returns without invoking the user `Func` even when the workflow token is already cancelled. 
- - Child context propagates the workflow CTS to its inner `IDurableContext`; cancelling the workflow cancels in-flight child operations. - - `WaitForConditionAsync` check function receives the linked token. - - `WaitForCallbackAsync` submitter receives the linked token. - -### Phase 5 — docs and change file - -15. Update XML doc on every changed `IDurableContext` Func parameter to describe the linked-token contract (caller token + SDK shutdown signal). -16. Update `docs/core/steps.md`, `child-contexts.md`, `callbacks.md`, `wait.md` examples to take and forward the token. -17. Add a new short doc `docs/core/cancellation.md` covering the §8 guidance. -18. `autover change` — major increment for `Amazon.Lambda.DurableExecution`. Changelog message names the breaking delegate-signature change explicitly so preview users see it. - -## 10. Open questions - -1. **Should termination always cancel, or only for hard-abort reasons?** Current decision: always (see §5). Worth flagging to reviewers in case the parallel design wants to distinguish "we're suspending, sibling branches should stop" from "we're aborting, sibling branches must stop." -2. **Should `DurableContext` expose `WorkflowCancellation.Token` as a property on `IDurableContext`** (e.g. `IDurableContext.CancellationToken`) for advanced users who want to observe workflow-wide cancel without being inside an operation? Defer until a concrete use case appears; adding it later is non-breaking. -3. **`InvokeAsync` and the workflow CTS.** `InvokeAsync` does not accept a user `Func`, but it does fire an outbound durable-service call. §7 says runtime API writes do not observe the workflow token — the same rule should apply here so an in-flight invoke is not torn down mid-call once we have already committed an INVOKE START checkpoint. The caller's `cancellationToken` parameter is honored as today (synchronous `ThrowIfCancellationRequested` before the call); the workflow CTS is not linked. Confirm at implementation time. 
- -## 11. Out of scope - -- **The `Amazon.Lambda.Annotations` source generator.** Once Phases 1–3 land, the generator's emitted entry-point wrapper passes a workflow `CancellationToken` into the user's top-level handler. That is a separate change and design. -- **Parallel branches and map operations.** Their cancellation rides on the same `WorkflowCancellation`, but the semantic decisions (one branch failure cancels siblings? failure modes carried in error-aggregate?) are owned by the parallel design. -- **Lambda deadline timer.** Considered and rejected (see §2 non-goals). If we later decide deadline-aware cancel is worth it, it will be added as an explicit `Terminate` reason raised by code that owns the deadline policy, not as a generic background timer in `WorkflowCancellation`. -- **A way to resume a cancelled workflow.** Cancellation is workflow-fatal at the top level. -- **Wire format, checkpoint shape, or `ExecutionState` changes.** None.