From 8416718ea7656d8dc4677aa940934071eedff47a Mon Sep 17 00:00:00 2001 From: kai lin Date: Thu, 28 May 2026 11:48:01 -0400 Subject: [PATCH 01/11] Implement Retry Behavior 2.1 core logic gated behind AWS_NEW_RETRIES_2026 --- .../aws/core/client/AdaptiveRetryStrategy.h | 2 + .../include/aws/core/client/RetryStrategy.h | 17 + .../source/client/AdaptiveRetryStrategy.cpp | 4 + .../source/client/ClientConfiguration.cpp | 29 +- .../source/client/RetryStrategy.cpp | 141 +++-- .../aws/client/RetryBehaviorTest.cpp | 484 ++++++++++++++++++ 6 files changed, 635 insertions(+), 42 deletions(-) create mode 100644 tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp diff --git a/src/aws-cpp-sdk-core/include/aws/core/client/AdaptiveRetryStrategy.h b/src/aws-cpp-sdk-core/include/aws/core/client/AdaptiveRetryStrategy.h index 4a9f4d551d86..f89b61f40e12 100644 --- a/src/aws-cpp-sdk-core/include/aws/core/client/AdaptiveRetryStrategy.h +++ b/src/aws-cpp-sdk-core/include/aws/core/client/AdaptiveRetryStrategy.h @@ -141,6 +141,8 @@ class AWS_CORE_API AdaptiveRetryStrategy : public StandardRetryStrategy const char* GetStrategyName() const override { return "adaptive";} + AdaptiveRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec); + protected: RetryTokenBucket m_retryTokenBucket; bool m_fastFail = false; diff --git a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h index 8f9260c69091..77f9fd0776a4 100644 --- a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h +++ b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h @@ -6,6 +6,7 @@ #pragma once #include +#include #include #include @@ -102,10 +103,17 @@ namespace Aws virtual int GetRetryQuota() const = 0; }; + enum class RetryCostClassification + { + REQUEST_TIMEOUT_BASED, + THROTTLE_BASED + }; + class AWS_CORE_API DefaultRetryQuotaContainer : public RetryQuotaContainer { public: DefaultRetryQuotaContainer(); + DefaultRetryQuotaContainer(int retryCost, int throttlingRetryCost, RetryCostClassification classification); virtual ~DefaultRetryQuotaContainer() = default; virtual bool AcquireRetryQuota(int capacityAmount) override; virtual bool AcquireRetryQuota(const AWSError& error) override; @@ -116,6 +124,9 @@ namespace Aws protected: mutable Aws::Utils::Threading::ReaderWriterLock m_retryQuotaLock; int m_retryQuota; + int m_retryCost; + int m_throttlingRetryCost; + RetryCostClassification m_classification; }; class AWS_CORE_API StandardRetryStrategy : public RetryStrategy @@ -123,6 +134,8 @@ namespace Aws public: StandardRetryStrategy(long maxAttempts = 3); StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts = 3); + StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec); + virtual ~StandardRetryStrategy(); virtual void RequestBookkeeping(const HttpResponseOutcome& httpResponseOutcome) override; virtual void RequestBookkeeping(const HttpResponseOutcome& httpResponseOutcome, const AWSError& lastError) override; @@ -138,6 +151,10 @@ namespace Aws protected: std::shared_ptr m_retryQuotaContainer; long m_maxAttempts; + + private: + struct RetryImpl; + Aws::UniquePtr m_impl; }; } // namespace Client } // namespace Aws diff --git a/src/aws-cpp-sdk-core/source/client/AdaptiveRetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/AdaptiveRetryStrategy.cpp index 69bde40fb6a7..5a63d0d10eb1 100644 --- a/src/aws-cpp-sdk-core/source/client/AdaptiveRetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/AdaptiveRetryStrategy.cpp @@ -169,6 +169,10 @@ namespace Aws StandardRetryStrategy(retryQuotaContainer, maxAttempts) {} + AdaptiveRetryStrategy::AdaptiveRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec) : + StandardRetryStrategy(retryQuotaContainer, maxAttempts, transientBackoffBaseSec) + {} + bool AdaptiveRetryStrategy::HasSendToken() { return m_retryTokenBucket.Acquire(1, m_fastFail); diff --git a/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp b/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp index cc805c69eb99..800f0378103b 100644 --- a/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp +++ b/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp @@ -544,6 +544,8 @@ ClientConfiguration::ClientConfiguration(bool /*useSmartDefaults*/, const char* } std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String retryMode) { + const bool newRetriesEnabled = Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026") == "true"; + if (retryMode.empty()) { retryMode = Aws::Environment::GetEnv("AWS_RETRY_MODE"); @@ -552,30 +554,36 @@ std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String re { retryMode = Aws::Config::GetCachedConfigValue("retry_mode"); } + if (newRetriesEnabled && retryMode.empty()) + { + retryMode = "standard"; + } std::shared_ptr retryStrategy; if (retryMode == "standard") { - if (maxAttempts < 0) + long attempts = (maxAttempts < 0) ? 3 : maxAttempts; + if (newRetriesEnabled) { - // negative value set above force usage of default max attempts - retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG); + auto quota = Aws::MakeShared(CLIENT_CONFIG_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, quota, attempts, 0.05); } else { - retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, maxAttempts); + retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, attempts); } } else if (retryMode == "adaptive") { - if (maxAttempts < 0) + long attempts = (maxAttempts < 0) ? 3 : maxAttempts; + if (newRetriesEnabled) { - // negative value set above force usage of default max attempts - retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG); + auto quota = Aws::MakeShared(CLIENT_CONFIG_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, quota, attempts, 0.05); } else { - retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, maxAttempts); + retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, attempts); } } else @@ -583,6 +591,11 @@ std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String re retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG); } + if (newRetriesEnabled) + { + AWS_LOGSTREAM_INFO(CLIENT_CONFIG_TAG, "Retry Behavior 2.1 active (AWS_NEW_RETRIES_2026=true), mode=" << retryMode); + } + return retryStrategy; } diff --git a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp index 2c3b43fc3936..e5f68c1c99be 100644 --- a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include using namespace Aws::Utils::Threading; @@ -19,11 +20,98 @@ namespace Aws static const int RETRY_COST = 5; static const int TIMEOUT_RETRY_COST = 10; + // ---- DefaultRetryQuotaContainer ---- + + DefaultRetryQuotaContainer::DefaultRetryQuotaContainer() + : m_retryQuota(INITIAL_RETRY_TOKENS), + m_retryCost(RETRY_COST), + m_throttlingRetryCost(TIMEOUT_RETRY_COST), + m_classification(RetryCostClassification::REQUEST_TIMEOUT_BASED) + {} + + DefaultRetryQuotaContainer::DefaultRetryQuotaContainer(int retryCost, int throttlingRetryCost, RetryCostClassification classification) + : m_retryQuota(INITIAL_RETRY_TOKENS), + m_retryCost(retryCost), + m_throttlingRetryCost(throttlingRetryCost), + m_classification(classification) + {} + + bool DefaultRetryQuotaContainer::AcquireRetryQuota(int capacityAmount) + { + WriterLockGuard guard(m_retryQuotaLock); + + if (capacityAmount > m_retryQuota) + { + return false; + } + m_retryQuota -= capacityAmount; + return true; + } + + bool DefaultRetryQuotaContainer::AcquireRetryQuota(const AWSError& error) + { + int capacityAmount; + if (m_classification == RetryCostClassification::THROTTLE_BASED) + { + capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; + } + else + { + capacityAmount = error.GetErrorType() == CoreErrors::REQUEST_TIMEOUT ? TIMEOUT_RETRY_COST : RETRY_COST; + } + return AcquireRetryQuota(capacityAmount); + } + + void DefaultRetryQuotaContainer::ReleaseRetryQuota(int capacityAmount) + { + WriterLockGuard guard(m_retryQuotaLock); + m_retryQuota = (std::min)(m_retryQuota + capacityAmount, INITIAL_RETRY_TOKENS); + } + + void DefaultRetryQuotaContainer::ReleaseRetryQuota(const AWSError& error) + { + int capacityAmount; + if (m_classification == RetryCostClassification::THROTTLE_BASED) + { + capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; + } + else + { + capacityAmount = error.GetErrorType() == CoreErrors::REQUEST_TIMEOUT ? TIMEOUT_RETRY_COST : RETRY_COST; + } + ReleaseRetryQuota(capacityAmount); + } + + // ---- StandardRetryStrategy pimpl ---- + + struct StandardRetryStrategy::RetryImpl + { + bool newRetriesEnabled = false; + double transientBackoffBaseSec = 0.05; + }; + StandardRetryStrategy::StandardRetryStrategy(long maxAttempts) - : m_retryQuotaContainer(Aws::MakeShared("StandardRetryStrategy")), m_maxAttempts(maxAttempts) {} + : m_retryQuotaContainer(Aws::MakeShared("StandardRetryStrategy")), + m_maxAttempts(maxAttempts), + m_impl(Aws::MakeUnique("StandardRetryStrategy")) + {} StandardRetryStrategy::StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts) - : m_retryQuotaContainer(retryQuotaContainer), m_maxAttempts(maxAttempts) {} + : m_retryQuotaContainer(retryQuotaContainer), + m_maxAttempts(maxAttempts), + m_impl(Aws::MakeUnique("StandardRetryStrategy")) + {} + + StandardRetryStrategy::StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec) + : m_retryQuotaContainer(retryQuotaContainer), + m_maxAttempts(maxAttempts), + m_impl(Aws::MakeUnique("StandardRetryStrategy")) + { + m_impl->newRetriesEnabled = true; + m_impl->transientBackoffBaseSec = transientBackoffBaseSec; + } + + StandardRetryStrategy::~StandardRetryStrategy() = default; void StandardRetryStrategy::RequestBookkeeping(const HttpResponseOutcome& httpResponseOutcome) { @@ -54,45 +142,30 @@ namespace Aws long StandardRetryStrategy::CalculateDelayBeforeNextRetry(const AWSError& error, long attemptedRetries) const { - AWS_UNREFERENCED_PARAM(error); - // Maximum left shift factor is capped by ceil(log2(max_delay)), to avoid wrap-around and overflow into negative values: - return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); - } + if (!m_impl->newRetriesEnabled) + { + AWS_UNREFERENCED_PARAM(error); + return (std::min)(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << (std::min)(attemptedRetries, 15L)), 20000); + } - DefaultRetryQuotaContainer::DefaultRetryQuotaContainer() : m_retryQuota(INITIAL_RETRY_TOKENS) - {} + double x = error.ShouldThrottle() ? 1.0 : m_impl->transientBackoffBaseSec; + double exponentialPart = x * static_cast(1L << (std::min)(attemptedRetries, 30L)); + double cappedPart = (std::min)(exponentialPart, 20.0); - bool DefaultRetryQuotaContainer::AcquireRetryQuota(int capacityAmount) - { - WriterLockGuard guard(m_retryQuotaLock); + double b = static_cast(Aws::Utils::GetRandomValue() % 10000) / 10000.0; + double t_i = b * cappedPart; - if (capacityAmount > m_retryQuota) - { - return false; - } - else + const auto& headers = error.GetResponseHeaders(); + auto it = headers.find("x-amz-retry-after"); + if (it != headers.end()) { - m_retryQuota -= capacityAmount; - return true; + double headerSec = static_cast(Aws::Utils::StringUtils::ConvertToInt64(it->second.c_str())) / 1000.0; + double clamped = (std::max)(t_i, (std::min)(headerSec, 5.0 + t_i)); + return static_cast(clamped * 1000.0); } - } - bool DefaultRetryQuotaContainer::AcquireRetryQuota(const AWSError& error) - { - int capacityAmount = error.GetErrorType() == CoreErrors::REQUEST_TIMEOUT ? TIMEOUT_RETRY_COST : RETRY_COST; - return AcquireRetryQuota(capacityAmount); + return static_cast(t_i * 1000.0); } - void DefaultRetryQuotaContainer::ReleaseRetryQuota(int capacityAmount) - { - WriterLockGuard guard(m_retryQuotaLock); - m_retryQuota = (std::min)(m_retryQuota + capacityAmount, INITIAL_RETRY_TOKENS); - } - - void DefaultRetryQuotaContainer::ReleaseRetryQuota(const AWSError& error) - { - int capacityAmount = error.GetErrorType() == CoreErrors::REQUEST_TIMEOUT ? TIMEOUT_RETRY_COST : RETRY_COST; - ReleaseRetryQuota(capacityAmount); - } } } diff --git a/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp b/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp new file mode 100644 index 000000000000..36a0d9143d1c --- /dev/null +++ b/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp @@ -0,0 +1,484 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace Aws::Client; +using namespace Aws::Http; + +static const char ALLOCATION_TAG[] = "RetryBehaviorTest"; + +class RetryBehaviorTest : public Aws::Testing::AwsCppSdkGTestSuite +{ +}; + +static AWSError MakeTransientError() +{ + return AWSError(CoreErrors::NETWORK_CONNECTION, true); +} + +static AWSError MakeThrottlingError() +{ + return AWSError(CoreErrors::THROTTLING, RetryableType::RETRYABLE_THROTTLING); +} + +static AWSError MakeNonRetryableError() +{ + return AWSError(CoreErrors::INCOMPLETE_SIGNATURE, false); +} + +static AWSError MakeTransientErrorWithRetryAfter(const Aws::String& value) +{ + AWSError error(CoreErrors::NETWORK_CONNECTION, true); + HeaderValueCollection headers; + headers["x-amz-retry-after"] = value; + error.SetResponseHeaders(headers); + return error; +} + +// SEP Test 1: Retry eventually succeeds, quota restored +TEST_F(RetryBehaviorTest, RetryEventuallySucceeds) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 3, 0.05); + + ASSERT_EQ(500, quota->GetRetryQuota()); + + // First failure: transient, costs 14 + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); + ASSERT_EQ(486, quota->GetRetryQuota()); + + // Second failure: transient, costs 14 + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 1)); + ASSERT_EQ(472, quota->GetRetryQuota()); +} + +// SEP Test 2: Max attempts reached +TEST_F(RetryBehaviorTest, MaxAttemptsReached) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 3, 0.05); + + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 1)); + // Third attempt (index 2) hits max attempts of 3 + ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 2)); +} + +// SEP Test 3: Quota reached after 1 retry +TEST_F(RetryBehaviorTest, QuotaReachedAfterRetry) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + // Drain quota to 10 + ASSERT_TRUE(quota->AcquireRetryQuota(490)); + ASSERT_EQ(10, quota->GetRetryQuota()); + + // Can't acquire 14 tokens + ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 0)); +} + +// SEP Test 4: Zero quota, no retries +TEST_F(RetryBehaviorTest, ZeroQuotaNoRetries) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + ASSERT_TRUE(quota->AcquireRetryQuota(500)); + ASSERT_EQ(0, quota->GetRetryQuota()); + + ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 0)); + ASSERT_FALSE(strategy.ShouldRetry(MakeThrottlingError(), 0)); +} + +// SEP Test 5: Exponential timing (transient, 50ms base) +TEST_F(RetryBehaviorTest, ExponentialBackoffTransient) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + auto error = MakeTransientError(); + // Backoff is randomized, but must be within [0, x * 2^i * 1000]ms + // i=0: [0, 50ms], i=1: [0, 100ms], i=2: [0, 200ms], i=3: [0, 400ms] + for (int i = 0; i < 4; ++i) + { + long delay = strategy.CalculateDelayBeforeNextRetry(error, i); + long maxDelay = static_cast(0.05 * (1L << i) * 1000.0); + ASSERT_GE(delay, 0) << "Retry " << i; + ASSERT_LE(delay, maxDelay) << "Retry " << i; + } +} + +// SEP Test 6: Max backoff cap at 20s +TEST_F(RetryBehaviorTest, MaxBackoffCap) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 100, 0.05); + + auto error = MakeTransientError(); + // At i=30, 0.05 * 2^30 = 53687091.2s which exceeds 20s cap + long delay = strategy.CalculateDelayBeforeNextRetry(error, 30); + ASSERT_LE(delay, 20000); +} + +// SEP Test 7: Quota exhaustion mid-sequence +TEST_F(RetryBehaviorTest, QuotaExhaustionMidSequence) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 100, 0.05); + + // Drain to 20 tokens + ASSERT_TRUE(quota->AcquireRetryQuota(480)); + ASSERT_EQ(20, quota->GetRetryQuota()); + + // First retry: costs 14, quota = 6 + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); + ASSERT_EQ(6, quota->GetRetryQuota()); + + // Second retry: can't afford 14 + ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 1)); + ASSERT_EQ(6, quota->GetRetryQuota()); +} + +// SEP Test 8: Quota recovery (stateful multi-request sequence) +TEST_F(RetryBehaviorTest, QuotaRecoveryStateful) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + std::shared_ptr httpRequest = CreateHttpRequest(URI("http://www.uri.com"), HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); + std::shared_ptr httpResponse = Aws::MakeShared(ALLOCATION_TAG, httpRequest); + HttpResponseOutcome httpResponseOutcome(httpResponse); + + // Request 1: retry once (transient), costs 14, quota = 486 + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); + ASSERT_EQ(486, quota->GetRetryQuota()); + + // Request 1 succeeds: release 14, quota = 500 + strategy.RequestBookkeeping(httpResponseOutcome, MakeTransientError()); + ASSERT_EQ(500, quota->GetRetryQuota()); + + // Request 2: retry once (transient), costs 14, quota = 486 + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); + ASSERT_EQ(486, quota->GetRetryQuota()); + + // Request 2: retry again (transient), costs 14, quota = 472 + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 1)); + ASSERT_EQ(472, quota->GetRetryQuota()); + + // Request 2 succeeds: release 14, quota = 486 + strategy.RequestBookkeeping(httpResponseOutcome, MakeTransientError()); + ASSERT_EQ(486, quota->GetRetryQuota()); + + // Request 3: succeeds first try, release NO_RETRY_INCREMENT (1), quota = 487 + strategy.RequestBookkeeping(httpResponseOutcome); + ASSERT_EQ(487, quota->GetRetryQuota()); +} + +// SEP Test 9: Multi-threaded quota sharing (verify shared state) +TEST_F(RetryBehaviorTest, SharedQuotaAcrossRequests) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + // Simulate two concurrent requests both acquiring from same quota + // Request A: transient retry, costs 14, quota = 486 + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); + ASSERT_EQ(486, quota->GetRetryQuota()); + + // Request B: throttling retry, costs 5, quota = 481 + ASSERT_TRUE(strategy.ShouldRetry(MakeThrottlingError(), 0)); + ASSERT_EQ(481, quota->GetRetryQuota()); + + // Request A: another transient retry, costs 14, quota = 467 + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 1)); + ASSERT_EQ(467, quota->GetRetryQuota()); + + // Total consumed: 14 + 5 + 14 = 33 + ASSERT_EQ(500 - 33, quota->GetRetryQuota()); +} + +// SEP Test 10: Throttling costs (5 tokens) and backoff (1000ms base) +TEST_F(RetryBehaviorTest, ThrottlingCostsAndBackoff) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + // Throttling error costs 5 tokens + ASSERT_TRUE(strategy.ShouldRetry(MakeThrottlingError(), 0)); + ASSERT_EQ(495, quota->GetRetryQuota()); + + // Backoff for throttling: [0, 1000ms] at i=0 + auto error = MakeThrottlingError(); + long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 1000); + + // At i=1: [0, 2000ms] + delay = strategy.CalculateDelayBeforeNextRetry(error, 1); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 2000); +} + +// SEP Test 11: DynamoDB tuning (25ms base, 4 max attempts) +TEST_F(RetryBehaviorTest, DynamoDBTuning) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 4, 0.025); + + ASSERT_EQ(4, strategy.GetMaxAttempts()); + + auto error = MakeTransientError(); + // i=0: [0, 25ms] + long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 25); + + // i=1: [0, 50ms] + delay = strategy.CalculateDelayBeforeNextRetry(error, 1); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 50); +} + +// SEP Test 12: Long-polling transient + empty quota (backoff applied) +TEST_F(RetryBehaviorTest, LongPollingTransientEmptyQuota) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + // Drain quota + ASSERT_TRUE(quota->AcquireRetryQuota(500)); + ASSERT_EQ(0, quota->GetRetryQuota()); + + // ShouldRetry returns false (no quota) + ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 0)); + + // But backoff is still computable for long-polling use + auto error = MakeTransientError(); + long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); + // i=0, transient, base=0.05: [0, 50ms] + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 50); +} + +// SEP Test 13: Long-polling throttling + empty quota (backoff applied) +TEST_F(RetryBehaviorTest, LongPollingThrottlingEmptyQuota) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + // Drain quota + ASSERT_TRUE(quota->AcquireRetryQuota(500)); + ASSERT_EQ(0, quota->GetRetryQuota()); + + // ShouldRetry returns false (no quota) + ASSERT_FALSE(strategy.ShouldRetry(MakeThrottlingError(), 0)); + + // But backoff is still computable for long-polling use + auto error = MakeThrottlingError(); + long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); + // i=0, throttling, base=1.0: [0, 1000ms] + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 1000); +} + +// SEP Test 14: Long-polling max attempts exceeded (no delay) +TEST_F(RetryBehaviorTest, LongPollingMaxAttemptsExceeded) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 3, 0.05); + + // At retries=2, max attempts (3) is reached + ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 2)); + // Pipeline would NOT apply long-polling backoff because retries+1 >= maxAttempts +} + +// SEP Test 15: Long-polling success (no delay) +TEST_F(RetryBehaviorTest, LongPollingSuccess) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + std::shared_ptr httpRequest = CreateHttpRequest(URI("http://www.uri.com"), HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); + std::shared_ptr httpResponse = Aws::MakeShared(ALLOCATION_TAG, httpRequest); + HttpResponseOutcome httpResponseOutcome(httpResponse); + + // Successful request, no retry needed, no delay + strategy.RequestBookkeeping(httpResponseOutcome); + ASSERT_EQ(500, quota->GetRetryQuota()); +} + +// SEP Test 16: Long-polling non-retryable error (no delay) +TEST_F(RetryBehaviorTest, LongPollingNonRetryableError) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + // Non-retryable error: ShouldRetry returns false + ASSERT_FALSE(strategy.ShouldRetry(MakeNonRetryableError(), 0)); + // Pipeline would NOT apply long-polling backoff because error.ShouldRetry() is false + ASSERT_EQ(500, quota->GetRetryQuota()); +} + +// SEP Test 17: retry-after header honored +TEST_F(RetryBehaviorTest, RetryAfterHeaderHonored) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + // Header value 1500ms, at i=0 t_i is in [0, 50ms] + // clamped to max(t_i, min(1.5, 5 + t_i)) = 1.5s = 1500ms + auto error = MakeTransientErrorWithRetryAfter("1500"); + long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); + // Should be between 1500 and 5050 (5 + max t_i at i=0) + ASSERT_GE(delay, 50); // at minimum t_i (could be 0, then header wins) + ASSERT_LE(delay, 5050); +} + +// SEP Test 18: retry-after floor clamped (value 0 clamped up to t_i) +TEST_F(RetryBehaviorTest, RetryAfterFloorClamped) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + auto error = MakeTransientErrorWithRetryAfter("0"); + // Header is 0ms, gets clamped up to t_i + // Result should be same as normal backoff (t_i) + long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 50); // max t_i at i=0 with base 0.05 +} + +// SEP Test 19: retry-after ceiling clamped (value 10000ms clamped to 5+t_i) +TEST_F(RetryBehaviorTest, RetryAfterCeilingClamped) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + auto error = MakeTransientErrorWithRetryAfter("10000"); + // Header is 10000ms = 10s, exceeds 5 + t_i (max ~5.05s at i=0) + long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); + // Clamped to 5 + t_i, which is at most 5050ms + ASSERT_LE(delay, 5050); + // But at least t_i (could be 0) + ASSERT_GE(delay, 0); +} + +// SEP Test 20: Invalid retry-after falls back to exponential backoff +TEST_F(RetryBehaviorTest, InvalidRetryAfterFallsBack) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + // "abc" parses to 0 via atoll, which gets clamped to t_i + auto error = MakeTransientErrorWithRetryAfter("abc"); + long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 50); +} + +// Verify legacy behavior unchanged when gate is off (old constructors) +TEST_F(RetryBehaviorTest, LegacyBehaviorUnchanged) +{ + StandardRetryStrategy strategy(3); + + auto error = MakeTransientError(); + // Legacy: rand(0,999) * 2^i, cap 20000 + long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 999); // 2^0 = 1, so max is 999 + + delay = strategy.CalculateDelayBeforeNextRetry(error, 1); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 1998); // 2^1 = 2, so max is 1998 + + delay = strategy.CalculateDelayBeforeNextRetry(error, 15); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 20000); // capped +} + +// Verify throttle-based classification: throttling costs 5, transient costs 14 +TEST_F(RetryBehaviorTest, ThrottleBasedClassification) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + // Transient costs 14 + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); + ASSERT_EQ(486, quota->GetRetryQuota()); + + // Throttling costs 5 + ASSERT_TRUE(strategy.ShouldRetry(MakeThrottlingError(), 0)); + ASSERT_EQ(481, quota->GetRetryQuota()); +} + +// Verify legacy classification: REQUEST_TIMEOUT costs 10, others cost 5 +TEST_F(RetryBehaviorTest, LegacyClassification) +{ + DefaultRetryQuotaContainer quota; + + AWSError timeoutError(CoreErrors::REQUEST_TIMEOUT, true); + AWSError otherError(CoreErrors::NETWORK_CONNECTION, true); + + // Other error costs 5 + ASSERT_TRUE(quota.AcquireRetryQuota(otherError)); + ASSERT_EQ(495, quota.GetRetryQuota()); + + // Timeout costs 10 + ASSERT_TRUE(quota.AcquireRetryQuota(timeoutError)); + ASSERT_EQ(485, quota.GetRetryQuota()); +} + +// Non-retryable errors are not retried regardless of gate +TEST_F(RetryBehaviorTest, NonRetryableNotRetried) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + ASSERT_FALSE(strategy.ShouldRetry(MakeNonRetryableError(), 0)); + ASSERT_EQ(500, quota->GetRetryQuota()); +} + +// Verify RequestBookkeeping releases correct tokens on success +TEST_F(RetryBehaviorTest, RequestBookkeepingReleasesTokens) +{ + auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); + StandardRetryStrategy strategy(quota, 10, 0.05); + + std::shared_ptr httpRequest = CreateHttpRequest(URI("http://www.uri.com"), HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); + std::shared_ptr httpResponse = Aws::MakeShared(ALLOCATION_TAG, httpRequest); + HttpResponseOutcome httpResponseOutcome(httpResponse); + + // Acquire 14 (transient), quota = 486 + ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); + ASSERT_EQ(486, quota->GetRetryQuota()); + + // Release 14 on success with last error context + strategy.RequestBookkeeping(httpResponseOutcome, MakeTransientError()); + ASSERT_EQ(500, quota->GetRetryQuota()); + + // Acquire 5 (throttling), quota = 495 + ASSERT_TRUE(strategy.ShouldRetry(MakeThrottlingError(), 0)); + ASSERT_EQ(495, quota->GetRetryQuota()); + + // Release 5 on success with last error context + strategy.RequestBookkeeping(httpResponseOutcome, MakeThrottlingError()); + ASSERT_EQ(500, quota->GetRetryQuota()); + + // Release NO_RETRY_INCREMENT (1) on success without prior retry + quota->AcquireRetryQuota(10); + ASSERT_EQ(490, quota->GetRetryQuota()); + strategy.RequestBookkeeping(httpResponseOutcome); + ASSERT_EQ(491, quota->GetRetryQuota()); +} From 965a186fdc2fcc9b7b4e1e540ff5a69776a42a98 Mon Sep 17 00:00:00 2001 From: kai lin Date: Tue, 2 Jun 2026 15:36:35 -0400 Subject: [PATCH 02/11] removed transient back off --- .../aws/core/client/AdaptiveRetryStrategy.h | 2 - .../include/aws/core/client/RetryStrategy.h | 11 -- .../source/client/AdaptiveRetryStrategy.cpp | 4 - .../source/client/ClientConfiguration.cpp | 51 ++++++- .../source/client/RetryStrategy.cpp | 127 ++++++------------ 5 files changed, 91 insertions(+), 104 deletions(-) diff --git a/src/aws-cpp-sdk-core/include/aws/core/client/AdaptiveRetryStrategy.h b/src/aws-cpp-sdk-core/include/aws/core/client/AdaptiveRetryStrategy.h index f89b61f40e12..4a9f4d551d86 100644 --- a/src/aws-cpp-sdk-core/include/aws/core/client/AdaptiveRetryStrategy.h +++ b/src/aws-cpp-sdk-core/include/aws/core/client/AdaptiveRetryStrategy.h @@ -141,8 +141,6 @@ class AWS_CORE_API AdaptiveRetryStrategy : public StandardRetryStrategy const char* GetStrategyName() const override { return "adaptive";} - AdaptiveRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec); - protected: RetryTokenBucket m_retryTokenBucket; bool m_fastFail = false; diff --git a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h index 77f9fd0776a4..5e1c24af53ab 100644 --- a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h +++ b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h @@ -103,17 +103,10 @@ namespace Aws virtual int GetRetryQuota() const = 0; }; - enum class RetryCostClassification - { - REQUEST_TIMEOUT_BASED, - THROTTLE_BASED - }; - class AWS_CORE_API DefaultRetryQuotaContainer : public RetryQuotaContainer { public: DefaultRetryQuotaContainer(); - DefaultRetryQuotaContainer(int retryCost, int throttlingRetryCost, RetryCostClassification classification); virtual ~DefaultRetryQuotaContainer() = default; virtual bool AcquireRetryQuota(int capacityAmount) override; virtual bool AcquireRetryQuota(const AWSError& error) override; @@ -124,9 +117,6 @@ namespace Aws protected: mutable Aws::Utils::Threading::ReaderWriterLock m_retryQuotaLock; int m_retryQuota; - int m_retryCost; - int m_throttlingRetryCost; - RetryCostClassification m_classification; }; class AWS_CORE_API StandardRetryStrategy : public RetryStrategy @@ -134,7 +124,6 @@ namespace Aws public: StandardRetryStrategy(long maxAttempts = 3); StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts = 3); - StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec); virtual ~StandardRetryStrategy(); virtual void RequestBookkeeping(const HttpResponseOutcome& httpResponseOutcome) override; diff --git a/src/aws-cpp-sdk-core/source/client/AdaptiveRetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/AdaptiveRetryStrategy.cpp index 5a63d0d10eb1..69bde40fb6a7 100644 --- a/src/aws-cpp-sdk-core/source/client/AdaptiveRetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/AdaptiveRetryStrategy.cpp @@ -169,10 +169,6 @@ namespace Aws StandardRetryStrategy(retryQuotaContainer, maxAttempts) {} - AdaptiveRetryStrategy::AdaptiveRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec) : - StandardRetryStrategy(retryQuotaContainer, maxAttempts, transientBackoffBaseSec) - {} - bool AdaptiveRetryStrategy::HasSendToken() { return m_retryTokenBucket.Acquire(1, m_fastFail); diff --git a/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp b/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp index 800f0378103b..17f0becf0c50 100644 --- a/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp +++ b/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp @@ -543,6 +543,49 @@ ClientConfiguration::ClientConfiguration(bool /*useSmartDefaults*/, const char* Aws::Config::Defaults::SetSmartDefaultsConfigurationParameters(*this, defaultMode, hasEc2MetadataRegion, ec2MetadataRegion); } +namespace { + class ThrottleBasedRetryQuotaContainer : public RetryQuotaContainer + { + public: + ThrottleBasedRetryQuotaContainer(int retryCost = 14, int throttlingRetryCost = 5) + : m_retryQuota(500), m_retryCost(retryCost), m_throttlingRetryCost(throttlingRetryCost) {} + + bool AcquireRetryQuota(int capacityAmount) override + { + Aws::Utils::Threading::WriterLockGuard guard(m_retryQuotaLock); + if (capacityAmount > m_retryQuota) return false; + m_retryQuota -= capacityAmount; + return true; + } + + bool AcquireRetryQuota(const AWSError& error) override + { + int capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; + return AcquireRetryQuota(capacityAmount); + } + + void ReleaseRetryQuota(int capacityAmount) override + { + Aws::Utils::Threading::WriterLockGuard guard(m_retryQuotaLock); + m_retryQuota = (std::min)(m_retryQuota + capacityAmount, 500); + } + + void ReleaseRetryQuota(const AWSError& error) override + { + int capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; + ReleaseRetryQuota(capacityAmount); + } + + int GetRetryQuota() const override { return m_retryQuota; } + + private: + mutable Aws::Utils::Threading::ReaderWriterLock m_retryQuotaLock; + int m_retryQuota; + int m_retryCost; + int m_throttlingRetryCost; + }; +} // anonymous namespace + std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String retryMode) { const bool newRetriesEnabled = Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026") == "true"; @@ -565,8 +608,8 @@ std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String re long attempts = (maxAttempts < 0) ? 3 : maxAttempts; if (newRetriesEnabled) { - auto quota = Aws::MakeShared(CLIENT_CONFIG_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, quota, attempts, 0.05); + auto quota = Aws::MakeShared(CLIENT_CONFIG_TAG); + retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, quota, attempts); } else { @@ -578,8 +621,8 @@ std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String re long attempts = (maxAttempts < 0) ? 3 : maxAttempts; if (newRetriesEnabled) { - auto quota = Aws::MakeShared(CLIENT_CONFIG_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, quota, attempts, 0.05); + auto quota = Aws::MakeShared(CLIENT_CONFIG_TAG); + retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, quota, attempts); } else { diff --git a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp index e5f68c1c99be..e1e927dcea44 100644 --- a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp @@ -20,95 +20,20 @@ namespace Aws static const int RETRY_COST = 5; static const int TIMEOUT_RETRY_COST = 10; - // ---- DefaultRetryQuotaContainer ---- - - DefaultRetryQuotaContainer::DefaultRetryQuotaContainer() - : m_retryQuota(INITIAL_RETRY_TOKENS), - m_retryCost(RETRY_COST), - m_throttlingRetryCost(TIMEOUT_RETRY_COST), - m_classification(RetryCostClassification::REQUEST_TIMEOUT_BASED) - {} - - DefaultRetryQuotaContainer::DefaultRetryQuotaContainer(int retryCost, int throttlingRetryCost, RetryCostClassification classification) - : m_retryQuota(INITIAL_RETRY_TOKENS), - m_retryCost(retryCost), - m_throttlingRetryCost(throttlingRetryCost), - m_classification(classification) - {} - - bool DefaultRetryQuotaContainer::AcquireRetryQuota(int capacityAmount) - { - WriterLockGuard guard(m_retryQuotaLock); - - if (capacityAmount > m_retryQuota) - { - return false; - } - m_retryQuota -= capacityAmount; - return true; - } - - bool DefaultRetryQuotaContainer::AcquireRetryQuota(const AWSError& error) - { - int capacityAmount; - if (m_classification == RetryCostClassification::THROTTLE_BASED) - { - capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; - } - else - { - capacityAmount = error.GetErrorType() == CoreErrors::REQUEST_TIMEOUT ? TIMEOUT_RETRY_COST : RETRY_COST; - } - return AcquireRetryQuota(capacityAmount); - } - - void DefaultRetryQuotaContainer::ReleaseRetryQuota(int capacityAmount) - { - WriterLockGuard guard(m_retryQuotaLock); - m_retryQuota = (std::min)(m_retryQuota + capacityAmount, INITIAL_RETRY_TOKENS); - } - - void DefaultRetryQuotaContainer::ReleaseRetryQuota(const AWSError& error) - { - int capacityAmount; - if (m_classification == RetryCostClassification::THROTTLE_BASED) - { - capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; - } - else - { - capacityAmount = error.GetErrorType() == CoreErrors::REQUEST_TIMEOUT ? TIMEOUT_RETRY_COST : RETRY_COST; - } - ReleaseRetryQuota(capacityAmount); - } - - // ---- StandardRetryStrategy pimpl ---- - struct StandardRetryStrategy::RetryImpl { bool newRetriesEnabled = false; - double transientBackoffBaseSec = 0.05; }; StandardRetryStrategy::StandardRetryStrategy(long maxAttempts) - : m_retryQuotaContainer(Aws::MakeShared("StandardRetryStrategy")), - m_maxAttempts(maxAttempts), - m_impl(Aws::MakeUnique("StandardRetryStrategy")) - {} + : m_retryQuotaContainer(Aws::MakeShared("StandardRetryStrategy")), m_maxAttempts(maxAttempts), + m_impl(Aws::MakeUnique("StandardRetryStrategy")) {} StandardRetryStrategy::StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts) - : m_retryQuotaContainer(retryQuotaContainer), - m_maxAttempts(maxAttempts), - m_impl(Aws::MakeUnique("StandardRetryStrategy")) - {} - - StandardRetryStrategy::StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec) - : m_retryQuotaContainer(retryQuotaContainer), - m_maxAttempts(maxAttempts), + : m_retryQuotaContainer(retryQuotaContainer), m_maxAttempts(maxAttempts), m_impl(Aws::MakeUnique("StandardRetryStrategy")) { m_impl->newRetriesEnabled = true; - m_impl->transientBackoffBaseSec = transientBackoffBaseSec; } StandardRetryStrategy::~StandardRetryStrategy() = default; @@ -145,12 +70,13 @@ namespace Aws if (!m_impl->newRetriesEnabled) { AWS_UNREFERENCED_PARAM(error); - return (std::min)(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << (std::min)(attemptedRetries, 15L)), 20000); + // Maximum left shift factor is capped by ceil(log2(max_delay)), to avoid wrap-around and overflow into negative values: + return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); } - double x = error.ShouldThrottle() ? 1.0 : m_impl->transientBackoffBaseSec; - double exponentialPart = x * static_cast(1L << (std::min)(attemptedRetries, 30L)); - double cappedPart = (std::min)(exponentialPart, 20.0); + double x = error.ShouldThrottle() ? 1.0 : 0.05; + double exponentialPart = x * static_cast(1L << std::min(attemptedRetries, 30L)); + double cappedPart = std::min(exponentialPart, 20.0); double b = static_cast(Aws::Utils::GetRandomValue() % 10000) / 10000.0; double t_i = b * cappedPart; @@ -160,12 +86,47 @@ namespace Aws if (it != headers.end()) { double headerSec = static_cast(Aws::Utils::StringUtils::ConvertToInt64(it->second.c_str())) / 1000.0; - double clamped = (std::max)(t_i, (std::min)(headerSec, 5.0 + t_i)); + double clamped = std::max(t_i, std::min(headerSec, 5.0 + t_i)); return static_cast(clamped * 1000.0); } return static_cast(t_i * 1000.0); } + DefaultRetryQuotaContainer::DefaultRetryQuotaContainer() : m_retryQuota(INITIAL_RETRY_TOKENS) + {} + + bool DefaultRetryQuotaContainer::AcquireRetryQuota(int capacityAmount) + { + WriterLockGuard guard(m_retryQuotaLock); + + if (capacityAmount > m_retryQuota) + { + return false; + } + else + { + m_retryQuota -= capacityAmount; + return true; + } + } + + bool DefaultRetryQuotaContainer::AcquireRetryQuota(const AWSError& error) + { + int capacityAmount = error.GetErrorType() == CoreErrors::REQUEST_TIMEOUT ? TIMEOUT_RETRY_COST : RETRY_COST; + return AcquireRetryQuota(capacityAmount); + } + + void DefaultRetryQuotaContainer::ReleaseRetryQuota(int capacityAmount) + { + WriterLockGuard guard(m_retryQuotaLock); + m_retryQuota = (std::min)(m_retryQuota + capacityAmount, INITIAL_RETRY_TOKENS); + } + + void DefaultRetryQuotaContainer::ReleaseRetryQuota(const AWSError& error) + { + int capacityAmount = error.GetErrorType() == CoreErrors::REQUEST_TIMEOUT ? TIMEOUT_RETRY_COST : RETRY_COST; + ReleaseRetryQuota(capacityAmount); + } } } From a2f5d4af7d9131ccf758adb75632cccffe517771 Mon Sep 17 00:00:00 2001 From: kai lin Date: Tue, 2 Jun 2026 16:17:11 -0400 Subject: [PATCH 03/11] moved implementation to pimple --- .../source/client/RetryStrategy.cpp | 53 ++++--- .../aws/client/RetryBehaviorTest.cpp | 137 +++++++++++------- 2 files changed, 115 insertions(+), 75 deletions(-) diff --git a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp index e1e927dcea44..142c17ea8bb4 100644 --- a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp @@ -23,6 +23,34 @@ namespace Aws struct StandardRetryStrategy::RetryImpl { bool newRetriesEnabled = false; + + long CalculateDelay(const AWSError& error, long attemptedRetries) const + { + if (!newRetriesEnabled) + { + AWS_UNREFERENCED_PARAM(error); + // Maximum left shift factor is capped by ceil(log2(max_delay)), to avoid wrap-around and overflow into negative values: + return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); + } + + double x = error.ShouldThrottle() ? 1.0 : 0.05; + double exponentialPart = x * static_cast(1L << std::min(attemptedRetries, 30L)); + double cappedPart = std::min(exponentialPart, 20.0); + + double b = static_cast(Aws::Utils::GetRandomValue() % 10000) / 10000.0; + double t_i = b * cappedPart; + + const auto& headers = error.GetResponseHeaders(); + auto it = headers.find("x-amz-retry-after"); + if (it != headers.end()) + { + double headerSec = static_cast(Aws::Utils::StringUtils::ConvertToInt64(it->second.c_str())) / 1000.0; + double clamped = std::max(t_i, std::min(headerSec, 5.0 + t_i)); + return static_cast(clamped * 1000.0); + } + + return static_cast(t_i * 1000.0); + } }; StandardRetryStrategy::StandardRetryStrategy(long maxAttempts) @@ -67,30 +95,7 @@ namespace Aws long StandardRetryStrategy::CalculateDelayBeforeNextRetry(const AWSError& error, long attemptedRetries) const { - if (!m_impl->newRetriesEnabled) - { - AWS_UNREFERENCED_PARAM(error); - // Maximum left shift factor is capped by ceil(log2(max_delay)), to avoid wrap-around and overflow into negative values: - return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); - } - - double x = error.ShouldThrottle() ? 1.0 : 0.05; - double exponentialPart = x * static_cast(1L << std::min(attemptedRetries, 30L)); - double cappedPart = std::min(exponentialPart, 20.0); - - double b = static_cast(Aws::Utils::GetRandomValue() % 10000) / 10000.0; - double t_i = b * cappedPart; - - const auto& headers = error.GetResponseHeaders(); - auto it = headers.find("x-amz-retry-after"); - if (it != headers.end()) - { - double headerSec = static_cast(Aws::Utils::StringUtils::ConvertToInt64(it->second.c_str())) / 1000.0; - double clamped = std::max(t_i, std::min(headerSec, 5.0 + t_i)); - return static_cast(clamped * 1000.0); - } - - return static_cast(t_i * 1000.0); + return m_impl->CalculateDelay(error, attemptedRetries); } DefaultRetryQuotaContainer::DefaultRetryQuotaContainer() : m_retryQuota(INITIAL_RETRY_TOKENS) diff --git a/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp b/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp index 36a0d9143d1c..435f8e34d99d 100644 --- a/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp +++ b/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp @@ -18,6 +18,41 @@ using namespace Aws::Http; static const char ALLOCATION_TAG[] = "RetryBehaviorTest"; +class TestThrottleBasedQuotaContainer : public RetryQuotaContainer +{ +public: + TestThrottleBasedQuotaContainer() : m_retryQuota(500) {} + + bool AcquireRetryQuota(int capacityAmount) override + { + if (capacityAmount > m_retryQuota) return false; + m_retryQuota -= capacityAmount; + return true; + } + + bool AcquireRetryQuota(const AWSError& error) override + { + int capacityAmount = error.ShouldThrottle() ? 5 : 14; + return AcquireRetryQuota(capacityAmount); + } + + void ReleaseRetryQuota(int capacityAmount) override + { + m_retryQuota = std::min(m_retryQuota + capacityAmount, 500); + } + + void ReleaseRetryQuota(const AWSError& error) override + { + int capacityAmount = error.ShouldThrottle() ? 5 : 14; + ReleaseRetryQuota(capacityAmount); + } + + int GetRetryQuota() const override { return m_retryQuota; } + +private: + int m_retryQuota; +}; + class RetryBehaviorTest : public Aws::Testing::AwsCppSdkGTestSuite { }; @@ -49,8 +84,8 @@ static AWSError MakeTransientErrorWithRetryAfter(const Aws::String& // SEP Test 1: Retry eventually succeeds, quota restored TEST_F(RetryBehaviorTest, RetryEventuallySucceeds) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 3, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 3); ASSERT_EQ(500, quota->GetRetryQuota()); @@ -66,8 +101,8 @@ TEST_F(RetryBehaviorTest, RetryEventuallySucceeds) // SEP Test 2: Max attempts reached TEST_F(RetryBehaviorTest, MaxAttemptsReached) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 3, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 3); ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 1)); @@ -78,8 +113,8 @@ TEST_F(RetryBehaviorTest, MaxAttemptsReached) // SEP Test 3: Quota reached after 1 retry TEST_F(RetryBehaviorTest, QuotaReachedAfterRetry) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); // Drain quota to 10 ASSERT_TRUE(quota->AcquireRetryQuota(490)); @@ -92,8 +127,8 @@ TEST_F(RetryBehaviorTest, QuotaReachedAfterRetry) // SEP Test 4: Zero quota, no retries TEST_F(RetryBehaviorTest, ZeroQuotaNoRetries) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); ASSERT_TRUE(quota->AcquireRetryQuota(500)); ASSERT_EQ(0, quota->GetRetryQuota()); @@ -105,8 +140,8 @@ TEST_F(RetryBehaviorTest, ZeroQuotaNoRetries) // SEP Test 5: Exponential timing (transient, 50ms base) TEST_F(RetryBehaviorTest, ExponentialBackoffTransient) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); auto error = MakeTransientError(); // Backoff is randomized, but must be within [0, x * 2^i * 1000]ms @@ -123,8 +158,8 @@ TEST_F(RetryBehaviorTest, ExponentialBackoffTransient) // SEP Test 6: Max backoff cap at 20s TEST_F(RetryBehaviorTest, MaxBackoffCap) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 100, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 100); auto error = MakeTransientError(); // At i=30, 0.05 * 2^30 = 53687091.2s which exceeds 20s cap @@ -135,8 +170,8 @@ TEST_F(RetryBehaviorTest, MaxBackoffCap) // SEP Test 7: Quota exhaustion mid-sequence TEST_F(RetryBehaviorTest, QuotaExhaustionMidSequence) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 100, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 100); // Drain to 20 tokens ASSERT_TRUE(quota->AcquireRetryQuota(480)); @@ -154,8 +189,8 @@ TEST_F(RetryBehaviorTest, QuotaExhaustionMidSequence) // SEP Test 8: Quota recovery (stateful multi-request sequence) TEST_F(RetryBehaviorTest, QuotaRecoveryStateful) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); std::shared_ptr httpRequest = CreateHttpRequest(URI("http://www.uri.com"), HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); std::shared_ptr httpResponse = Aws::MakeShared(ALLOCATION_TAG, httpRequest); @@ -189,8 +224,8 @@ TEST_F(RetryBehaviorTest, QuotaRecoveryStateful) // SEP Test 9: Multi-threaded quota sharing (verify shared state) TEST_F(RetryBehaviorTest, SharedQuotaAcrossRequests) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); // Simulate two concurrent requests both acquiring from same quota // Request A: transient retry, costs 14, quota = 486 @@ -212,8 +247,8 @@ TEST_F(RetryBehaviorTest, SharedQuotaAcrossRequests) // SEP Test 10: Throttling costs (5 tokens) and backoff (1000ms base) TEST_F(RetryBehaviorTest, ThrottlingCostsAndBackoff) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); // Throttling error costs 5 tokens ASSERT_TRUE(strategy.ShouldRetry(MakeThrottlingError(), 0)); @@ -231,31 +266,31 @@ TEST_F(RetryBehaviorTest, ThrottlingCostsAndBackoff) ASSERT_LE(delay, 2000); } -// SEP Test 11: DynamoDB tuning (25ms base, 4 max attempts) +// SEP Test 11: DynamoDB tuning (maxAttempts=4, 25ms base deferred to follow-up) TEST_F(RetryBehaviorTest, DynamoDBTuning) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 4, 0.025); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 4); ASSERT_EQ(4, strategy.GetMaxAttempts()); auto error = MakeTransientError(); - // i=0: [0, 25ms] + // i=0: [0, 50ms] (25ms base deferred) long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); ASSERT_GE(delay, 0); - ASSERT_LE(delay, 25); + ASSERT_LE(delay, 50); - // i=1: [0, 50ms] + // i=1: [0, 100ms] delay = strategy.CalculateDelayBeforeNextRetry(error, 1); ASSERT_GE(delay, 0); - ASSERT_LE(delay, 50); + ASSERT_LE(delay, 100); } // SEP Test 12: Long-polling transient + empty quota (backoff applied) TEST_F(RetryBehaviorTest, LongPollingTransientEmptyQuota) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); // Drain quota ASSERT_TRUE(quota->AcquireRetryQuota(500)); @@ -275,8 +310,8 @@ TEST_F(RetryBehaviorTest, LongPollingTransientEmptyQuota) // SEP Test 13: Long-polling throttling + empty quota (backoff applied) TEST_F(RetryBehaviorTest, LongPollingThrottlingEmptyQuota) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); // Drain quota ASSERT_TRUE(quota->AcquireRetryQuota(500)); @@ -296,8 +331,8 @@ TEST_F(RetryBehaviorTest, LongPollingThrottlingEmptyQuota) // SEP Test 14: Long-polling max attempts exceeded (no delay) TEST_F(RetryBehaviorTest, LongPollingMaxAttemptsExceeded) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 3, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 3); // At retries=2, max attempts (3) is reached ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 2)); @@ -307,8 +342,8 @@ TEST_F(RetryBehaviorTest, LongPollingMaxAttemptsExceeded) // SEP Test 15: Long-polling success (no delay) TEST_F(RetryBehaviorTest, LongPollingSuccess) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); std::shared_ptr httpRequest = CreateHttpRequest(URI("http://www.uri.com"), HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); std::shared_ptr httpResponse = Aws::MakeShared(ALLOCATION_TAG, httpRequest); @@ -322,8 +357,8 @@ TEST_F(RetryBehaviorTest, LongPollingSuccess) // SEP Test 16: Long-polling non-retryable error (no delay) TEST_F(RetryBehaviorTest, LongPollingNonRetryableError) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); // Non-retryable error: ShouldRetry returns false ASSERT_FALSE(strategy.ShouldRetry(MakeNonRetryableError(), 0)); @@ -334,8 +369,8 @@ TEST_F(RetryBehaviorTest, LongPollingNonRetryableError) // SEP Test 17: retry-after header honored TEST_F(RetryBehaviorTest, RetryAfterHeaderHonored) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); // Header value 1500ms, at i=0 t_i is in [0, 50ms] // clamped to max(t_i, min(1.5, 5 + t_i)) = 1.5s = 1500ms @@ -349,8 +384,8 @@ TEST_F(RetryBehaviorTest, RetryAfterHeaderHonored) // SEP Test 18: retry-after floor clamped (value 0 clamped up to t_i) TEST_F(RetryBehaviorTest, RetryAfterFloorClamped) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); auto error = MakeTransientErrorWithRetryAfter("0"); // Header is 0ms, gets clamped up to t_i @@ -363,8 +398,8 @@ TEST_F(RetryBehaviorTest, RetryAfterFloorClamped) // SEP Test 19: retry-after ceiling clamped (value 10000ms clamped to 5+t_i) TEST_F(RetryBehaviorTest, RetryAfterCeilingClamped) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); auto error = MakeTransientErrorWithRetryAfter("10000"); // Header is 10000ms = 10s, exceeds 5 + t_i (max ~5.05s at i=0) @@ -378,8 +413,8 @@ TEST_F(RetryBehaviorTest, RetryAfterCeilingClamped) // SEP Test 20: Invalid retry-after falls back to exponential backoff TEST_F(RetryBehaviorTest, InvalidRetryAfterFallsBack) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); // "abc" parses to 0 via atoll, which gets clamped to t_i auto error = MakeTransientErrorWithRetryAfter("abc"); @@ -411,8 +446,8 @@ TEST_F(RetryBehaviorTest, LegacyBehaviorUnchanged) // Verify throttle-based classification: throttling costs 5, transient costs 14 TEST_F(RetryBehaviorTest, ThrottleBasedClassification) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); // Transient costs 14 ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); @@ -443,8 +478,8 @@ TEST_F(RetryBehaviorTest, LegacyClassification) // Non-retryable errors are not retried regardless of gate TEST_F(RetryBehaviorTest, NonRetryableNotRetried) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); ASSERT_FALSE(strategy.ShouldRetry(MakeNonRetryableError(), 0)); ASSERT_EQ(500, quota->GetRetryQuota()); @@ -453,8 +488,8 @@ TEST_F(RetryBehaviorTest, NonRetryableNotRetried) // Verify RequestBookkeeping releases correct tokens on success TEST_F(RetryBehaviorTest, RequestBookkeepingReleasesTokens) { - auto quota = Aws::MakeShared(ALLOCATION_TAG, 14, 5, RetryCostClassification::THROTTLE_BASED); - StandardRetryStrategy strategy(quota, 10, 0.05); + auto quota = Aws::MakeShared(ALLOCATION_TAG); + StandardRetryStrategy strategy(quota, 10); std::shared_ptr httpRequest = CreateHttpRequest(URI("http://www.uri.com"), HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); std::shared_ptr httpResponse = Aws::MakeShared(ALLOCATION_TAG, httpRequest); From 72469575455dcd9164b2fad4d5e34b87f09fdb09 Mon Sep 17 00:00:00 2001 From: kai lin Date: Wed, 3 Jun 2026 10:54:37 -0400 Subject: [PATCH 04/11] added a new function to support dynamodb transient backoff --- .../include/aws/core/client/RetryStrategy.h | 1 + .../source/client/RetryStrategy.cpp | 11 +++- .../aws/client/RetryBehaviorTest.cpp | 53 ++++--------------- 3 files changed, 22 insertions(+), 43 deletions(-) diff --git a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h index 5e1c24af53ab..eca0e9ba711b 100644 --- a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h +++ b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h @@ -124,6 +124,7 @@ namespace Aws public: StandardRetryStrategy(long maxAttempts = 3); StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts = 3); + StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec); virtual ~StandardRetryStrategy(); virtual void RequestBookkeeping(const HttpResponseOutcome& httpResponseOutcome) override; diff --git a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp index 142c17ea8bb4..83449812bb80 100644 --- a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp @@ -23,6 +23,7 @@ namespace Aws struct StandardRetryStrategy::RetryImpl { bool newRetriesEnabled = false; + double transientBackoffBaseSec = 0.05; long CalculateDelay(const AWSError& error, long attemptedRetries) const { @@ -33,7 +34,7 @@ namespace Aws return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); } - double x = error.ShouldThrottle() ? 1.0 : 0.05; + double x = error.ShouldThrottle() ? 1.0 : transientBackoffBaseSec; double exponentialPart = x * static_cast(1L << std::min(attemptedRetries, 30L)); double cappedPart = std::min(exponentialPart, 20.0); @@ -64,6 +65,14 @@ namespace Aws m_impl->newRetriesEnabled = true; } + StandardRetryStrategy::StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec) + : m_retryQuotaContainer(retryQuotaContainer), m_maxAttempts(maxAttempts), + m_impl(Aws::MakeUnique("StandardRetryStrategy")) + { + m_impl->newRetriesEnabled = true; + m_impl->transientBackoffBaseSec = transientBackoffBaseSec; + } + StandardRetryStrategy::~StandardRetryStrategy() = default; void StandardRetryStrategy::RequestBookkeeping(const HttpResponseOutcome& httpResponseOutcome) diff --git a/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp b/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp index 435f8e34d99d..ff8200d4a67b 100644 --- a/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp +++ b/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp @@ -266,24 +266,24 @@ TEST_F(RetryBehaviorTest, ThrottlingCostsAndBackoff) ASSERT_LE(delay, 2000); } -// SEP Test 11: DynamoDB tuning (maxAttempts=4, 25ms base deferred to follow-up) +// SEP Test 11: DynamoDB tuning (25ms base, 4 max attempts) TEST_F(RetryBehaviorTest, DynamoDBTuning) { auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 4); + StandardRetryStrategy strategy(quota, 4, 0.025); ASSERT_EQ(4, strategy.GetMaxAttempts()); auto error = MakeTransientError(); - // i=0: [0, 50ms] (25ms base deferred) + // i=0: [0, 25ms] long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); ASSERT_GE(delay, 0); - ASSERT_LE(delay, 50); + ASSERT_LE(delay, 25); - // i=1: [0, 100ms] + // i=1: [0, 50ms] delay = strategy.CalculateDelayBeforeNextRetry(error, 1); ASSERT_GE(delay, 0); - ASSERT_LE(delay, 100); + ASSERT_LE(delay, 50); } // SEP Test 12: Long-polling transient + empty quota (backoff applied) @@ -328,43 +328,12 @@ TEST_F(RetryBehaviorTest, LongPollingThrottlingEmptyQuota) ASSERT_LE(delay, 1000); } -// SEP Test 14: Long-polling max attempts exceeded (no delay) -TEST_F(RetryBehaviorTest, LongPollingMaxAttemptsExceeded) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 3); - - // At retries=2, max attempts (3) is reached - ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 2)); - // Pipeline would NOT apply long-polling backoff because retries+1 >= maxAttempts -} - -// SEP Test 15: Long-polling success (no delay) -TEST_F(RetryBehaviorTest, LongPollingSuccess) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); +// TODO: SEP Tests 14, 15, 16 require integration tests that exercise the full pipeline +// with a mock HTTP client to verify the pipeline does NOT sleep in these scenarios: +// - Test 14: Long-polling max attempts exceeded (no delay before returning) +// - Test 15: Long-polling success (no delay before returning) +// - Test 16: Long-polling non-retryable error (no delay before returning) - std::shared_ptr httpRequest = CreateHttpRequest(URI("http://www.uri.com"), HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); - std::shared_ptr httpResponse = Aws::MakeShared(ALLOCATION_TAG, httpRequest); - HttpResponseOutcome httpResponseOutcome(httpResponse); - - // Successful request, no retry needed, no delay - strategy.RequestBookkeeping(httpResponseOutcome); - ASSERT_EQ(500, quota->GetRetryQuota()); -} - -// SEP Test 16: Long-polling non-retryable error (no delay) -TEST_F(RetryBehaviorTest, LongPollingNonRetryableError) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - // Non-retryable error: ShouldRetry returns false - ASSERT_FALSE(strategy.ShouldRetry(MakeNonRetryableError(), 0)); - // Pipeline would NOT apply long-polling backoff because error.ShouldRetry() is false - ASSERT_EQ(500, quota->GetRetryQuota()); -} // SEP Test 17: retry-after header honored TEST_F(RetryBehaviorTest, RetryAfterHeaderHonored) From adcbc4fb6b9ac54cea388ab27cebf17ff5a1170b Mon Sep 17 00:00:00 2001 From: kai lin Date: Wed, 3 Jun 2026 11:11:04 -0400 Subject: [PATCH 05/11] added a new function to support dynamodb transient backoff --- .../include/aws/core/client/RetryStrategy.h | 1 - .../source/client/RetryStrategy.cpp | 11 +--------- .../aws/client/RetryBehaviorTest.cpp | 22 +++---------------- 3 files changed, 4 insertions(+), 30 deletions(-) diff --git a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h index eca0e9ba711b..5e1c24af53ab 100644 --- a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h +++ b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h @@ -124,7 +124,6 @@ namespace Aws public: StandardRetryStrategy(long maxAttempts = 3); StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts = 3); - StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec); virtual ~StandardRetryStrategy(); virtual void RequestBookkeeping(const HttpResponseOutcome& httpResponseOutcome) override; diff --git a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp index 83449812bb80..142c17ea8bb4 100644 --- a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp @@ -23,7 +23,6 @@ namespace Aws struct StandardRetryStrategy::RetryImpl { bool newRetriesEnabled = false; - double transientBackoffBaseSec = 0.05; long CalculateDelay(const AWSError& error, long attemptedRetries) const { @@ -34,7 +33,7 @@ namespace Aws return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); } - double x = error.ShouldThrottle() ? 1.0 : transientBackoffBaseSec; + double x = error.ShouldThrottle() ? 1.0 : 0.05; double exponentialPart = x * static_cast(1L << std::min(attemptedRetries, 30L)); double cappedPart = std::min(exponentialPart, 20.0); @@ -65,14 +64,6 @@ namespace Aws m_impl->newRetriesEnabled = true; } - StandardRetryStrategy::StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts, double transientBackoffBaseSec) - : m_retryQuotaContainer(retryQuotaContainer), m_maxAttempts(maxAttempts), - m_impl(Aws::MakeUnique("StandardRetryStrategy")) - { - m_impl->newRetriesEnabled = true; - m_impl->transientBackoffBaseSec = transientBackoffBaseSec; - } - StandardRetryStrategy::~StandardRetryStrategy() = default; void StandardRetryStrategy::RequestBookkeeping(const HttpResponseOutcome& httpResponseOutcome) diff --git a/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp b/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp index ff8200d4a67b..b7eb764c599f 100644 --- a/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp +++ b/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp @@ -266,25 +266,9 @@ TEST_F(RetryBehaviorTest, ThrottlingCostsAndBackoff) ASSERT_LE(delay, 2000); } -// SEP Test 11: DynamoDB tuning (25ms base, 4 max attempts) -TEST_F(RetryBehaviorTest, DynamoDBTuning) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 4, 0.025); - - ASSERT_EQ(4, strategy.GetMaxAttempts()); - - auto error = MakeTransientError(); - // i=0: [0, 25ms] - long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); - ASSERT_GE(delay, 0); - ASSERT_LE(delay, 25); - - // i=1: [0, 50ms] - delay = strategy.CalculateDelayBeforeNextRetry(error, 1); - ASSERT_GE(delay, 0); - ASSERT_LE(delay, 50); -} +// TODO: SEP Test 11: DynamoDB tuning (25ms base, 4 max attempts) +// Deferred to next PR — requires 3-arg constructor or service-specific config +// to pass transientBackoffBaseSec=0.025 for DynamoDB/DynamoDB Streams. // SEP Test 12: Long-polling transient + empty quota (backoff applied) TEST_F(RetryBehaviorTest, LongPollingTransientEmptyQuota) From 3aa265fe5b2cbca49e8d0d1933b0f2346081a4d6 Mon Sep 17 00:00:00 2001 From: kai lin Date: Wed, 3 Jun 2026 16:39:28 -0400 Subject: [PATCH 06/11] changed from feature bool to interface --- .../include/aws/core/client/RetryStrategy.h | 3 +- .../aws/core/internal/RetryStrategyImpl.h | 71 +++++++++++++++++++ .../source/client/ClientConfiguration.cpp | 70 +++--------------- .../source/client/RetryStrategy.cpp | 61 +++++++++++----- 4 files changed, 126 insertions(+), 79 deletions(-) create mode 100644 src/aws-cpp-sdk-core/include/aws/core/internal/RetryStrategyImpl.h diff --git a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h index 5e1c24af53ab..32f8cb24f2cc 100644 --- a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h +++ b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h @@ -137,12 +137,13 @@ namespace Aws const char* GetStrategyName() const override { return "standard";} + struct RetryImpl; + protected: std::shared_ptr m_retryQuotaContainer; long m_maxAttempts; private: - struct RetryImpl; Aws::UniquePtr m_impl; }; } // namespace Client diff --git a/src/aws-cpp-sdk-core/include/aws/core/internal/RetryStrategyImpl.h b/src/aws-cpp-sdk-core/include/aws/core/internal/RetryStrategyImpl.h new file mode 100644 index 000000000000..c19278211d8a --- /dev/null +++ b/src/aws-cpp-sdk-core/include/aws/core/internal/RetryStrategyImpl.h @@ -0,0 +1,71 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace Aws +{ + namespace Client + { + static const int THROTTLE_BASED_RETRY_COST = 14; + static const int THROTTLE_BASED_THROTTLING_COST = 5; + static const int THROTTLE_BASED_INITIAL_TOKENS = 500; + + class AWS_CORE_LOCAL ThrottleBasedRetryQuotaContainer : public RetryQuotaContainer + { + public: + ThrottleBasedRetryQuotaContainer(int retryCost = THROTTLE_BASED_RETRY_COST, int throttlingRetryCost = THROTTLE_BASED_THROTTLING_COST) + : m_retryQuota(THROTTLE_BASED_INITIAL_TOKENS), m_retryCost(retryCost), m_throttlingRetryCost(throttlingRetryCost) {} + + virtual ~ThrottleBasedRetryQuotaContainer() = default; + + bool AcquireRetryQuota(int capacityAmount) override + { + Aws::Utils::Threading::WriterLockGuard guard(m_retryQuotaLock); + if (capacityAmount > m_retryQuota) + { + return false; + } + else + { + m_retryQuota -= capacityAmount; + return true; + } + } + + bool AcquireRetryQuota(const AWSError& error) override + { + int capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; + return AcquireRetryQuota(capacityAmount); + } + + void ReleaseRetryQuota(int capacityAmount) override + { + Aws::Utils::Threading::WriterLockGuard guard(m_retryQuotaLock); + m_retryQuota = (std::min)(m_retryQuota + capacityAmount, THROTTLE_BASED_INITIAL_TOKENS); + } + + void ReleaseRetryQuota(const AWSError& error) override + { + int capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; + ReleaseRetryQuota(capacityAmount); + } + + int GetRetryQuota() const override { return m_retryQuota; } + + private: + mutable Aws::Utils::Threading::ReaderWriterLock m_retryQuotaLock; + int m_retryQuota; + int m_retryCost; + int m_throttlingRetryCost; + }; + } // namespace Client +} // namespace Aws diff --git a/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp b/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp index 17f0becf0c50..497368b58035 100644 --- a/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp +++ b/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp @@ -543,52 +543,7 @@ ClientConfiguration::ClientConfiguration(bool /*useSmartDefaults*/, const char* Aws::Config::Defaults::SetSmartDefaultsConfigurationParameters(*this, defaultMode, hasEc2MetadataRegion, ec2MetadataRegion); } -namespace { - class ThrottleBasedRetryQuotaContainer : public RetryQuotaContainer - { - public: - ThrottleBasedRetryQuotaContainer(int retryCost = 14, int throttlingRetryCost = 5) - : m_retryQuota(500), m_retryCost(retryCost), m_throttlingRetryCost(throttlingRetryCost) {} - - bool AcquireRetryQuota(int capacityAmount) override - { - Aws::Utils::Threading::WriterLockGuard guard(m_retryQuotaLock); - if (capacityAmount > m_retryQuota) return false; - m_retryQuota -= capacityAmount; - return true; - } - - bool AcquireRetryQuota(const AWSError& error) override - { - int capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; - return AcquireRetryQuota(capacityAmount); - } - - void ReleaseRetryQuota(int capacityAmount) override - { - Aws::Utils::Threading::WriterLockGuard guard(m_retryQuotaLock); - m_retryQuota = (std::min)(m_retryQuota + capacityAmount, 500); - } - - void ReleaseRetryQuota(const AWSError& error) override - { - int capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; - ReleaseRetryQuota(capacityAmount); - } - - int GetRetryQuota() const override { return m_retryQuota; } - - private: - mutable Aws::Utils::Threading::ReaderWriterLock m_retryQuotaLock; - int m_retryQuota; - int m_retryCost; - int m_throttlingRetryCost; - }; -} // anonymous namespace - std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String retryMode) { - const bool newRetriesEnabled = Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026") == "true"; - if (retryMode.empty()) { retryMode = Aws::Environment::GetEnv("AWS_RETRY_MODE"); @@ -597,7 +552,7 @@ std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String re { retryMode = Aws::Config::GetCachedConfigValue("retry_mode"); } - if (newRetriesEnabled && retryMode.empty()) + if (Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026") == "true" && retryMode.empty()) { retryMode = "standard"; } @@ -605,28 +560,26 @@ std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String re std::shared_ptr retryStrategy; if (retryMode == "standard") { - long attempts = (maxAttempts < 0) ? 3 : maxAttempts; - if (newRetriesEnabled) + if (maxAttempts < 0) { - auto quota = Aws::MakeShared(CLIENT_CONFIG_TAG); - retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, quota, attempts); + // negative value set above force usage of default max attempts + retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG); } else { - retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, attempts); + retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, maxAttempts); } } else if (retryMode == "adaptive") { - long attempts = (maxAttempts < 0) ? 3 : maxAttempts; - if (newRetriesEnabled) + if (maxAttempts < 0) { - auto quota = Aws::MakeShared(CLIENT_CONFIG_TAG); - retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, quota, attempts); + // negative value set above force usage of default max attempts + retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG); } else { - retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, attempts); + retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG, maxAttempts); } } else @@ -634,11 +587,6 @@ std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String re retryStrategy = Aws::MakeShared(CLIENT_CONFIG_TAG); } - if (newRetriesEnabled) - { - AWS_LOGSTREAM_INFO(CLIENT_CONFIG_TAG, "Retry Behavior 2.1 active (AWS_NEW_RETRIES_2026=true), mode=" << retryMode); - } - return retryStrategy; } diff --git a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp index 142c17ea8bb4..b5100c5ef2f9 100644 --- a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -22,20 +24,28 @@ namespace Aws struct StandardRetryStrategy::RetryImpl { - bool newRetriesEnabled = false; + virtual ~RetryImpl() = default; + virtual long CalculateDelay(const AWSError& error, long attemptedRetries) const = 0; + }; - long CalculateDelay(const AWSError& error, long attemptedRetries) const + namespace { + struct LegacyRetryImpl : StandardRetryStrategy::RetryImpl + { + long CalculateDelay(const AWSError& error, long attemptedRetries) const override { - if (!newRetriesEnabled) - { - AWS_UNREFERENCED_PARAM(error); - // Maximum left shift factor is capped by ceil(log2(max_delay)), to avoid wrap-around and overflow into negative values: - return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); - } + AWS_UNREFERENCED_PARAM(error); + // Maximum left shift factor is capped by ceil(log2(max_delay)), to avoid wrap-around and overflow into negative values: + return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); + } + }; + struct NewRetriesImpl : StandardRetryStrategy::RetryImpl + { + long CalculateDelay(const AWSError& error, long attemptedRetries) const override + { double x = error.ShouldThrottle() ? 1.0 : 0.05; - double exponentialPart = x * static_cast(1L << std::min(attemptedRetries, 30L)); - double cappedPart = std::min(exponentialPart, 20.0); + double exponentialPart = x * static_cast(1L << (std::min)(attemptedRetries, 30L)); + double cappedPart = (std::min)(exponentialPart, 20.0); double b = static_cast(Aws::Utils::GetRandomValue() % 10000) / 10000.0; double t_i = b * cappedPart; @@ -45,24 +55,40 @@ namespace Aws if (it != headers.end()) { double headerSec = static_cast(Aws::Utils::StringUtils::ConvertToInt64(it->second.c_str())) / 1000.0; - double clamped = std::max(t_i, std::min(headerSec, 5.0 + t_i)); + double clamped = (std::max)(t_i, (std::min)(headerSec, 5.0 + t_i)); return static_cast(clamped * 1000.0); } return static_cast(t_i * 1000.0); } }; + } // anonymous namespace + + static Aws::UniquePtr CreateRetryImpl() + { + if (Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026") == "true") + { + return Aws::MakeUnique("StandardRetryStrategy"); + } + return Aws::MakeUnique("StandardRetryStrategy"); + } + + static std::shared_ptr CreateQuotaContainer() + { + if (Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026") == "true") + { + return Aws::MakeShared("StandardRetryStrategy"); + } + return Aws::MakeShared("StandardRetryStrategy"); + } StandardRetryStrategy::StandardRetryStrategy(long maxAttempts) - : m_retryQuotaContainer(Aws::MakeShared("StandardRetryStrategy")), m_maxAttempts(maxAttempts), - m_impl(Aws::MakeUnique("StandardRetryStrategy")) {} + : m_retryQuotaContainer(CreateQuotaContainer()), m_maxAttempts(maxAttempts), + m_impl(CreateRetryImpl()) {} StandardRetryStrategy::StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts) : m_retryQuotaContainer(retryQuotaContainer), m_maxAttempts(maxAttempts), - m_impl(Aws::MakeUnique("StandardRetryStrategy")) - { - m_impl->newRetriesEnabled = true; - } + m_impl(CreateRetryImpl()) {} StandardRetryStrategy::~StandardRetryStrategy() = default; @@ -133,5 +159,6 @@ namespace Aws int capacityAmount = error.GetErrorType() == CoreErrors::REQUEST_TIMEOUT ? TIMEOUT_RETRY_COST : RETRY_COST; ReleaseRetryQuota(capacityAmount); } + } } From a8ced33881a183cd8231e4ae03f5c2f96d39e96c Mon Sep 17 00:00:00 2001 From: kai lin Date: Wed, 3 Jun 2026 17:07:36 -0400 Subject: [PATCH 07/11] update test to use environment var --- .../aws/client/RetryBehaviorTest.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp b/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp index b7eb764c599f..264be122c0d4 100644 --- a/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp +++ b/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -55,6 +56,7 @@ class TestThrottleBasedQuotaContainer : public RetryQuotaContainer class RetryBehaviorTest : public Aws::Testing::AwsCppSdkGTestSuite { + Aws::Environment::EnvironmentRAII m_env{{{"AWS_NEW_RETRIES_2026", "true"}}}; }; static AWSError MakeTransientError() @@ -376,8 +378,13 @@ TEST_F(RetryBehaviorTest, InvalidRetryAfterFallsBack) ASSERT_LE(delay, 50); } +class RetryBehaviorLegacyTest : public Aws::Testing::AwsCppSdkGTestSuite +{ + Aws::Environment::EnvironmentRAII m_env{{{"AWS_NEW_RETRIES_2026", ""}}}; +}; + // Verify legacy behavior unchanged when gate is off (old constructors) -TEST_F(RetryBehaviorTest, LegacyBehaviorUnchanged) +TEST_F(RetryBehaviorLegacyTest, LegacyBehaviorUnchanged) { StandardRetryStrategy strategy(3); @@ -412,7 +419,7 @@ TEST_F(RetryBehaviorTest, ThrottleBasedClassification) } // Verify legacy classification: REQUEST_TIMEOUT costs 10, others cost 5 -TEST_F(RetryBehaviorTest, LegacyClassification) +TEST_F(RetryBehaviorLegacyTest, LegacyClassification) { DefaultRetryQuotaContainer quota; From d960d2a4b79f0b8e62082e0830a5e3835dc52c9a Mon Sep 17 00:00:00 2001 From: kai lin Date: Wed, 3 Jun 2026 17:09:28 -0400 Subject: [PATCH 08/11] white space change --- src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp index b5100c5ef2f9..4569175de5f4 100644 --- a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp @@ -159,6 +159,5 @@ namespace Aws int capacityAmount = error.GetErrorType() == CoreErrors::REQUEST_TIMEOUT ? TIMEOUT_RETRY_COST : RETRY_COST; ReleaseRetryQuota(capacityAmount); } - } } From 3ca51666fee31c38f5b6e34a14660bba48cd012f Mon Sep 17 00:00:00 2001 From: kai lin Date: Thu, 4 Jun 2026 11:03:32 -0400 Subject: [PATCH 09/11] adding logging and dry --- .../source/client/ClientConfiguration.cpp | 2 +- .../source/client/RetryStrategy.cpp | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp b/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp index 497368b58035..de8ac00c9e78 100644 --- a/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp +++ b/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp @@ -552,7 +552,7 @@ std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String re { retryMode = Aws::Config::GetCachedConfigValue("retry_mode"); } - if (Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026") == "true" && retryMode.empty()) + if (Aws::Utils::StringUtils::ToLower(Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026").c_str()) == "true" && retryMode.empty()) { retryMode = "standard"; } diff --git a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp index 4569175de5f4..a5800bc357c5 100644 --- a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp @@ -11,9 +11,12 @@ #include #include #include +#include using namespace Aws::Utils::Threading; +static const char RETRY_STRATEGY_TAG[] = "StandardRetryStrategy"; + namespace Aws { namespace Client @@ -22,6 +25,11 @@ namespace Aws static const int RETRY_COST = 5; static const int TIMEOUT_RETRY_COST = 10; + static bool IsNewRetriesEnabled() + { + return Aws::Utils::StringUtils::ToLower(Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026").c_str()) == "true"; + } + struct StandardRetryStrategy::RetryImpl { virtual ~RetryImpl() = default; @@ -54,7 +62,12 @@ namespace Aws auto it = headers.find("x-amz-retry-after"); if (it != headers.end()) { - double headerSec = static_cast(Aws::Utils::StringUtils::ConvertToInt64(it->second.c_str())) / 1000.0; + long long headerMs = Aws::Utils::StringUtils::ConvertToInt64(it->second.c_str()); + if (headerMs < 0) + { + AWS_LOGSTREAM_DEBUG(RETRY_STRATEGY_TAG, "Ignoring invalid x-amz-retry-after value: " << it->second); + } + double headerSec = static_cast(headerMs) / 1000.0; double clamped = (std::max)(t_i, (std::min)(headerSec, 5.0 + t_i)); return static_cast(clamped * 1000.0); } @@ -66,7 +79,7 @@ namespace Aws static Aws::UniquePtr CreateRetryImpl() { - if (Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026") == "true") + if (IsNewRetriesEnabled()) { return Aws::MakeUnique("StandardRetryStrategy"); } @@ -75,7 +88,7 @@ namespace Aws static std::shared_ptr CreateQuotaContainer() { - if (Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026") == "true") + if (IsNewRetriesEnabled()) { return Aws::MakeShared("StandardRetryStrategy"); } From dee37e6713a6d815b45247d807cdea5377e60d5b Mon Sep 17 00:00:00 2001 From: kai lin Date: Thu, 4 Jun 2026 14:28:16 -0400 Subject: [PATCH 10/11] updated testing and namespace --- .../source/client/RetryStrategy.cpp | 117 +++-- .../aws/client/RetryBehaviorTest.cpp | 479 ------------------ .../aws/client/RetryStrategyTest.cpp | 159 ++++++ 3 files changed, 222 insertions(+), 533 deletions(-) delete mode 100644 tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp diff --git a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp index a5800bc357c5..da23fab25be1 100644 --- a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp @@ -14,87 +14,96 @@ #include using namespace Aws::Utils::Threading; +using namespace Aws::Client; static const char RETRY_STRATEGY_TAG[] = "StandardRetryStrategy"; +static const int INITIAL_RETRY_TOKENS = 500; +static const int RETRY_COST = 5; +static const int TIMEOUT_RETRY_COST = 10; namespace Aws { namespace Client { - static const int INITIAL_RETRY_TOKENS = 500; - static const int RETRY_COST = 5; - static const int TIMEOUT_RETRY_COST = 10; - - static bool IsNewRetriesEnabled() - { - return Aws::Utils::StringUtils::ToLower(Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026").c_str()) == "true"; - } - - struct StandardRetryStrategy::RetryImpl + class StandardRetryStrategy::RetryImpl { + public: virtual ~RetryImpl() = default; virtual long CalculateDelay(const AWSError& error, long attemptedRetries) const = 0; }; + } +} + +namespace { + bool IsNewRetriesEnabled() + { + return Aws::Utils::StringUtils::ToLower(Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026").c_str()) == "true"; + } - namespace { - struct LegacyRetryImpl : StandardRetryStrategy::RetryImpl + class LegacyRetryImpl : public StandardRetryStrategy::RetryImpl + { + public: + long CalculateDelay(const AWSError& error, long attemptedRetries) const override { - long CalculateDelay(const AWSError& error, long attemptedRetries) const override - { - AWS_UNREFERENCED_PARAM(error); - // Maximum left shift factor is capped by ceil(log2(max_delay)), to avoid wrap-around and overflow into negative values: - return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); - } - }; + AWS_UNREFERENCED_PARAM(error); + // Maximum left shift factor is capped by ceil(log2(max_delay)), to avoid wrap-around and overflow into negative values: + return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); + } + }; - struct NewRetriesImpl : StandardRetryStrategy::RetryImpl + class NewRetriesImpl : public StandardRetryStrategy::RetryImpl + { + public: + long CalculateDelay(const AWSError& error, long attemptedRetries) const override { - long CalculateDelay(const AWSError& error, long attemptedRetries) const override - { - double x = error.ShouldThrottle() ? 1.0 : 0.05; - double exponentialPart = x * static_cast(1L << (std::min)(attemptedRetries, 30L)); - double cappedPart = (std::min)(exponentialPart, 20.0); + double x = error.ShouldThrottle() ? 1.0 : 0.05; + double exponentialPart = x * static_cast(1L << (std::min)(attemptedRetries, 30L)); + double cappedPart = (std::min)(exponentialPart, 20.0); - double b = static_cast(Aws::Utils::GetRandomValue() % 10000) / 10000.0; - double t_i = b * cappedPart; + double b = static_cast(Aws::Utils::GetRandomValue() % 10000) / 10000.0; + double t_i = b * cappedPart; - const auto& headers = error.GetResponseHeaders(); - auto it = headers.find("x-amz-retry-after"); - if (it != headers.end()) + const auto& headers = error.GetResponseHeaders(); + auto it = headers.find("x-amz-retry-after"); + if (it != headers.end()) + { + long long headerMs = Aws::Utils::StringUtils::ConvertToInt64(it->second.c_str()); + if (headerMs < 0) { - long long headerMs = Aws::Utils::StringUtils::ConvertToInt64(it->second.c_str()); - if (headerMs < 0) - { - AWS_LOGSTREAM_DEBUG(RETRY_STRATEGY_TAG, "Ignoring invalid x-amz-retry-after value: " << it->second); - } - double headerSec = static_cast(headerMs) / 1000.0; - double clamped = (std::max)(t_i, (std::min)(headerSec, 5.0 + t_i)); - return static_cast(clamped * 1000.0); + AWS_LOGSTREAM_DEBUG(RETRY_STRATEGY_TAG, "Ignoring invalid x-amz-retry-after value: " << it->second); } - - return static_cast(t_i * 1000.0); + double headerSec = static_cast(headerMs) / 1000.0; + double clamped = (std::max)(t_i, (std::min)(headerSec, 5.0 + t_i)); + return static_cast(clamped * 1000.0); } - }; - } // anonymous namespace - static Aws::UniquePtr CreateRetryImpl() + return static_cast(t_i * 1000.0); + } + }; + + Aws::UniquePtr CreateRetryImpl() + { + if (IsNewRetriesEnabled()) { - if (IsNewRetriesEnabled()) - { - return Aws::MakeUnique("StandardRetryStrategy"); - } - return Aws::MakeUnique("StandardRetryStrategy"); + return Aws::MakeUnique("StandardRetryStrategy"); } + return Aws::MakeUnique("StandardRetryStrategy"); + } - static std::shared_ptr CreateQuotaContainer() + std::shared_ptr CreateQuotaContainer() + { + if (IsNewRetriesEnabled()) { - if (IsNewRetriesEnabled()) - { - return Aws::MakeShared("StandardRetryStrategy"); - } - return Aws::MakeShared("StandardRetryStrategy"); + return Aws::MakeShared("StandardRetryStrategy"); } + return Aws::MakeShared("StandardRetryStrategy"); + } +} // anonymous namespace +namespace Aws +{ + namespace Client + { StandardRetryStrategy::StandardRetryStrategy(long maxAttempts) : m_retryQuotaContainer(CreateQuotaContainer()), m_maxAttempts(maxAttempts), m_impl(CreateRetryImpl()) {} diff --git a/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp b/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp deleted file mode 100644 index 264be122c0d4..000000000000 --- a/tests/aws-cpp-sdk-core-tests/aws/client/RetryBehaviorTest.cpp +++ /dev/null @@ -1,479 +0,0 @@ -/** - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace Aws::Client; -using namespace Aws::Http; - -static const char ALLOCATION_TAG[] = "RetryBehaviorTest"; - -class TestThrottleBasedQuotaContainer : public RetryQuotaContainer -{ -public: - TestThrottleBasedQuotaContainer() : m_retryQuota(500) {} - - bool AcquireRetryQuota(int capacityAmount) override - { - if (capacityAmount > m_retryQuota) return false; - m_retryQuota -= capacityAmount; - return true; - } - - bool AcquireRetryQuota(const AWSError& error) override - { - int capacityAmount = error.ShouldThrottle() ? 5 : 14; - return AcquireRetryQuota(capacityAmount); - } - - void ReleaseRetryQuota(int capacityAmount) override - { - m_retryQuota = std::min(m_retryQuota + capacityAmount, 500); - } - - void ReleaseRetryQuota(const AWSError& error) override - { - int capacityAmount = error.ShouldThrottle() ? 5 : 14; - ReleaseRetryQuota(capacityAmount); - } - - int GetRetryQuota() const override { return m_retryQuota; } - -private: - int m_retryQuota; -}; - -class RetryBehaviorTest : public Aws::Testing::AwsCppSdkGTestSuite -{ - Aws::Environment::EnvironmentRAII m_env{{{"AWS_NEW_RETRIES_2026", "true"}}}; -}; - -static AWSError MakeTransientError() -{ - return AWSError(CoreErrors::NETWORK_CONNECTION, true); -} - -static AWSError MakeThrottlingError() -{ - return AWSError(CoreErrors::THROTTLING, RetryableType::RETRYABLE_THROTTLING); -} - -static AWSError MakeNonRetryableError() -{ - return AWSError(CoreErrors::INCOMPLETE_SIGNATURE, false); -} - -static AWSError MakeTransientErrorWithRetryAfter(const Aws::String& value) -{ - AWSError error(CoreErrors::NETWORK_CONNECTION, true); - HeaderValueCollection headers; - headers["x-amz-retry-after"] = value; - error.SetResponseHeaders(headers); - return error; -} - -// SEP Test 1: Retry eventually succeeds, quota restored -TEST_F(RetryBehaviorTest, RetryEventuallySucceeds) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 3); - - ASSERT_EQ(500, quota->GetRetryQuota()); - - // First failure: transient, costs 14 - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); - ASSERT_EQ(486, quota->GetRetryQuota()); - - // Second failure: transient, costs 14 - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 1)); - ASSERT_EQ(472, quota->GetRetryQuota()); -} - -// SEP Test 2: Max attempts reached -TEST_F(RetryBehaviorTest, MaxAttemptsReached) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 3); - - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 1)); - // Third attempt (index 2) hits max attempts of 3 - ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 2)); -} - -// SEP Test 3: Quota reached after 1 retry -TEST_F(RetryBehaviorTest, QuotaReachedAfterRetry) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - // Drain quota to 10 - ASSERT_TRUE(quota->AcquireRetryQuota(490)); - ASSERT_EQ(10, quota->GetRetryQuota()); - - // Can't acquire 14 tokens - ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 0)); -} - -// SEP Test 4: Zero quota, no retries -TEST_F(RetryBehaviorTest, ZeroQuotaNoRetries) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - ASSERT_TRUE(quota->AcquireRetryQuota(500)); - ASSERT_EQ(0, quota->GetRetryQuota()); - - ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 0)); - ASSERT_FALSE(strategy.ShouldRetry(MakeThrottlingError(), 0)); -} - -// SEP Test 5: Exponential timing (transient, 50ms base) -TEST_F(RetryBehaviorTest, ExponentialBackoffTransient) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - auto error = MakeTransientError(); - // Backoff is randomized, but must be within [0, x * 2^i * 1000]ms - // i=0: [0, 50ms], i=1: [0, 100ms], i=2: [0, 200ms], i=3: [0, 400ms] - for (int i = 0; i < 4; ++i) - { - long delay = strategy.CalculateDelayBeforeNextRetry(error, i); - long maxDelay = static_cast(0.05 * (1L << i) * 1000.0); - ASSERT_GE(delay, 0) << "Retry " << i; - ASSERT_LE(delay, maxDelay) << "Retry " << i; - } -} - -// SEP Test 6: Max backoff cap at 20s -TEST_F(RetryBehaviorTest, MaxBackoffCap) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 100); - - auto error = MakeTransientError(); - // At i=30, 0.05 * 2^30 = 53687091.2s which exceeds 20s cap - long delay = strategy.CalculateDelayBeforeNextRetry(error, 30); - ASSERT_LE(delay, 20000); -} - -// SEP Test 7: Quota exhaustion mid-sequence -TEST_F(RetryBehaviorTest, QuotaExhaustionMidSequence) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 100); - - // Drain to 20 tokens - ASSERT_TRUE(quota->AcquireRetryQuota(480)); - ASSERT_EQ(20, quota->GetRetryQuota()); - - // First retry: costs 14, quota = 6 - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); - ASSERT_EQ(6, quota->GetRetryQuota()); - - // Second retry: can't afford 14 - ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 1)); - ASSERT_EQ(6, quota->GetRetryQuota()); -} - -// SEP Test 8: Quota recovery (stateful multi-request sequence) -TEST_F(RetryBehaviorTest, QuotaRecoveryStateful) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - std::shared_ptr httpRequest = CreateHttpRequest(URI("http://www.uri.com"), HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); - std::shared_ptr httpResponse = Aws::MakeShared(ALLOCATION_TAG, httpRequest); - HttpResponseOutcome httpResponseOutcome(httpResponse); - - // Request 1: retry once (transient), costs 14, quota = 486 - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); - ASSERT_EQ(486, quota->GetRetryQuota()); - - // Request 1 succeeds: release 14, quota = 500 - strategy.RequestBookkeeping(httpResponseOutcome, MakeTransientError()); - ASSERT_EQ(500, quota->GetRetryQuota()); - - // Request 2: retry once (transient), costs 14, quota = 486 - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); - ASSERT_EQ(486, quota->GetRetryQuota()); - - // Request 2: retry again (transient), costs 14, quota = 472 - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 1)); - ASSERT_EQ(472, quota->GetRetryQuota()); - - // Request 2 succeeds: release 14, quota = 486 - strategy.RequestBookkeeping(httpResponseOutcome, MakeTransientError()); - ASSERT_EQ(486, quota->GetRetryQuota()); - - // Request 3: succeeds first try, release NO_RETRY_INCREMENT (1), quota = 487 - strategy.RequestBookkeeping(httpResponseOutcome); - ASSERT_EQ(487, quota->GetRetryQuota()); -} - -// SEP Test 9: Multi-threaded quota sharing (verify shared state) -TEST_F(RetryBehaviorTest, SharedQuotaAcrossRequests) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - // Simulate two concurrent requests both acquiring from same quota - // Request A: transient retry, costs 14, quota = 486 - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); - ASSERT_EQ(486, quota->GetRetryQuota()); - - // Request B: throttling retry, costs 5, quota = 481 - ASSERT_TRUE(strategy.ShouldRetry(MakeThrottlingError(), 0)); - ASSERT_EQ(481, quota->GetRetryQuota()); - - // Request A: another transient retry, costs 14, quota = 467 - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 1)); - ASSERT_EQ(467, quota->GetRetryQuota()); - - // Total consumed: 14 + 5 + 14 = 33 - ASSERT_EQ(500 - 33, quota->GetRetryQuota()); -} - -// SEP Test 10: Throttling costs (5 tokens) and backoff (1000ms base) -TEST_F(RetryBehaviorTest, ThrottlingCostsAndBackoff) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - // Throttling error costs 5 tokens - ASSERT_TRUE(strategy.ShouldRetry(MakeThrottlingError(), 0)); - ASSERT_EQ(495, quota->GetRetryQuota()); - - // Backoff for throttling: [0, 1000ms] at i=0 - auto error = MakeThrottlingError(); - long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); - ASSERT_GE(delay, 0); - ASSERT_LE(delay, 1000); - - // At i=1: [0, 2000ms] - delay = strategy.CalculateDelayBeforeNextRetry(error, 1); - ASSERT_GE(delay, 0); - ASSERT_LE(delay, 2000); -} - -// TODO: SEP Test 11: DynamoDB tuning (25ms base, 4 max attempts) -// Deferred to next PR — requires 3-arg constructor or service-specific config -// to pass transientBackoffBaseSec=0.025 for DynamoDB/DynamoDB Streams. - -// SEP Test 12: Long-polling transient + empty quota (backoff applied) -TEST_F(RetryBehaviorTest, LongPollingTransientEmptyQuota) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - // Drain quota - ASSERT_TRUE(quota->AcquireRetryQuota(500)); - ASSERT_EQ(0, quota->GetRetryQuota()); - - // ShouldRetry returns false (no quota) - ASSERT_FALSE(strategy.ShouldRetry(MakeTransientError(), 0)); - - // But backoff is still computable for long-polling use - auto error = MakeTransientError(); - long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); - // i=0, transient, base=0.05: [0, 50ms] - ASSERT_GE(delay, 0); - ASSERT_LE(delay, 50); -} - -// SEP Test 13: Long-polling throttling + empty quota (backoff applied) -TEST_F(RetryBehaviorTest, LongPollingThrottlingEmptyQuota) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - // Drain quota - ASSERT_TRUE(quota->AcquireRetryQuota(500)); - ASSERT_EQ(0, quota->GetRetryQuota()); - - // ShouldRetry returns false (no quota) - ASSERT_FALSE(strategy.ShouldRetry(MakeThrottlingError(), 0)); - - // But backoff is still computable for long-polling use - auto error = MakeThrottlingError(); - long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); - // i=0, throttling, base=1.0: [0, 1000ms] - ASSERT_GE(delay, 0); - ASSERT_LE(delay, 1000); -} - -// TODO: SEP Tests 14, 15, 16 require integration tests that exercise the full pipeline -// with a mock HTTP client to verify the pipeline does NOT sleep in these scenarios: -// - Test 14: Long-polling max attempts exceeded (no delay before returning) -// - Test 15: Long-polling success (no delay before returning) -// - Test 16: Long-polling non-retryable error (no delay before returning) - - -// SEP Test 17: retry-after header honored -TEST_F(RetryBehaviorTest, RetryAfterHeaderHonored) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - // Header value 1500ms, at i=0 t_i is in [0, 50ms] - // clamped to max(t_i, min(1.5, 5 + t_i)) = 1.5s = 1500ms - auto error = MakeTransientErrorWithRetryAfter("1500"); - long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); - // Should be between 1500 and 5050 (5 + max t_i at i=0) - ASSERT_GE(delay, 50); // at minimum t_i (could be 0, then header wins) - ASSERT_LE(delay, 5050); -} - -// SEP Test 18: retry-after floor clamped (value 0 clamped up to t_i) -TEST_F(RetryBehaviorTest, RetryAfterFloorClamped) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - auto error = MakeTransientErrorWithRetryAfter("0"); - // Header is 0ms, gets clamped up to t_i - // Result should be same as normal backoff (t_i) - long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); - ASSERT_GE(delay, 0); - ASSERT_LE(delay, 50); // max t_i at i=0 with base 0.05 -} - -// SEP Test 19: retry-after ceiling clamped (value 10000ms clamped to 5+t_i) -TEST_F(RetryBehaviorTest, RetryAfterCeilingClamped) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - auto error = MakeTransientErrorWithRetryAfter("10000"); - // Header is 10000ms = 10s, exceeds 5 + t_i (max ~5.05s at i=0) - long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); - // Clamped to 5 + t_i, which is at most 5050ms - ASSERT_LE(delay, 5050); - // But at least t_i (could be 0) - ASSERT_GE(delay, 0); -} - -// SEP Test 20: Invalid retry-after falls back to exponential backoff -TEST_F(RetryBehaviorTest, InvalidRetryAfterFallsBack) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - // "abc" parses to 0 via atoll, which gets clamped to t_i - auto error = MakeTransientErrorWithRetryAfter("abc"); - long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); - ASSERT_GE(delay, 0); - ASSERT_LE(delay, 50); -} - -class RetryBehaviorLegacyTest : public Aws::Testing::AwsCppSdkGTestSuite -{ - Aws::Environment::EnvironmentRAII m_env{{{"AWS_NEW_RETRIES_2026", ""}}}; -}; - -// Verify legacy behavior unchanged when gate is off (old constructors) -TEST_F(RetryBehaviorLegacyTest, LegacyBehaviorUnchanged) -{ - StandardRetryStrategy strategy(3); - - auto error = MakeTransientError(); - // Legacy: rand(0,999) * 2^i, cap 20000 - long delay = strategy.CalculateDelayBeforeNextRetry(error, 0); - ASSERT_GE(delay, 0); - ASSERT_LE(delay, 999); // 2^0 = 1, so max is 999 - - delay = strategy.CalculateDelayBeforeNextRetry(error, 1); - ASSERT_GE(delay, 0); - ASSERT_LE(delay, 1998); // 2^1 = 2, so max is 1998 - - delay = strategy.CalculateDelayBeforeNextRetry(error, 15); - ASSERT_GE(delay, 0); - ASSERT_LE(delay, 20000); // capped -} - -// Verify throttle-based classification: throttling costs 5, transient costs 14 -TEST_F(RetryBehaviorTest, ThrottleBasedClassification) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - // Transient costs 14 - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); - ASSERT_EQ(486, quota->GetRetryQuota()); - - // Throttling costs 5 - ASSERT_TRUE(strategy.ShouldRetry(MakeThrottlingError(), 0)); - ASSERT_EQ(481, quota->GetRetryQuota()); -} - -// Verify legacy classification: REQUEST_TIMEOUT costs 10, others cost 5 -TEST_F(RetryBehaviorLegacyTest, LegacyClassification) -{ - DefaultRetryQuotaContainer quota; - - AWSError timeoutError(CoreErrors::REQUEST_TIMEOUT, true); - AWSError otherError(CoreErrors::NETWORK_CONNECTION, true); - - // Other error costs 5 - ASSERT_TRUE(quota.AcquireRetryQuota(otherError)); - ASSERT_EQ(495, quota.GetRetryQuota()); - - // Timeout costs 10 - ASSERT_TRUE(quota.AcquireRetryQuota(timeoutError)); - ASSERT_EQ(485, quota.GetRetryQuota()); -} - -// Non-retryable errors are not retried regardless of gate -TEST_F(RetryBehaviorTest, NonRetryableNotRetried) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - ASSERT_FALSE(strategy.ShouldRetry(MakeNonRetryableError(), 0)); - ASSERT_EQ(500, quota->GetRetryQuota()); -} - -// Verify RequestBookkeeping releases correct tokens on success -TEST_F(RetryBehaviorTest, RequestBookkeepingReleasesTokens) -{ - auto quota = Aws::MakeShared(ALLOCATION_TAG); - StandardRetryStrategy strategy(quota, 10); - - std::shared_ptr httpRequest = CreateHttpRequest(URI("http://www.uri.com"), HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); - std::shared_ptr httpResponse = Aws::MakeShared(ALLOCATION_TAG, httpRequest); - HttpResponseOutcome httpResponseOutcome(httpResponse); - - // Acquire 14 (transient), quota = 486 - ASSERT_TRUE(strategy.ShouldRetry(MakeTransientError(), 0)); - ASSERT_EQ(486, quota->GetRetryQuota()); - - // Release 14 on success with last error context - strategy.RequestBookkeeping(httpResponseOutcome, MakeTransientError()); - ASSERT_EQ(500, quota->GetRetryQuota()); - - // Acquire 5 (throttling), quota = 495 - ASSERT_TRUE(strategy.ShouldRetry(MakeThrottlingError(), 0)); - ASSERT_EQ(495, quota->GetRetryQuota()); - - // Release 5 on success with last error context - strategy.RequestBookkeeping(httpResponseOutcome, MakeThrottlingError()); - ASSERT_EQ(500, quota->GetRetryQuota()); - - // Release NO_RETRY_INCREMENT (1) on success without prior retry - quota->AcquireRetryQuota(10); - ASSERT_EQ(490, quota->GetRetryQuota()); - strategy.RequestBookkeeping(httpResponseOutcome); - ASSERT_EQ(491, quota->GetRetryQuota()); -} diff --git a/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp b/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp index ffb977f395a4..dbcb058c0ed3 100644 --- a/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp +++ b/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp @@ -5,6 +5,7 @@ #include +#include #include #include #include @@ -126,3 +127,161 @@ TEST_F(RetryStrategyTest, TestStandardRetryStrategy) retryStrategy.RequestBookkeeping(httpResponse, requestTimeoutError); ASSERT_EQ(500, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); } + +class NewRetriesStrategyTest : public Aws::Testing::AwsCppSdkGTestSuite +{ + Aws::Environment::EnvironmentRAII m_env{{{"AWS_NEW_RETRIES_2026", "true"}}}; +}; + +// SEP Test Case 1, 9, 10 +TEST_F(NewRetriesStrategyTest, ThrottleBasedCostClassification) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + AWSError throttlingError(CoreErrors::THROTTLING, RetryableType::RETRYABLE_THROTTLING); + + ASSERT_EQ(500, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); + + // Transient error costs 14 tokens + ASSERT_TRUE(retryStrategy.ShouldRetry(transientError, 0)); + ASSERT_EQ(486, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); + + // Throttling error costs 5 tokens + ASSERT_TRUE(retryStrategy.ShouldRetry(throttlingError, 0)); + ASSERT_EQ(481, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); +} + +// SEP Test Case 3 +TEST_F(NewRetriesStrategyTest, QuotaExhaustionWithNewCosts) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + + // Drain to 10 tokens + ASSERT_TRUE(retryStrategy.GetRetryQuotaContainer()->AcquireRetryQuota(490)); + ASSERT_EQ(10, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); + + // Can't acquire 14 tokens for transient error + ASSERT_FALSE(retryStrategy.ShouldRetry(transientError, 0)); +} + +// SEP Test Case 8 +TEST_F(NewRetriesStrategyTest, QuotaRecoveryOnSuccess) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + + std::shared_ptr httpRequest = CreateHttpRequest(URI("http://www.uri.com"), HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); + std::shared_ptr httpResponse = Aws::MakeShared(ALLOCATION_TAG, httpRequest); + HttpResponseOutcome httpResponseOutcome(httpResponse); + + // Retry costs 14, quota = 486 + ASSERT_TRUE(retryStrategy.ShouldRetry(transientError, 0)); + ASSERT_EQ(486, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); + + // Success releases 14, quota = 500 + retryStrategy.RequestBookkeeping(httpResponseOutcome, transientError); + ASSERT_EQ(500, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); +} + +// SEP Test Case 5 +TEST_F(NewRetriesStrategyTest, ExponentialBackoffTransient) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + + // Backoff with 50ms base: [0, 50ms] at i=0, [0, 100ms] at i=1, etc. + for (int i = 0; i < 4; ++i) + { + long delay = retryStrategy.CalculateDelayBeforeNextRetry(transientError, i); + long maxDelay = static_cast(0.05 * (1L << i) * 1000.0); + ASSERT_GE(delay, 0) << "Retry " << i; + ASSERT_LE(delay, maxDelay) << "Retry " << i; + } +} + +// SEP Test Case 10 +TEST_F(NewRetriesStrategyTest, ExponentialBackoffThrottling) +{ + MockStandardRetryStrategy retryStrategy; + AWSError throttlingError(CoreErrors::THROTTLING, RetryableType::RETRYABLE_THROTTLING); + + // Backoff with 1000ms base: [0, 1000ms] at i=0, [0, 2000ms] at i=1 + long delay = retryStrategy.CalculateDelayBeforeNextRetry(throttlingError, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 1000); + + delay = retryStrategy.CalculateDelayBeforeNextRetry(throttlingError, 1); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 2000); +} + +// SEP Test Case 6 +TEST_F(NewRetriesStrategyTest, MaxBackoffCap) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + + // At high retry index, cap at 20s + long delay = retryStrategy.CalculateDelayBeforeNextRetry(transientError, 30); + ASSERT_LE(delay, 20000); +} + +// SEP Test Case 17 +TEST_F(NewRetriesStrategyTest, RetryAfterHeaderHonored) +{ + MockStandardRetryStrategy retryStrategy; + AWSError error(CoreErrors::NETWORK_CONNECTION, true); + HeaderValueCollection headers; + headers["x-amz-retry-after"] = "1500"; + error.SetResponseHeaders(headers); + + long delay = retryStrategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 5050); +} + +// SEP Test Case 18 +TEST_F(NewRetriesStrategyTest, RetryAfterFloorClamped) +{ + MockStandardRetryStrategy retryStrategy; + AWSError error(CoreErrors::NETWORK_CONNECTION, true); + HeaderValueCollection headers; + headers["x-amz-retry-after"] = "0"; + error.SetResponseHeaders(headers); + + long delay = retryStrategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 50); +} + +// SEP Test Case 19 +TEST_F(NewRetriesStrategyTest, RetryAfterCeilingClamped) +{ + MockStandardRetryStrategy retryStrategy; + AWSError error(CoreErrors::NETWORK_CONNECTION, true); + HeaderValueCollection headers; + headers["x-amz-retry-after"] = "10000"; + error.SetResponseHeaders(headers); + + long delay = retryStrategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 5050); +} + +// SEP Test Case 20 +TEST_F(NewRetriesStrategyTest, InvalidRetryAfterFallsBack) +{ + MockStandardRetryStrategy retryStrategy; + AWSError error(CoreErrors::NETWORK_CONNECTION, true); + HeaderValueCollection headers; + headers["x-amz-retry-after"] = "abc"; + error.SetResponseHeaders(headers); + + long delay = retryStrategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 50); +} + +// TODO: SEP Test 11 (DynamoDB 25ms base) deferred to next PR. +// TODO: SEP Tests 14-16 (long-polling pipeline behavior) require integration tests. From 10540c7a7348423826555efa256557595822c7cc Mon Sep 17 00:00:00 2001 From: kai lin Date: Thu, 4 Jun 2026 14:38:09 -0400 Subject: [PATCH 11/11] updated testing and namespace --- .../source/client/RetryStrategy.cpp | 7 +- .../aws/client/RetryStrategyTest.cpp | 72 +++++++++++-------- 2 files changed, 45 insertions(+), 34 deletions(-) diff --git a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp index da23fab25be1..eeef6b2826d9 100644 --- a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp @@ -17,9 +17,6 @@ using namespace Aws::Utils::Threading; using namespace Aws::Client; static const char RETRY_STRATEGY_TAG[] = "StandardRetryStrategy"; -static const int INITIAL_RETRY_TOKENS = 500; -static const int RETRY_COST = 5; -static const int TIMEOUT_RETRY_COST = 10; namespace Aws { @@ -104,6 +101,10 @@ namespace Aws { namespace Client { + static const int INITIAL_RETRY_TOKENS = 500; + static const int RETRY_COST = 5; + static const int TIMEOUT_RETRY_COST = 10; + StandardRetryStrategy::StandardRetryStrategy(long maxAttempts) : m_retryQuotaContainer(CreateQuotaContainer()), m_maxAttempts(maxAttempts), m_impl(CreateRetryImpl()) {} diff --git a/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp b/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp index dbcb058c0ed3..741e1496b7bb 100644 --- a/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp +++ b/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp @@ -133,22 +133,19 @@ class NewRetriesStrategyTest : public Aws::Testing::AwsCppSdkGTestSuite Aws::Environment::EnvironmentRAII m_env{{{"AWS_NEW_RETRIES_2026", "true"}}}; }; -// SEP Test Case 1, 9, 10 -TEST_F(NewRetriesStrategyTest, ThrottleBasedCostClassification) +// SEP Test Case 1 +TEST_F(NewRetriesStrategyTest, TransientRetryCost) { MockStandardRetryStrategy retryStrategy; AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); - AWSError throttlingError(CoreErrors::THROTTLING, RetryableType::RETRYABLE_THROTTLING); ASSERT_EQ(500, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); - // Transient error costs 14 tokens ASSERT_TRUE(retryStrategy.ShouldRetry(transientError, 0)); ASSERT_EQ(486, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); - // Throttling error costs 5 tokens - ASSERT_TRUE(retryStrategy.ShouldRetry(throttlingError, 0)); - ASSERT_EQ(481, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); + ASSERT_TRUE(retryStrategy.ShouldRetry(transientError, 1)); + ASSERT_EQ(472, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); } // SEP Test Case 3 @@ -165,6 +162,33 @@ TEST_F(NewRetriesStrategyTest, QuotaExhaustionWithNewCosts) ASSERT_FALSE(retryStrategy.ShouldRetry(transientError, 0)); } +// SEP Test Case 5 +TEST_F(NewRetriesStrategyTest, ExponentialBackoffTransient) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + + // Backoff with 50ms base: [0, 50ms] at i=0, [0, 100ms] at i=1, etc. + for (int i = 0; i < 4; ++i) + { + long delay = retryStrategy.CalculateDelayBeforeNextRetry(transientError, i); + long maxDelay = static_cast(0.05 * (1L << i) * 1000.0); + ASSERT_GE(delay, 0) << "Retry " << i; + ASSERT_LE(delay, maxDelay) << "Retry " << i; + } +} + +// SEP Test Case 6 +TEST_F(NewRetriesStrategyTest, MaxBackoffCap) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + + // At high retry index, cap at 20s + long delay = retryStrategy.CalculateDelayBeforeNextRetry(transientError, 30); + ASSERT_LE(delay, 20000); +} + // SEP Test Case 8 TEST_F(NewRetriesStrategyTest, QuotaRecoveryOnSuccess) { @@ -184,20 +208,16 @@ TEST_F(NewRetriesStrategyTest, QuotaRecoveryOnSuccess) ASSERT_EQ(500, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); } -// SEP Test Case 5 -TEST_F(NewRetriesStrategyTest, ExponentialBackoffTransient) +// SEP Test Case 9 +TEST_F(NewRetriesStrategyTest, ThrottlingRetryCost) { MockStandardRetryStrategy retryStrategy; - AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + AWSError throttlingError(CoreErrors::THROTTLING, RetryableType::RETRYABLE_THROTTLING); - // Backoff with 50ms base: [0, 50ms] at i=0, [0, 100ms] at i=1, etc. - for (int i = 0; i < 4; ++i) - { - long delay = retryStrategy.CalculateDelayBeforeNextRetry(transientError, i); - long maxDelay = static_cast(0.05 * (1L << i) * 1000.0); - ASSERT_GE(delay, 0) << "Retry " << i; - ASSERT_LE(delay, maxDelay) << "Retry " << i; - } + ASSERT_EQ(500, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); + + ASSERT_TRUE(retryStrategy.ShouldRetry(throttlingError, 0)); + ASSERT_EQ(495, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); } // SEP Test Case 10 @@ -216,17 +236,6 @@ TEST_F(NewRetriesStrategyTest, ExponentialBackoffThrottling) ASSERT_LE(delay, 2000); } -// SEP Test Case 6 -TEST_F(NewRetriesStrategyTest, MaxBackoffCap) -{ - MockStandardRetryStrategy retryStrategy; - AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); - - // At high retry index, cap at 20s - long delay = retryStrategy.CalculateDelayBeforeNextRetry(transientError, 30); - ASSERT_LE(delay, 20000); -} - // SEP Test Case 17 TEST_F(NewRetriesStrategyTest, RetryAfterHeaderHonored) { @@ -283,5 +292,6 @@ TEST_F(NewRetriesStrategyTest, InvalidRetryAfterFallsBack) ASSERT_LE(delay, 50); } -// TODO: SEP Test 11 (DynamoDB 25ms base) deferred to next PR. -// TODO: SEP Tests 14-16 (long-polling pipeline behavior) require integration tests. +// SEP Test Cases 2, 4, 7 are covered by TestStandardRetryStrategy above (same behavior with/without gate). +// TODO: SEP Test Case 11 (DynamoDB 25ms base) deferred to next PR. +// TODO: SEP Test Cases 12-16 (long-polling) require pipeline integration tests.