diff --git a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h index 8f9260c69091..32f8cb24f2cc 100644 --- a/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h +++ b/src/aws-cpp-sdk-core/include/aws/core/client/RetryStrategy.h @@ -6,6 +6,7 @@ #pragma once #include +#include #include #include @@ -123,6 +124,7 @@ namespace Aws public: StandardRetryStrategy(long maxAttempts = 3); StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts = 3); + virtual ~StandardRetryStrategy(); virtual void RequestBookkeeping(const HttpResponseOutcome& httpResponseOutcome) override; virtual void RequestBookkeeping(const HttpResponseOutcome& httpResponseOutcome, const AWSError& lastError) override; @@ -135,9 +137,14 @@ namespace Aws const char* GetStrategyName() const override { return "standard";} + struct RetryImpl; + protected: std::shared_ptr m_retryQuotaContainer; long m_maxAttempts; + + private: + Aws::UniquePtr m_impl; }; } // namespace Client } // namespace Aws diff --git a/src/aws-cpp-sdk-core/include/aws/core/internal/RetryStrategyImpl.h b/src/aws-cpp-sdk-core/include/aws/core/internal/RetryStrategyImpl.h new file mode 100644 index 000000000000..c19278211d8a --- /dev/null +++ b/src/aws-cpp-sdk-core/include/aws/core/internal/RetryStrategyImpl.h @@ -0,0 +1,71 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace Aws +{ + namespace Client + { + static const int THROTTLE_BASED_RETRY_COST = 14; + static const int THROTTLE_BASED_THROTTLING_COST = 5; + static const int THROTTLE_BASED_INITIAL_TOKENS = 500; + + class AWS_CORE_LOCAL ThrottleBasedRetryQuotaContainer : public RetryQuotaContainer + { + public: + ThrottleBasedRetryQuotaContainer(int retryCost = THROTTLE_BASED_RETRY_COST, int throttlingRetryCost = THROTTLE_BASED_THROTTLING_COST) + : m_retryQuota(THROTTLE_BASED_INITIAL_TOKENS), m_retryCost(retryCost), m_throttlingRetryCost(throttlingRetryCost) {} + + virtual ~ThrottleBasedRetryQuotaContainer() = default; + + bool AcquireRetryQuota(int capacityAmount) override + { + Aws::Utils::Threading::WriterLockGuard guard(m_retryQuotaLock); + if (capacityAmount > m_retryQuota) + { + return false; + } + else + { + m_retryQuota -= capacityAmount; + return true; + } + } + + bool AcquireRetryQuota(const AWSError& error) override + { + int capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; + return AcquireRetryQuota(capacityAmount); + } + + void ReleaseRetryQuota(int capacityAmount) override + { + Aws::Utils::Threading::WriterLockGuard guard(m_retryQuotaLock); + m_retryQuota = (std::min)(m_retryQuota + capacityAmount, THROTTLE_BASED_INITIAL_TOKENS); + } + + void ReleaseRetryQuota(const AWSError& error) override + { + int capacityAmount = error.ShouldThrottle() ? m_throttlingRetryCost : m_retryCost; + ReleaseRetryQuota(capacityAmount); + } + + int GetRetryQuota() const override { return m_retryQuota; } + + private: + mutable Aws::Utils::Threading::ReaderWriterLock m_retryQuotaLock; + int m_retryQuota; + int m_retryCost; + int m_throttlingRetryCost; + }; + } // namespace Client +} // namespace Aws diff --git a/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp b/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp index cc805c69eb99..de8ac00c9e78 100644 --- a/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp +++ b/src/aws-cpp-sdk-core/source/client/ClientConfiguration.cpp @@ -552,6 +552,10 @@ std::shared_ptr InitRetryStrategy(int maxAttempts, Aws::String re { retryMode = Aws::Config::GetCachedConfigValue("retry_mode"); } + if (Aws::Utils::StringUtils::ToLower(Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026").c_str()) == "true" && retryMode.empty()) + { + retryMode = "standard"; + } std::shared_ptr retryStrategy; if (retryMode == "standard") diff --git a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp index 2c3b43fc3936..eeef6b2826d9 100644 --- a/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp +++ b/src/aws-cpp-sdk-core/source/client/RetryStrategy.cpp @@ -6,10 +6,96 @@ #include #include #include +#include +#include #include +#include #include +#include using namespace Aws::Utils::Threading; +using namespace Aws::Client; + +static const char RETRY_STRATEGY_TAG[] = "StandardRetryStrategy"; + +namespace Aws +{ + namespace Client + { + class StandardRetryStrategy::RetryImpl + { + public: + virtual ~RetryImpl() = default; + virtual long CalculateDelay(const AWSError& error, long attemptedRetries) const = 0; + }; + } +} + +namespace { + bool IsNewRetriesEnabled() + { + return Aws::Utils::StringUtils::ToLower(Aws::Environment::GetEnv("AWS_NEW_RETRIES_2026").c_str()) == "true"; + } + + class LegacyRetryImpl : public StandardRetryStrategy::RetryImpl + { + public: + long CalculateDelay(const AWSError& error, long attemptedRetries) const override + { + AWS_UNREFERENCED_PARAM(error); + // Maximum left shift factor is capped by ceil(log2(max_delay)), to avoid wrap-around and overflow into negative values: + return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); + } + }; + + class NewRetriesImpl : public StandardRetryStrategy::RetryImpl + { + public: + long CalculateDelay(const AWSError& error, long attemptedRetries) const override + { + double x = error.ShouldThrottle() ? 1.0 : 0.05; + double exponentialPart = x * static_cast(1L << (std::min)(attemptedRetries, 30L)); + double cappedPart = (std::min)(exponentialPart, 20.0); + + double b = static_cast(Aws::Utils::GetRandomValue() % 10000) / 10000.0; + double t_i = b * cappedPart; + + const auto& headers = error.GetResponseHeaders(); + auto it = headers.find("x-amz-retry-after"); + if (it != headers.end()) + { + long long headerMs = Aws::Utils::StringUtils::ConvertToInt64(it->second.c_str()); + if (headerMs < 0) + { + AWS_LOGSTREAM_DEBUG(RETRY_STRATEGY_TAG, "Ignoring invalid x-amz-retry-after value: " << it->second); + } + double headerSec = static_cast(headerMs) / 1000.0; + double clamped = (std::max)(t_i, (std::min)(headerSec, 5.0 + t_i)); + return static_cast(clamped * 1000.0); + } + + return static_cast(t_i * 1000.0); + } + }; + + Aws::UniquePtr CreateRetryImpl() + { + if (IsNewRetriesEnabled()) + { + return Aws::MakeUnique("StandardRetryStrategy"); + } + return Aws::MakeUnique("StandardRetryStrategy"); + } + + std::shared_ptr CreateQuotaContainer() + { + if (IsNewRetriesEnabled()) + { + return Aws::MakeShared("StandardRetryStrategy"); + } + return Aws::MakeShared("StandardRetryStrategy"); + } +} // anonymous namespace namespace Aws { @@ -20,10 +106,14 @@ namespace Aws static const int TIMEOUT_RETRY_COST = 10; StandardRetryStrategy::StandardRetryStrategy(long maxAttempts) - : m_retryQuotaContainer(Aws::MakeShared("StandardRetryStrategy")), m_maxAttempts(maxAttempts) {} + : m_retryQuotaContainer(CreateQuotaContainer()), m_maxAttempts(maxAttempts), + m_impl(CreateRetryImpl()) {} StandardRetryStrategy::StandardRetryStrategy(std::shared_ptr retryQuotaContainer, long maxAttempts) - : m_retryQuotaContainer(retryQuotaContainer), m_maxAttempts(maxAttempts) {} + : m_retryQuotaContainer(retryQuotaContainer), m_maxAttempts(maxAttempts), + m_impl(CreateRetryImpl()) {} + + StandardRetryStrategy::~StandardRetryStrategy() = default; void StandardRetryStrategy::RequestBookkeeping(const HttpResponseOutcome& httpResponseOutcome) { @@ -54,9 +144,7 @@ namespace Aws long StandardRetryStrategy::CalculateDelayBeforeNextRetry(const AWSError& error, long attemptedRetries) const { - AWS_UNREFERENCED_PARAM(error); - // Maximum left shift factor is capped by ceil(log2(max_delay)), to avoid wrap-around and overflow into negative values: - return std::min(static_cast(Aws::Utils::GetRandomValue() % 1000) * (1 << std::min(attemptedRetries, 15L)), 20000); + return m_impl->CalculateDelay(error, attemptedRetries); } DefaultRetryQuotaContainer::DefaultRetryQuotaContainer() : m_retryQuota(INITIAL_RETRY_TOKENS) diff --git a/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp b/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp index ffb977f395a4..741e1496b7bb 100644 --- a/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp +++ b/tests/aws-cpp-sdk-core-tests/aws/client/RetryStrategyTest.cpp @@ -5,6 +5,7 @@ #include +#include #include #include #include @@ -126,3 +127,171 @@ TEST_F(RetryStrategyTest, TestStandardRetryStrategy) retryStrategy.RequestBookkeeping(httpResponse, requestTimeoutError); ASSERT_EQ(500, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); } + +class NewRetriesStrategyTest : public Aws::Testing::AwsCppSdkGTestSuite +{ + Aws::Environment::EnvironmentRAII m_env{{{"AWS_NEW_RETRIES_2026", "true"}}}; +}; + +// SEP Test Case 1 +TEST_F(NewRetriesStrategyTest, TransientRetryCost) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + + ASSERT_EQ(500, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); + + ASSERT_TRUE(retryStrategy.ShouldRetry(transientError, 0)); + ASSERT_EQ(486, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); + + ASSERT_TRUE(retryStrategy.ShouldRetry(transientError, 1)); + ASSERT_EQ(472, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); +} + +// SEP Test Case 3 +TEST_F(NewRetriesStrategyTest, QuotaExhaustionWithNewCosts) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + + // Drain to 10 tokens + ASSERT_TRUE(retryStrategy.GetRetryQuotaContainer()->AcquireRetryQuota(490)); + ASSERT_EQ(10, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); + + // Can't acquire 14 tokens for transient error + ASSERT_FALSE(retryStrategy.ShouldRetry(transientError, 0)); +} + +// SEP Test Case 5 +TEST_F(NewRetriesStrategyTest, ExponentialBackoffTransient) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + + // Backoff with 50ms base: [0, 50ms] at i=0, [0, 100ms] at i=1, etc. + for (int i = 0; i < 4; ++i) + { + long delay = retryStrategy.CalculateDelayBeforeNextRetry(transientError, i); + long maxDelay = static_cast(0.05 * (1L << i) * 1000.0); + ASSERT_GE(delay, 0) << "Retry " << i; + ASSERT_LE(delay, maxDelay) << "Retry " << i; + } +} + +// SEP Test Case 6 +TEST_F(NewRetriesStrategyTest, MaxBackoffCap) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + + // At high retry index, cap at 20s + long delay = retryStrategy.CalculateDelayBeforeNextRetry(transientError, 30); + ASSERT_LE(delay, 20000); +} + +// SEP Test Case 8 +TEST_F(NewRetriesStrategyTest, QuotaRecoveryOnSuccess) +{ + MockStandardRetryStrategy retryStrategy; + AWSError transientError(CoreErrors::NETWORK_CONNECTION, true); + + std::shared_ptr httpRequest = CreateHttpRequest(URI("http://www.uri.com"), HttpMethod::HTTP_GET, Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); + std::shared_ptr httpResponse = Aws::MakeShared(ALLOCATION_TAG, httpRequest); + HttpResponseOutcome httpResponseOutcome(httpResponse); + + // Retry costs 14, quota = 486 + ASSERT_TRUE(retryStrategy.ShouldRetry(transientError, 0)); + ASSERT_EQ(486, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); + + // Success releases 14, quota = 500 + retryStrategy.RequestBookkeeping(httpResponseOutcome, transientError); + ASSERT_EQ(500, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); +} + +// SEP Test Case 9 +TEST_F(NewRetriesStrategyTest, ThrottlingRetryCost) +{ + MockStandardRetryStrategy retryStrategy; + AWSError throttlingError(CoreErrors::THROTTLING, RetryableType::RETRYABLE_THROTTLING); + + ASSERT_EQ(500, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); + + ASSERT_TRUE(retryStrategy.ShouldRetry(throttlingError, 0)); + ASSERT_EQ(495, retryStrategy.GetRetryQuotaContainer()->GetRetryQuota()); +} + +// SEP Test Case 10 +TEST_F(NewRetriesStrategyTest, ExponentialBackoffThrottling) +{ + MockStandardRetryStrategy retryStrategy; + AWSError throttlingError(CoreErrors::THROTTLING, RetryableType::RETRYABLE_THROTTLING); + + // Backoff with 1000ms base: [0, 1000ms] at i=0, [0, 2000ms] at i=1 + long delay = retryStrategy.CalculateDelayBeforeNextRetry(throttlingError, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 1000); + + delay = retryStrategy.CalculateDelayBeforeNextRetry(throttlingError, 1); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 2000); +} + +// SEP Test Case 17 +TEST_F(NewRetriesStrategyTest, RetryAfterHeaderHonored) +{ + MockStandardRetryStrategy retryStrategy; + AWSError error(CoreErrors::NETWORK_CONNECTION, true); + HeaderValueCollection headers; + headers["x-amz-retry-after"] = "1500"; + error.SetResponseHeaders(headers); + + long delay = retryStrategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 5050); +} + +// SEP Test Case 18 +TEST_F(NewRetriesStrategyTest, RetryAfterFloorClamped) +{ + MockStandardRetryStrategy retryStrategy; + AWSError error(CoreErrors::NETWORK_CONNECTION, true); + HeaderValueCollection headers; + headers["x-amz-retry-after"] = "0"; + error.SetResponseHeaders(headers); + + long delay = retryStrategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 50); +} + +// SEP Test Case 19 +TEST_F(NewRetriesStrategyTest, RetryAfterCeilingClamped) +{ + MockStandardRetryStrategy retryStrategy; + AWSError error(CoreErrors::NETWORK_CONNECTION, true); + HeaderValueCollection headers; + headers["x-amz-retry-after"] = "10000"; + error.SetResponseHeaders(headers); + + long delay = retryStrategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 5050); +} + +// SEP Test Case 20 +TEST_F(NewRetriesStrategyTest, InvalidRetryAfterFallsBack) +{ + MockStandardRetryStrategy retryStrategy; + AWSError error(CoreErrors::NETWORK_CONNECTION, true); + HeaderValueCollection headers; + headers["x-amz-retry-after"] = "abc"; + error.SetResponseHeaders(headers); + + long delay = retryStrategy.CalculateDelayBeforeNextRetry(error, 0); + ASSERT_GE(delay, 0); + ASSERT_LE(delay, 50); +} + +// SEP Test Cases 2, 4, 7 are covered by TestStandardRetryStrategy above (same behavior with/without gate). +// TODO: SEP Test Case 11 (DynamoDB 25ms base) deferred to next PR. +// TODO: SEP Test Cases 12-16 (long-polling) require pipeline integration tests.