From 7058760a0dac38ecfe186045696ab0ee4a664a6e Mon Sep 17 00:00:00 2001
From: jmaeagle99 <44687433+jmaeagle99@users.noreply.github.com>
Date: Mon, 13 Apr 2026 13:53:29 -0700
Subject: [PATCH] Require hash algorithm and value on S3 claim payloads

---
 temporalio/contrib/aws/s3driver/_driver.py  | 37 ++++++++++++---------
 tests/contrib/aws/s3driver/test_s3driver.py | 11 +++---
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/temporalio/contrib/aws/s3driver/_driver.py b/temporalio/contrib/aws/s3driver/_driver.py
index 9e68697ac..f784e67d1 100644
--- a/temporalio/contrib/aws/s3driver/_driver.py
+++ b/temporalio/contrib/aws/s3driver/_driver.py
@@ -188,23 +188,28 @@ async def _download(claim: StorageDriverClaim) -> Payload:
                     f"S3StorageDriver retrieve failed [bucket={bucket}, key={key}]"
                 ) from e
 
-            expected_hash = claim.claim_data.get("hash_value")
             hash_algorithm = claim.claim_data.get("hash_algorithm")
-            if expected_hash and hash_algorithm:
-                if hash_algorithm != "sha256":
-                    raise ValueError(
-                        f"S3StorageDriver unsupported hash algorithm "
-                        f"[bucket={bucket}, key={key}]: "
-                        f"expected sha256, got {hash_algorithm}"
-                    )
-                actual_hash = hashlib.sha256(payload_bytes).hexdigest().lower()
-                if actual_hash != expected_hash:
-                    raise ValueError(
-                        f"S3StorageDriver integrity check failed "
-                        f"[bucket={bucket}, key={key}]: "
-                        f"expected {hash_algorithm}:{expected_hash}, "
-                        f"got {hash_algorithm}:{actual_hash}"
-                    )
+            expected_hash = claim.claim_data.get("hash_value")
+            if not hash_algorithm or not expected_hash:
+                raise ValueError(
+                    f"S3StorageDriver claim is missing required content hash information "
+                    f"[bucket={bucket}, key={key}]: "
+                    f"claim_data must contain 'hash_algorithm' and 'hash_value'"
+                )
+            if hash_algorithm != "sha256":
+                raise ValueError(
+                    f"S3StorageDriver unsupported hash algorithm "
+                    f"[bucket={bucket}, key={key}]: "
+                    f"expected sha256, got {hash_algorithm}"
+                )
+            actual_hash = hashlib.sha256(payload_bytes).hexdigest().lower()
+            if actual_hash != expected_hash:
+                raise ValueError(
+                    f"S3StorageDriver integrity check failed "
+                    f"[bucket={bucket}, key={key}]: "
+                    f"expected {hash_algorithm}:{expected_hash}, "
+                    f"got {hash_algorithm}:{actual_hash}"
+                )
 
             payload = Payload()
             payload.ParseFromString(payload_bytes)
diff --git a/tests/contrib/aws/s3driver/test_s3driver.py b/tests/contrib/aws/s3driver/test_s3driver.py
index c389fe07c..ac11158fa 100644
--- a/tests/contrib/aws/s3driver/test_s3driver.py
+++ b/tests/contrib/aws/s3driver/test_s3driver.py
@@ -489,7 +489,7 @@ async def test_retrieve_rejects_unsupported_hash_algorithm(
     async def test_retrieve_without_hash_in_claim(
         self, driver_client: S3StorageDriverClient
     ) -> None:
-        """Claims without hash fields still retrieve successfully (backward compat)."""
+        """Claims missing content hash fields raise ValueError on retrieve."""
         driver = S3StorageDriver(client=driver_client, bucket=BUCKET)
         payload = make_payload("no-hash-claim")
         [claim] = await driver.store(make_store_context(), [payload])
@@ -500,10 +500,11 @@ async def test_retrieve_without_hash_in_claim(
                 "key": claim.claim_data["key"],
             },
         )
-        [retrieved] = await driver.retrieve(
-            StorageDriverRetrieveContext(), [legacy_claim]
-        )
-        assert retrieved == payload
+        with pytest.raises(
+            ValueError,
+            match=r"S3StorageDriver claim is missing required content hash information",
+        ):
+            await driver.retrieve(StorageDriverRetrieveContext(), [legacy_claim])
 
 
 # ---------------------------------------------------------------------------