diff --git a/changelog/+idempotent-file-ops-unsaved-node.fixed.md b/changelog/+idempotent-file-ops-unsaved-node.fixed.md new file mode 100644 index 00000000..2e7ccecf --- /dev/null +++ b/changelog/+idempotent-file-ops-unsaved-node.fixed.md @@ -0,0 +1 @@ +Fixed `InfrahubNode.download_file(skip_if_unchanged=True)` silently returning `0` bytes-written on nodes that had never been saved. Previously, if the local file at `dest` happened to have the same SHA-1 as whatever was cached on the unsaved node's `checksum.value`, callers got back a success-looking `0` result instead of a clear failure — which could mask the fact that the node had no server counterpart at all. Calling `download_file` on an unsaved node now raises `ValueError` consistently, whether or not `skip_if_unchanged` is set. diff --git a/changelog/+idempotent-file-ops.added.md b/changelog/+idempotent-file-ops.added.md new file mode 100644 index 00000000..0ed3091a --- /dev/null +++ b/changelog/+idempotent-file-ops.added.md @@ -0,0 +1,7 @@ +Added SHA-1 idempotency primitives for `CoreFileObject` nodes: + +- `InfrahubNode.matches_local_checksum(source)` / sync variant — compare a local `bytes | Path | BinaryIO` source against the node's server-stored checksum without invoking a transfer. +- `InfrahubNode.upload_if_changed(source, name=None)` / sync variant — stage + save only when the local source differs from the server, returning an `UploadResult(uploaded, checksum)` dataclass. +- `download_file(..., skip_if_unchanged=True)` — short-circuit the download when `dest` already exists on disk with a matching SHA-1. Returns `0` bytes written when skipped. + +A shared `sha1_of_source` helper (streaming, 64 KiB chunks) centralises the hashing convention in `infrahub_sdk.file_handler`. diff --git a/infrahub_sdk/file_handler.py b/infrahub_sdk/file_handler.py index 5d32441a..b5a557b9 100644 --- a/infrahub_sdk/file_handler.py +++ b/infrahub_sdk/file_handler.py @@ -1,5 +1,6 @@ from __future__ import annotations +import hashlib from dataclasses import dataclass from io import BytesIO from pathlib import Path @@ -13,6 +14,52 @@ if TYPE_CHECKING: from .client import InfrahubClient, InfrahubClientSync +_SHA1_CHUNK_BYTES = 64 * 1024 + + +def sha1_of_source(source: bytes | Path | BinaryIO) -> str: + """Compute the SHA-1 hex digest of an upload/download source. + + Accepts the same shapes as :meth:`FileHandlerBase.prepare_upload` so + callers can compare local content against a server-stored checksum + without materialising the full file in memory. + + Args: + source: The content to hash. ``bytes`` are hashed in one shot. + A ``Path`` is read in 64 KiB chunks. A ``BinaryIO`` is read + from its current position, then rewound so downstream + callers can re-read it. + + Returns: + Lowercase SHA-1 hex digest, matching the algorithm Infrahub + stores in ``CoreFileObject.checksum``. + + Raises: + TypeError: If ``source`` is not one of the supported types. + """ + hasher = hashlib.sha1(usedforsecurity=False) + + if isinstance(source, bytes): + hasher.update(source) + return hasher.hexdigest() + + if isinstance(source, Path): + with source.open("rb") as fh: + while chunk := fh.read(_SHA1_CHUNK_BYTES): + hasher.update(chunk) + return hasher.hexdigest() + + if hasattr(source, "read") and hasattr(source, "seek"): + start = source.tell() + try: + while chunk := source.read(_SHA1_CHUNK_BYTES): + hasher.update(chunk) + finally: + source.seek(start) + return hasher.hexdigest() + + raise TypeError(f"sha1_of_source expects bytes, Path, or BinaryIO; got {type(source).__name__}") + @dataclass class PreparedFile: diff --git a/infrahub_sdk/node/__init__.py b/infrahub_sdk/node/__init__.py index 2a1c39e5..136100e8 100644 --- a/infrahub_sdk/node/__init__.py +++ b/infrahub_sdk/node/__init__.py @@ -7,11 +7,13 @@ ARTIFACT_GENERATE_FEATURE_NOT_SUPPORTED_MESSAGE, HFID_STR_SEPARATOR, IP_TYPES, + MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE, PROPERTIES_FLAG, PROPERTIES_OBJECT, SAFE_VALUE, + UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE, ) -from .node import InfrahubNode, InfrahubNodeBase, InfrahubNodeSync +from .node import InfrahubNode, InfrahubNodeBase, InfrahubNodeSync, UploadResult from .parsers import parse_human_friendly_id from .property import NodeProperty from .related_node import RelatedNode, RelatedNodeBase, RelatedNodeSync @@ -23,9 +25,11 @@ "ARTIFACT_GENERATE_FEATURE_NOT_SUPPORTED_MESSAGE", "HFID_STR_SEPARATOR", "IP_TYPES", + "MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE", "PROPERTIES_FLAG", "PROPERTIES_OBJECT", "SAFE_VALUE", + "UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE", "Attribute", "InfrahubNode", "InfrahubNodeBase", @@ -37,5 +41,6 @@ "RelationshipManager", "RelationshipManagerBase", "RelationshipManagerSync", + "UploadResult", "parse_human_friendly_id", ] diff --git a/infrahub_sdk/node/constants.py b/infrahub_sdk/node/constants.py index 7a0bc6fd..6a56584e 100644 --- a/infrahub_sdk/node/constants.py +++ b/infrahub_sdk/node/constants.py @@ -30,6 +30,12 @@ FILE_DOWNLOAD_FEATURE_NOT_SUPPORTED_MESSAGE = ( "calling download_file is only supported for nodes that inherit from CoreFileObject" ) +MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE = ( + "calling matches_local_checksum is only supported for nodes that inherit from CoreFileObject" +) +UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE = ( + "calling upload_if_changed is only supported for nodes that inherit from CoreFileObject" +) HIERARCHY_FETCH_FEATURE_NOT_SUPPORTED_MESSAGE = "Hierarchical fields are not supported for this node." diff --git a/infrahub_sdk/node/node.py b/infrahub_sdk/node/node.py index a47209dc..ddbc6498 100644 --- a/infrahub_sdk/node/node.py +++ b/infrahub_sdk/node/node.py @@ -2,12 +2,13 @@ from collections.abc import Iterable from copy import copy, deepcopy +from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Any, BinaryIO +from typing import TYPE_CHECKING, Any, BinaryIO, overload from ..constants import InfrahubClientMode from ..exceptions import FeatureNotSupportedError, NodeNotFoundError, ResourceNotDefinedError, SchemaNotFoundError -from ..file_handler import FileHandler, FileHandlerBase, FileHandlerSync, PreparedFile +from ..file_handler import FileHandler, FileHandlerBase, FileHandlerSync, PreparedFile, sha1_of_source from ..graphql import Mutation, Query from ..schema import ( GenericSchemaAPI, @@ -24,7 +25,9 @@ ARTIFACT_FETCH_FEATURE_NOT_SUPPORTED_MESSAGE, ARTIFACT_GENERATE_FEATURE_NOT_SUPPORTED_MESSAGE, FILE_DOWNLOAD_FEATURE_NOT_SUPPORTED_MESSAGE, + MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE, PROPERTIES_OBJECT, + UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE, ) from .metadata import NodeMetadata from .related_node import RelatedNode, RelatedNodeBase, RelatedNodeSync @@ -39,6 +42,32 @@ from ..types import Order +@dataclass(frozen=True) +class UploadResult: + """Outcome of an idempotent upload attempt. + + Returned by :meth:`InfrahubNode.upload_if_changed` and its sync twin. + ``was_uploaded`` tells the caller whether a network transfer actually + happened; ``checksum`` carries the SHA-1 of the content held on the + server after the operation — on skip paths that is the server's + pre-existing value, on upload paths it is the locally-computed SHA-1 + used as a proxy (which matches what a standard CoreFileObject server + stores, since the server computes SHA-1 of received bytes). ``None`` + only when no server checksum was available (either the node was + unsaved and nothing was transferred, or the save returned no checksum + value). + + The comparison used by ``upload_if_changed`` reads the node's + ``checksum`` attribute, which was populated when the node was + fetched via ``client.get(...)``. A server-side change to the file + between the fetch and the call will not be detected unless the + caller re-fetches the node first. + """ + + was_uploaded: bool + checksum: str | None + + class InfrahubNodeBase: """Base class for InfrahubNode and InfrahubNodeSync""" @@ -772,7 +801,17 @@ async def artifact_fetch(self, name: str) -> str | dict[str, Any]: artifact = await self._client.get(kind="CoreArtifact", name__value=name, object__ids=[self.id]) return await self._client.object_store.get(identifier=artifact._get_attribute(name="storage_id").value) - async def download_file(self, dest: Path | None = None) -> bytes | int: + @overload + async def download_file(self, dest: None = None, skip_if_unchanged: bool = ...) -> bytes: ... + + @overload + async def download_file(self, dest: Path, skip_if_unchanged: bool = ...) -> int: ... + + async def download_file( + self, + dest: Path | None = None, + skip_if_unchanged: bool = False, + ) -> bytes | int: """Download the file content from this FileObject node. This method is only available for nodes that inherit from CoreFileObject. @@ -783,14 +822,24 @@ async def download_file(self, dest: Path | None = None) -> bytes | int: directly to this path (memory-efficient for large files) and the number of bytes written will be returned. If not provided, the file content will be returned as bytes. + skip_if_unchanged: When ``True``, compute the SHA-1 of the file at + ``dest`` (which must be provided) and compare against the + node's ``checksum`` attribute. If they match, return ``0`` + without hitting the network. The ``checksum`` is the value + loaded when this node was fetched — a later server-side + change to the file will not be detected unless the caller + re-fetches the node first. Returns: If ``dest`` is None: The file content as bytes. If ``dest`` is provided: The number of bytes written to the file. + If ``skip_if_unchanged=True`` and the local file matches the server + checksum: ``0``. Raises: FeatureNotSupportedError: If this node doesn't inherit from CoreFileObject. - ValueError: If the node hasn't been saved yet or file not found. + ValueError: If the node hasn't been saved yet, file not found, or + ``skip_if_unchanged=True`` was passed without a ``dest``. AuthenticationError: If authentication fails. Examples: @@ -799,14 +848,124 @@ async def download_file(self, dest: Path | None = None) -> bytes | int: >>> # Stream to file (memory-efficient for large files) >>> bytes_written = await contract.download_file(dest=Path("/tmp/contract.pdf")) + + >>> # Skip download if local file already matches server checksum + >>> bytes_written = await contract.download_file( + ... dest=Path("/tmp/contract.pdf"), skip_if_unchanged=True + ... ) """ self._validate_file_object_support(message=FILE_DOWNLOAD_FEATURE_NOT_SUPPORTED_MESSAGE) if not self.id: raise ValueError("Cannot download file for a node that hasn't been saved yet.") + if skip_if_unchanged: + if dest is None: + raise ValueError("skip_if_unchanged requires dest to be provided") + if dest.exists() and dest.is_file(): + server_checksum = self.checksum # type: ignore[attr-defined] + if server_checksum.value is not None and sha1_of_source(dest) == server_checksum.value: # type: ignore[union-attr] + return 0 + return await self._file_handler.download(node_id=self.id, branch=self._branch, dest=dest) + async def matches_local_checksum(self, source: bytes | Path | BinaryIO) -> bool: + """Return True if ``source``'s SHA-1 matches this node's server checksum. + + Only available for nodes inheriting from ``CoreFileObject``. Callers + that want to branch on the comparison without invoking a transfer + should use this primitive instead of reading ``node.checksum.value`` + and hashing ``source`` themselves, so the hashing convention stays + centralised in the SDK. + + The comparison is against the ``checksum`` attribute as loaded + when this node was retrieved from the server. If the server's + file has been replaced since the node was fetched, this method + will not see that change — re-fetch the node to refresh the + checksum before comparing. + + Args: + source: Local content to hash and compare. Accepts the same + shapes as :func:`infrahub_sdk.file_handler.sha1_of_source`. + + Returns: + True if the local digest equals the server's stored checksum. + + Raises: + FeatureNotSupportedError: Node is not a ``CoreFileObject``. + ValueError: Node has no server-side checksum yet (unsaved or + file never attached). + """ + self._validate_file_object_support(message=MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE) + + server_checksum = self.checksum # type: ignore[attr-defined] + if server_checksum.value is None: # type: ignore[union-attr] + raise ValueError( + f"{self._schema.kind} node has no server-side checksum; " + "ensure the node has been saved with file content attached before comparing." + ) + + return sha1_of_source(source) == server_checksum.value # type: ignore[union-attr] + + async def upload_if_changed( + self, + source: bytes | Path | BinaryIO, + name: str | None = None, + ) -> UploadResult: + """Upload ``source`` only if its SHA-1 differs from the server checksum. + + Composes :meth:`matches_local_checksum` with :meth:`upload_from_path` + (or :meth:`upload_from_bytes`) and :meth:`save`. For unsaved nodes or + nodes that have no prior server-side file, the upload is always + performed — there is nothing to compare against. + + Args: + source: Content to upload. ``bytes`` and ``BinaryIO`` sources + must supply ``name``; for a ``Path`` the filename is derived + from ``source.name`` when ``name`` is omitted. + name: Filename to use on the server. Required for ``bytes`` / + ``BinaryIO`` sources. + + Returns: + :class:`UploadResult` with ``was_uploaded=False`` (skipped) or + ``was_uploaded=True`` (transfer occurred), and the resulting server + checksum (``None`` only when no server checksum was available + after the operation). + + Raises: + FeatureNotSupportedError: Node is not a ``CoreFileObject``. + ValueError: ``source`` is ``bytes`` or ``BinaryIO`` and no + ``name`` was supplied. + """ + self._validate_file_object_support(message=UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE) + + resolved_name: str | None = name + if resolved_name is None and isinstance(source, Path): + resolved_name = source.name + if not isinstance(source, Path) and resolved_name is None: + raise ValueError("name is required when source is bytes or BinaryIO") + + # Short-circuit only if we have a server checksum to compare against. + server_checksum = getattr(self, "checksum", None) + have_server_state = bool(self.id) and server_checksum is not None and server_checksum.value is not None + + # Compute digest before staging — source may only be readable once. + local_digest = sha1_of_source(source) + + if have_server_state and local_digest == server_checksum.value: # type: ignore[union-attr] + return UploadResult(was_uploaded=False, checksum=server_checksum.value) # type: ignore[union-attr] + + # Either no server state, or checksum mismatched — stage + save. + if isinstance(source, Path): + self.upload_from_path(path=source) + else: + assert resolved_name is not None # validated above for non-Path sources # noqa: S101 + self.upload_from_bytes(content=source, name=resolved_name) + + await self.save() + + return UploadResult(was_uploaded=True, checksum=local_digest) + async def delete(self, timeout: int | None = None, request_context: RequestContext | None = None) -> None: input_data = {"data": {"id": self.id}} if context_data := self._get_request_context(request_context=request_context): @@ -1656,7 +1815,17 @@ def artifact_fetch(self, name: str) -> str | dict[str, Any]: artifact = self._client.get(kind="CoreArtifact", name__value=name, object__ids=[self.id]) return self._client.object_store.get(identifier=artifact._get_attribute(name="storage_id").value) - def download_file(self, dest: Path | None = None) -> bytes | int: + @overload + def download_file(self, dest: None = None, skip_if_unchanged: bool = ...) -> bytes: ... + + @overload + def download_file(self, dest: Path, skip_if_unchanged: bool = ...) -> int: ... + + def download_file( + self, + dest: Path | None = None, + skip_if_unchanged: bool = False, + ) -> bytes | int: """Download the file content from this FileObject node. This method is only available for nodes that inherit from CoreFileObject. @@ -1667,14 +1836,24 @@ def download_file(self, dest: Path | None = None) -> bytes | int: directly to this path (memory-efficient for large files) and the number of bytes written will be returned. If not provided, the file content will be returned as bytes. + skip_if_unchanged: When ``True``, compute the SHA-1 of the file at + ``dest`` (which must be provided) and compare against the + node's ``checksum`` attribute. If they match, return ``0`` + without hitting the network. The ``checksum`` is the value + loaded when this node was fetched — a later server-side + change to the file will not be detected unless the caller + re-fetches the node first. Returns: If ``dest`` is None: The file content as bytes. If ``dest`` is provided: The number of bytes written to the file. + If ``skip_if_unchanged=True`` and the local file matches the server + checksum: ``0``. Raises: FeatureNotSupportedError: If this node doesn't inherit from CoreFileObject. - ValueError: If the node hasn't been saved yet or file not found. + ValueError: If the node hasn't been saved yet, file not found, or + ``skip_if_unchanged=True`` was passed without a ``dest``. AuthenticationError: If authentication fails. Examples: @@ -1683,14 +1862,105 @@ def download_file(self, dest: Path | None = None) -> bytes | int: >>> # Stream to file (memory-efficient for large files) >>> bytes_written = contract.download_file(dest=Path("/tmp/contract.pdf")) + + >>> # Skip download if local file already matches server checksum + >>> bytes_written = contract.download_file( + ... dest=Path("/tmp/contract.pdf"), skip_if_unchanged=True + ... ) """ self._validate_file_object_support(message=FILE_DOWNLOAD_FEATURE_NOT_SUPPORTED_MESSAGE) if not self.id: raise ValueError("Cannot download file for a node that hasn't been saved yet.") + if skip_if_unchanged: + if dest is None: + raise ValueError("skip_if_unchanged requires dest to be provided") + if dest.exists() and dest.is_file(): + server_checksum = self.checksum # type: ignore[attr-defined] + if server_checksum.value is not None and sha1_of_source(dest) == server_checksum.value: # type: ignore[union-attr] + return 0 + return self._file_handler.download(node_id=self.id, branch=self._branch, dest=dest) + def matches_local_checksum(self, source: bytes | Path | BinaryIO) -> bool: + """Return True if ``source``'s SHA-1 matches this node's server checksum. + + Sync equivalent of :meth:`InfrahubNode.matches_local_checksum`. See + that method for full documentation. + + Args: + source: Local content to hash and compare. Accepts the same + shapes as :func:`infrahub_sdk.file_handler.sha1_of_source`. + + Returns: + True if the local digest equals the server's stored checksum. + + Raises: + FeatureNotSupportedError: Node is not a ``CoreFileObject``. + ValueError: Node has no server-side checksum yet. + """ + self._validate_file_object_support(message=MATCHES_LOCAL_CHECKSUM_FEATURE_NOT_SUPPORTED_MESSAGE) + + server_checksum = self.checksum # type: ignore[attr-defined] + if server_checksum.value is None: # type: ignore[union-attr] + raise ValueError( + f"{self._schema.kind} node has no server-side checksum; " + "ensure the node has been saved with file content attached before comparing." + ) + + return sha1_of_source(source) == server_checksum.value # type: ignore[union-attr] + + def upload_if_changed( + self, + source: bytes | Path | BinaryIO, + name: str | None = None, + ) -> UploadResult: + """Upload ``source`` only if its SHA-1 differs from the server checksum. + + Sync equivalent of :meth:`InfrahubNode.upload_if_changed`. See that + method for full documentation. + + Args: + source: Content to upload. + name: Filename to use on the server. + + Returns: + :class:`UploadResult`. + + Raises: + FeatureNotSupportedError: Node is not a ``CoreFileObject``. + ValueError: Bytes/BinaryIO source without ``name``. + """ + self._validate_file_object_support(message=UPLOAD_IF_CHANGED_FEATURE_NOT_SUPPORTED_MESSAGE) + + resolved_name: str | None = name + if resolved_name is None and isinstance(source, Path): + resolved_name = source.name + if not isinstance(source, Path) and resolved_name is None: + raise ValueError("name is required when source is bytes or BinaryIO") + + # Short-circuit only if we have a server checksum to compare against. + server_checksum = getattr(self, "checksum", None) + have_server_state = bool(self.id) and server_checksum is not None and server_checksum.value is not None + + # Compute digest before staging — source may only be readable once. + local_digest = sha1_of_source(source) + + if have_server_state and local_digest == server_checksum.value: # type: ignore[union-attr] + return UploadResult(was_uploaded=False, checksum=server_checksum.value) # type: ignore[union-attr] + + # Either no server state, or checksum mismatched — stage + save. + if isinstance(source, Path): + self.upload_from_path(path=source) + else: + assert resolved_name is not None # validated above for non-Path sources # noqa: S101 + self.upload_from_bytes(content=source, name=resolved_name) + + self.save() + + return UploadResult(was_uploaded=True, checksum=local_digest) + def delete(self, timeout: int | None = None, request_context: RequestContext | None = None) -> None: input_data = {"data": {"id": self.id}} if context_data := self._get_request_context(request_context=request_context): diff --git a/tests/unit/sdk/test_file_handler.py b/tests/unit/sdk/test_file_handler.py index ae59c842..f8ffacfa 100644 --- a/tests/unit/sdk/test_file_handler.py +++ b/tests/unit/sdk/test_file_handler.py @@ -1,5 +1,6 @@ from __future__ import annotations +import hashlib import tempfile from io import BytesIO from pathlib import Path @@ -10,7 +11,7 @@ import pytest from infrahub_sdk.exceptions import AuthenticationError, NodeNotFoundError -from infrahub_sdk.file_handler import FileHandler, FileHandlerBase, FileHandlerSync, PreparedFile +from infrahub_sdk.file_handler import FileHandler, FileHandlerBase, FileHandlerSync, PreparedFile, sha1_of_source if TYPE_CHECKING: from pytest_httpx import HTTPXMock @@ -303,3 +304,47 @@ async def test_file_handler_build_url_without_branch(client_type: str, clients: url = handler._build_url(node_id="node-456", branch=None) assert url == "http://mock/api/storage/files/node-456" + + +KNOWN_CONTENT = b"hello infrahub" +KNOWN_SHA1 = hashlib.sha1(KNOWN_CONTENT, usedforsecurity=False).hexdigest() + + +class TestSha1OfSource: + def test_bytes_matches_known_digest(self) -> None: + assert sha1_of_source(KNOWN_CONTENT) == KNOWN_SHA1 + + def test_path_matches_known_digest(self, tmp_path: Path) -> None: + target = tmp_path / "sample.bin" + target.write_bytes(KNOWN_CONTENT) + assert sha1_of_source(target) == KNOWN_SHA1 + + def test_binaryio_matches_known_digest(self) -> None: + stream = BytesIO(KNOWN_CONTENT) + assert sha1_of_source(stream) == KNOWN_SHA1 + + def test_binaryio_resets_position(self) -> None: + stream = BytesIO(KNOWN_CONTENT) + sha1_of_source(stream) + # Hashing must not consume the stream — later callers (upload_from_bytes) + # still need to read it. + assert stream.read() == KNOWN_CONTENT + + def test_large_file_streams_without_full_read(self, tmp_path: Path) -> None: + # 2 MiB — bigger than the 64 KiB chunk to exercise the streaming loop. + payload = b"x" * (2 * 1024 * 1024) + target = tmp_path / "big.bin" + target.write_bytes(payload) + expected = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + assert sha1_of_source(target) == expected + + def test_rejects_none(self) -> None: + with pytest.raises(TypeError): + sha1_of_source(None) # type: ignore[arg-type] + + def test_binaryio_resets_to_original_position_not_start(self) -> None: + stream = BytesIO(b"prefixhello") + stream.read(6) # advance to position 6, so only b"hello" remains + digest = sha1_of_source(stream) + assert digest == hashlib.sha1(b"hello", usedforsecurity=False).hexdigest() + assert stream.tell() == 6 # rewound to the original non-zero position, not 0 diff --git a/tests/unit/sdk/test_file_object.py b/tests/unit/sdk/test_file_object.py index f6267003..c2c4cec3 100644 --- a/tests/unit/sdk/test_file_object.py +++ b/tests/unit/sdk/test_file_object.py @@ -1,3 +1,4 @@ +import hashlib import tempfile from pathlib import Path @@ -6,7 +7,7 @@ from pytest_httpx import HTTPXMock from infrahub_sdk.exceptions import FeatureNotSupportedError -from infrahub_sdk.node import InfrahubNode, InfrahubNodeSync +from infrahub_sdk.node import InfrahubNode, InfrahubNodeSync, UploadResult from infrahub_sdk.schema import NodeSchemaAPI from tests.unit.sdk.conftest import BothClients @@ -293,3 +294,456 @@ async def test_node_download_file_unsaved_node_raises( node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") with pytest.raises(ValueError, match=r"Cannot download file for a node that hasn't been saved yet"): node.download_file() + + +class TestUploadResult: + def test_carries_was_uploaded_and_checksum(self) -> None: + result = UploadResult(was_uploaded=True, checksum="abc123") + assert result.was_uploaded is True + assert result.checksum == "abc123" + + def test_checksum_optional(self) -> None: + result = UploadResult(was_uploaded=False, checksum=None) + assert result.checksum is None + + +@pytest.mark.parametrize("client_type", client_types) +class TestMatchesLocalChecksum: + async def test_bytes_match(self, client_type: str, clients: BothClients, file_object_schema: NodeSchemaAPI) -> None: + payload = b"matching content" + digest = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "node-1" + node.checksum.value = digest # type: ignore[attr-defined] + + if isinstance(node, InfrahubNode): + assert await node.matches_local_checksum(payload) is True + else: + assert node.matches_local_checksum(payload) is True + + async def test_bytes_differ( + self, client_type: str, clients: BothClients, file_object_schema: NodeSchemaAPI + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "node-1" + node.checksum.value = "different-digest" # type: ignore[attr-defined] + + if isinstance(node, InfrahubNode): + assert await node.matches_local_checksum(b"hello world") is False + else: + assert node.matches_local_checksum(b"hello world") is False + + async def test_path_source( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + tmp_path: Path, + ) -> None: + payload = b"file on disk" + target = tmp_path / "f.bin" + target.write_bytes(payload) + digest = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "node-1" + node.checksum.value = digest # type: ignore[attr-defined] + + if isinstance(node, InfrahubNode): + assert await node.matches_local_checksum(target) is True + else: + assert node.matches_local_checksum(target) is True + + async def test_raises_for_non_file_object( + self, client_type: str, clients: BothClients, non_file_object_schema: NodeSchemaAPI + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=non_file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=non_file_object_schema, branch="main") + + if isinstance(node, InfrahubNode): + with pytest.raises( + FeatureNotSupportedError, + match=r"calling matches_local_checksum is only supported", + ): + await node.matches_local_checksum(b"anything") + else: + with pytest.raises( + FeatureNotSupportedError, + match=r"calling matches_local_checksum is only supported", + ): + node.matches_local_checksum(b"anything") + + async def test_raises_when_no_server_checksum( + self, client_type: str, clients: BothClients, file_object_schema: NodeSchemaAPI + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "node-1" + # Do NOT set node.checksum.value — default is None. + + if isinstance(node, InfrahubNode): + with pytest.raises(ValueError, match=r"has no server-side checksum"): + await node.matches_local_checksum(b"anything") + else: + with pytest.raises(ValueError, match=r"has no server-side checksum"): + node.matches_local_checksum(b"anything") + + +@pytest.mark.parametrize("client_type", client_types) +class TestUploadIfChanged: + async def test_skips_when_checksum_matches( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + httpx_mock: HTTPXMock, + ) -> None: + payload = b"unchanged content" + digest = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "already-on-server" + node._existing = True + node.checksum.value = digest # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + result = await node.upload_if_changed(source=payload, name="f.bin") + else: + result = node.upload_if_changed(source=payload, name="f.bin") + + assert isinstance(result, UploadResult) + assert result.was_uploaded is False + assert result.checksum == digest + # No HTTP request should have been issued. + assert httpx_mock.get_requests() == [] + + async def test_uploads_when_checksum_differs( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + mock_node_update_with_file: HTTPXMock, + ) -> None: + new_content = b"new content" + expected_digest = hashlib.sha1(new_content, usedforsecurity=False).hexdigest() + + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "existing-file-node-456" + node._existing = True + node.checksum.value = "old-server-digest" # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + result = await node.upload_if_changed(source=new_content, name="f.bin") + else: + result = node.upload_if_changed(source=new_content, name="f.bin") + + assert result.was_uploaded is True + # Post-save checksum is the locally computed SHA-1 of the uploaded content. + assert result.checksum == expected_digest + # Positive-path HTTP verification: the update mutation must have been dispatched. + requests = mock_node_update_with_file.get_requests() + assert len(requests) > 0 + # At least one request should be a POST to the GraphQL endpoint (the update mutation). + assert any(r.method == "POST" for r in requests) + + async def test_uploads_when_node_unsaved( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + mock_node_create_with_file: HTTPXMock, + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + # Do NOT set node.id — unsaved. + + if isinstance(node, InfrahubNode): + result = await node.upload_if_changed(source=b"initial content", name=FILE_NAME) + else: + result = node.upload_if_changed(source=b"initial content", name=FILE_NAME) + + assert result.was_uploaded is True + assert result.checksum is not None + + async def test_derives_name_from_path( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + mock_node_update_with_file: HTTPXMock, + tmp_path: Path, + ) -> None: + target = tmp_path / "derived-name.bin" + target.write_bytes(b"content") + + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "existing-file-node-456" + node._existing = True + node.checksum.value = "old-server-digest" # type: ignore[attr-defined, union-attr] + + # No explicit name — should derive from target.name internally. + if isinstance(node, InfrahubNode): + result = await node.upload_if_changed(source=target) + else: + result = node.upload_if_changed(source=target) + + assert result.was_uploaded is True + + async def test_requires_name_for_bytes( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "some-id" + node.checksum.value = "x" # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + with pytest.raises(ValueError, match=r"name is required"): + await node.upload_if_changed(source=b"bytes content") # no name supplied + else: + with pytest.raises(ValueError, match=r"name is required"): + node.upload_if_changed(source=b"bytes content") # no name supplied + + async def test_raises_for_non_file_object( + self, + client_type: str, + clients: BothClients, + non_file_object_schema: NodeSchemaAPI, + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node = InfrahubNode(client=client, schema=non_file_object_schema, branch="main") + else: + node = InfrahubNodeSync(client=client, schema=non_file_object_schema, branch="main") + + if isinstance(node, InfrahubNode): + with pytest.raises( + FeatureNotSupportedError, + match=r"calling upload_if_changed is only supported", + ): + await node.upload_if_changed(source=b"x", name="f.bin") + else: + with pytest.raises( + FeatureNotSupportedError, + match=r"calling upload_if_changed is only supported", + ): + node.upload_if_changed(source=b"x", name="f.bin") + + +@pytest.mark.parametrize("client_type", client_types) +class TestDownloadSkipIfUnchanged: + async def test_skip_when_local_matches( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + tmp_path: Path, + httpx_mock: HTTPXMock, + ) -> None: + payload = b"identical content" + digest = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + dest = tmp_path / "local.bin" + dest.write_bytes(payload) + + client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "file-node-skip" + node.checksum.value = digest # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + bytes_written = await node.download_file(dest=dest, skip_if_unchanged=True) + else: + bytes_written = node.download_file(dest=dest, skip_if_unchanged=True) + + assert bytes_written == 0 + # pytest-httpx raises if any unregistered request is attempted; this also asserts + # that zero requests were made at all. + assert httpx_mock.get_requests() == [] + + async def test_downloads_when_local_differs( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + tmp_path: Path, + mock_download_file_to_disk: HTTPXMock, # existing fixture + ) -> None: + dest = tmp_path / "local.bin" + dest.write_bytes(b"stale content") # different from FILE_CONTENT + + client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "file-node-stream" # id matches mock_download_file_to_disk + node.checksum.value = "server-digest-different-from-local" # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + bytes_written = await node.download_file(dest=dest, skip_if_unchanged=True) + else: + bytes_written = node.download_file(dest=dest, skip_if_unchanged=True) + + assert bytes_written == len(FILE_CONTENT) + assert dest.read_bytes() == FILE_CONTENT + # Positive-path HTTP verification: the GET to the storage endpoint must have fired. + download_requests = [ + r + for r in mock_download_file_to_disk.get_requests() + if r.method == "GET" and "/api/storage/files/" in r.url.path + ] + assert len(download_requests) == 1 + + async def test_downloads_when_dest_missing( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + tmp_path: Path, + mock_download_file_to_disk: HTTPXMock, + ) -> None: + dest = tmp_path / "missing.bin" # does not exist + assert not dest.exists() + + client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "file-node-stream" + node.checksum.value = "any-digest" # type: ignore[attr-defined, union-attr] + + if isinstance(node, InfrahubNode): + bytes_written = await node.download_file(dest=dest, skip_if_unchanged=True) + else: + bytes_written = node.download_file(dest=dest, skip_if_unchanged=True) + + assert bytes_written == len(FILE_CONTENT) + assert dest.exists() + + async def test_raises_when_skip_without_dest( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + ) -> None: + client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "file-node-1" + node.checksum.value = "any-digest" # type: ignore[attr-defined, union-attr] + + with pytest.raises(ValueError, match=r"skip_if_unchanged requires dest"): + if isinstance(node, InfrahubNode): + await node.download_file(dest=None, skip_if_unchanged=True) + else: + node.download_file(dest=None, skip_if_unchanged=True) + + async def test_default_behavior_unchanged( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + mock_download_file: HTTPXMock, # existing fixture for in-memory download + ) -> None: + # skip_if_unchanged defaults to False — download always occurs. + client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + node.id = "file-node-123" # matches mock_download_file + + if isinstance(node, InfrahubNode): + content = await node.download_file() # no flag + else: + content = node.download_file() # no flag + + assert isinstance(content, bytes) + assert content == FILE_CONTENT + + async def test_skip_raises_for_unsaved_node( + self, + client_type: str, + clients: BothClients, + file_object_schema: NodeSchemaAPI, + tmp_path: Path, + ) -> None: + # Unsaved node (no id) with a dest whose checksum happens to match + # the node's checksum attribute should still raise the unsaved-node + # ValueError, not silently return 0. + payload = b"content" + digest = hashlib.sha1(payload, usedforsecurity=False).hexdigest() + dest = tmp_path / "local.bin" + dest.write_bytes(payload) + + client = getattr(clients, client_type) + if client_type == "standard": + node: InfrahubNode | InfrahubNodeSync = InfrahubNode( + client=client, schema=file_object_schema, branch="main" + ) + else: + node = InfrahubNodeSync(client=client, schema=file_object_schema, branch="main") + # Do NOT set node.id — unsaved. + node.checksum.value = digest # type: ignore[attr-defined, union-attr] + + with pytest.raises(ValueError, match=r"hasn't been saved yet"): + if isinstance(node, InfrahubNode): + await node.download_file(dest=dest, skip_if_unchanged=True) + else: + node.download_file(dest=dest, skip_if_unchanged=True)