diff --git a/charts/model-engine/templates/cacher_deployment.yaml b/charts/model-engine/templates/cacher_deployment.yaml index 0f2db0598..62b5f1d43 100644 --- a/charts/model-engine/templates/cacher_deployment.yaml +++ b/charts/model-engine/templates/cacher_deployment.yaml @@ -48,8 +48,9 @@ spec: readinessProbe: exec: command: - - cat - - /tmp/readyz + - bash + - -c + - test -f /tmp/readyz command: - dumb-init - -- diff --git a/charts/model-engine/templates/endpoint_builder_deployment.yaml b/charts/model-engine/templates/endpoint_builder_deployment.yaml index 7791d4050..bf684c41f 100644 --- a/charts/model-engine/templates/endpoint_builder_deployment.yaml +++ b/charts/model-engine/templates/endpoint_builder_deployment.yaml @@ -49,8 +49,9 @@ spec: readinessProbe: exec: command: - - cat - - /tmp/readyz + - bash + - -c + - test -f /tmp/readyz command: - dumb-init - -- diff --git a/model-engine/Dockerfile b/model-engine/Dockerfile index a6a3bb490..b1a587995 100644 --- a/model-engine/Dockerfile +++ b/model-engine/Dockerfile @@ -1,79 +1,70 @@ -# syntax = docker/dockerfile:experimental +# syntax = docker/dockerfile:1 -# --- Builder: compile C extensions (pycurl, etc.) and install Python packages --- -FROM python:3.13-slim AS builder +FROM cgr.dev/chainguard/python:latest-dev AS builder +USER root WORKDIR /workspace +ARG TARGETARCH -RUN apt-get update && apt-get install -y --no-install-recommends \ +RUN apk add --no-cache \ + bash \ + build-base \ + curl \ + curl-dev \ + dumb-init \ git \ - gcc \ - build-essential \ - libssl-dev \ - libcurl4-openssl-dev \ - && rm -rf /var/lib/apt/lists/* + go \ + openssl-dev \ + rsync -RUN pip install pip==24.2 setuptools -RUN pip install awscli==1.34.28 --no-cache-dir +RUN python -m venv /workspace/venv +ENV PATH="/workspace/venv/bin:/usr/sbin:/usr/bin:/sbin:/bin" -WORKDIR /workspace/model-engine/ -COPY model-engine/requirements-test.txt requirements-test.txt +WORKDIR /workspace/model-engine COPY model-engine/requirements.txt requirements.txt COPY model-engine/requirements_override.txt requirements_override.txt -RUN pip install -r requirements-test.txt --no-cache-dir -RUN pip install -r requirements.txt --no-cache-dir -# NOTE: aioboto3==10.4.0 -> aiobotocore==2.4.2 -> urllib3<1.27, which downgrades urllib3 -# from 2.x back to 1.26.x. CVE-2023-43804, CVE-2023-45803, CVE-2024-37891 remain. -# Fix: upgrade aioboto3 to >=15.x (separate PR — breaking API changes). +RUN pip install --upgrade pip==24.2 setuptools cmake setuptools-rust +RUN pip install -r requirements.txt --no-cache-dir --no-build-isolation RUN pip install -r requirements_override.txt --no-cache-dir COPY model-engine/setup.py setup.py COPY model-engine/model_engine_server model_engine_server +COPY model-engine/service_configs service_configs RUN pip install -e . -# --- Runtime: no build tools (eliminates linux-libc-dev and python3.13 CVEs) --- -FROM python:3.13-slim AS model-engine - -WORKDIR /workspace - -# Runtime-only system deps (vim omitted: multiple unpatched HIGH CVEs in Debian 13.4) -RUN apt-get update && apt-get install -y --no-install-recommends \ - dumb-init \ - git \ - openssh-client \ - curl \ - procps \ - htop \ - libcurl4 \ - && rm -rf /var/lib/apt/lists/* - -# Install aws-iam-authenticator (architecture-aware) -RUN ARCH=$(uname -m) && \ - if [ "$ARCH" = "aarch64" ]; then \ - curl -fLo /bin/aws-iam-authenticator https://github.com/kubernetes-sigs/aws-iam-authenticator/releases/download/v0.7.11/aws-iam-authenticator_0.7.11_linux_arm64; \ - else \ - curl -fLo /bin/aws-iam-authenticator https://github.com/kubernetes-sigs/aws-iam-authenticator/releases/download/v0.7.11/aws-iam-authenticator_0.7.11_linux_amd64; \ - fi && \ - chmod +x /bin/aws-iam-authenticator +RUN mkdir -p /tmp/runtime-bin /tmp/runtime-libs && \ + cp /bin/bash /tmp/runtime-bin/bash && \ + cp /usr/bin/dumb-init /tmp/runtime-bin/dumb-init && \ + cp /usr/bin/git /tmp/runtime-bin/git && \ + cp -R /usr/libexec/git-core /tmp/runtime-bin/git-core && \ + cp /usr/lib/libpcre2-8.so.0* /tmp/runtime-libs/ && \ + cp /usr/lib/libcurl.so.4* /tmp/runtime-libs/ && \ + cp /usr/lib/libreadline.so.8* /tmp/runtime-libs/ && \ + cp /usr/lib/libtinfo.so.6* /tmp/runtime-libs/ && \ + cp /usr/lib/libz.so.1* /tmp/runtime-libs/ && \ + git clone --depth 1 --branch v1.35.3 https://github.com/kubernetes/kubernetes.git /tmp/k8s && \ + cd /tmp/k8s && \ + GOTOOLCHAIN=local KUBE_BUILD_PLATFORMS=linux/${TARGETARCH} make WHAT=cmd/kubectl && \ + cp _output/local/bin/linux/${TARGETARCH}/kubectl /tmp/runtime-bin/kubectl && \ + GOBIN=/tmp/runtime-bin GOOS=linux GOARCH=${TARGETARCH} go install sigs.k8s.io/aws-iam-authenticator/cmd/aws-iam-authenticator@v0.7.11 -# Install kubectl (architecture-aware) -RUN ARCH=$(uname -m | sed 's/x86_64/amd64/' | sed 's/aarch64/arm64/') && \ - curl -fLO "https://dl.k8s.io/release/v1.35.3/bin/linux/${ARCH}/kubectl" && \ - chmod +x kubectl && \ - mv kubectl /usr/local/bin/kubectl +FROM cgr.dev/chainguard/python:latest AS model-engine -# Copy Python packages, entry-point scripts, and source tree from builder -COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/python3.13/site-packages -COPY --from=builder /usr/local/bin /usr/local/bin -COPY --from=builder /workspace/model-engine /workspace/model-engine - -RUN useradd --create-home --shell /bin/bash nonroot && \ - chown -R nonroot:nonroot /workspace +USER root +WORKDIR /workspace -COPY integration_tests /workspace/integration_tests +COPY --from=builder --chown=nonroot:nonroot /workspace/venv /workspace/venv +COPY --from=builder --chown=nonroot:nonroot /workspace/model-engine /workspace/model-engine +COPY --from=builder /tmp/runtime-bin/bash /bin/bash +COPY --from=builder /tmp/runtime-bin/dumb-init /usr/bin/dumb-init +COPY --from=builder /tmp/runtime-bin/git /usr/bin/git +COPY --from=builder /tmp/runtime-bin/git-core /usr/libexec/git-core +COPY --from=builder /tmp/runtime-bin/kubectl /usr/local/bin/kubectl +COPY --from=builder /tmp/runtime-bin/aws-iam-authenticator /usr/local/bin/aws-iam-authenticator +COPY --from=builder /tmp/runtime-libs/ /usr/lib/ -WORKDIR /workspace -ENV PYTHONPATH /workspace -ENV WORKSPACE /workspace +ENV PATH="/workspace/venv/bin:/usr/local/bin:/usr/libexec/git-core:/usr/bin:/bin" +ENV PYTHONPATH=/workspace +ENV WORKSPACE=/workspace USER nonroot EXPOSE 5000 diff --git a/model-engine/model_engine_server/common/dtos/llms/vllm.py b/model-engine/model_engine_server/common/dtos/llms/vllm.py index 473af057a..5a85dc30f 100644 --- a/model-engine/model_engine_server/common/dtos/llms/vllm.py +++ b/model-engine/model_engine_server/common/dtos/llms/vllm.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union, cast from model_engine_server.common.pydantic_types import BaseModel, Field from model_engine_server.common.types.gen.openai import ( @@ -275,7 +275,7 @@ class VLLMSamplingParams(BaseModel): (canonical beam search algorithm).""", ) stop_token_ids: Optional[List[int]] = Field( - default_factory=list, + default_factory=lambda: cast(List[int], []), description="""List of tokens that stop the generation when they are generated. The returned output will contain the stop tokens unless the stop tokens are special tokens.""", diff --git a/model-engine/model_engine_server/core/docker/remote_build.py b/model-engine/model_engine_server/core/docker/remote_build.py index 5b1920648..04d85b63c 100644 --- a/model-engine/model_engine_server/core/docker/remote_build.py +++ b/model-engine/model_engine_server/core/docker/remote_build.py @@ -2,16 +2,18 @@ import os import shutil import subprocess +import tarfile import tempfile import uuid from base64 import b64encode from contextlib import ExitStack from dataclasses import dataclass +from fnmatch import fnmatchcase from pathlib import Path from string import Template -from subprocess import PIPE from typing import Dict, Iterable, List, Optional, Union +import boto3 import click import tenacity import yaml @@ -74,49 +76,102 @@ def zip_context( s3_uri = f"s3://{S3_BUCKET}/{s3_file_name}" print(f"Uploading to s3 at: {s3_uri}") try: - # Need to gimme_okta_aws_creds (you can export AWS_PROFILE='ml-admin' right after) - tar_command = _build_tar_cmd(context, ignore_file, folders_to_include) - print(f"Creating archive: {' '.join(tar_command)}") - - with subprocess.Popen( - tar_command, - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL, - ) as proc: - assert proc.stdout is not None - with storage_client.open( - s3_uri, - "wb", - ) as out_file: - shutil.copyfileobj(proc.stdout, out_file) + context_path = Path(context).resolve() + ignore_patterns = _read_ignore_patterns(context_path, ignore_file) + archive_roots = [ + _normalize_path_for_archive(context_path, folder)[1] for folder in folders_to_include + ] + with tempfile.NamedTemporaryFile(suffix=".tar.gz") as archive: + print(f"Creating archive: {archive.name}") + with tarfile.open(archive.name, mode="w:gz") as tar: + for folder, archive_root in zip(folders_to_include, archive_roots): + resolved_path, _ = _normalize_path_for_archive(context_path, folder) + nested_archive_roots = [ + root + for root in archive_roots + if root != archive_root and root.startswith(f"{archive_root}/") + ] + tar.add( + resolved_path, + arcname=archive_root, + filter=lambda tar_info, nested_archive_roots=nested_archive_roots: _filter_archive_member( + tar_info, ignore_patterns, nested_archive_roots + ), + ) + + with ( + open(archive.name, "rb") as archive_in, + storage_client.open( + s3_uri, + "wb", + ) as out_file, + ): + shutil.copyfileobj(archive_in, out_file) print("Done uploading!") except (ClientError, ProfileNotFound): print("Did you gimme_okta_aws_creds and then export AWS_PROFILE='ml-admin'? Try doing both") raise -def _build_tar_cmd( - context: str, ignore_file: Optional[str], folders_to_include: List[str] -) -> List[str]: - assert len(folders_to_include) > 0, "Need at least one folder to create a tar archive from!" +def _read_ignore_patterns(context_path: Path, ignore_file: Optional[str]) -> List[str]: + if ignore_file is None: + return [] - tar_command = ["tar", "-C", context] - - if ignore_file is not None: - ignore_file = os.path.join(context, ignore_file) - if not os.path.isfile(ignore_file): - print( - f"WARNING: File {ignore_file} does not exist in calling context, not using any file as a .dockerignore" - ) + ignore_path = context_path / ignore_file + if not ignore_path.is_file(): + print( + f"WARNING: File {ignore_path} does not exist in calling context, not using any file as a .dockerignore" + ) + return [] + + patterns: List[str] = [] + for raw_line in ignore_path.read_text().splitlines(): + line = raw_line.strip() + if not line or line.startswith("#"): + continue + patterns.append(line.removeprefix("./")) + return patterns + + +def _normalize_path_for_archive(context_path: Path, folder_to_include: str) -> tuple[Path, str]: + include_path = Path(folder_to_include) + resolved_path = ( + include_path.resolve() + if include_path.is_absolute() + else (context_path / include_path).resolve() + ) + try: + archive_root = str(resolved_path.relative_to(context_path)) + except ValueError as exc: + raise ValueError( + f"{folder_to_include=} is not contained within context {context_path}" + ) from exc + return resolved_path, archive_root + + +def _filter_archive_member( + tar_info: tarfile.TarInfo, + ignore_patterns: List[str], + nested_archive_roots: Optional[List[str]] = None, +) -> Optional[tarfile.TarInfo]: + normalized_name = tar_info.name.removeprefix("./") + nested_archive_roots = nested_archive_roots or [] + + for nested_root in nested_archive_roots: + if normalized_name == nested_root or normalized_name.startswith(f"{nested_root}/"): + return None + + for pattern in ignore_patterns: + normalized_pattern = pattern.rstrip("/") + if "/" in normalized_pattern: + pattern_matches = fnmatchcase(normalized_name, normalized_pattern) else: - tar_command.append("--exclude-from") - tar_command.append(ignore_file) - - tar_command.append("-cf") - tar_command.append("-") - tar_command.extend(folders_to_include) - - return tar_command + pattern_matches = "/" not in normalized_name and fnmatchcase( + normalized_name, normalized_pattern + ) + if pattern_matches or normalized_name.startswith(f"{normalized_pattern}/"): + return None + return tar_info def start_build_job( @@ -154,18 +209,18 @@ def start_build_job( f = stack.enter_context(tempfile.NamedTemporaryFile("wt", suffix=".yaml")) template_f = stack.enter_context(open(TEMPLATE_FILE, "rt")) - # In Circle CI we need to retrieve the AWS access key to attach to kaniko + # Keep these values available for any template using explicit env creds, but do not + # shell out to the AWS CLI from the endpoint-builder image. aws_access_key_id = "" aws_secret_access_key = "" + aws_session_token = "" if os.getenv("CIRCLECI"): - aws_access_key_id_result = subprocess.run( - ["aws", "configure", "get", "aws_access_key_id"], check=False, stdout=PIPE - ) - aws_access_key_id = aws_access_key_id_result.stdout.decode().strip() - aws_secret_access_key_result = subprocess.run( - ["aws", "configure", "get", "aws_secret_access_key"], check=False, stdout=PIPE - ) - aws_secret_access_key = aws_secret_access_key_result.stdout.decode().strip() + credentials = boto3.Session().get_credentials() + if credentials is not None: + frozen_credentials = credentials.get_frozen_credentials() + aws_access_key_id = frozen_credentials.access_key or "" + aws_secret_access_key = frozen_credentials.secret_key or "" + aws_session_token = frozen_credentials.token or "" job = Template(template_f.read()).substitute( NAME=job_name, CUSTOM_TAGS=json.dumps(custom_tags_serialized), @@ -176,6 +231,7 @@ def start_build_job( CACHE_REPO=f"{infra_config().docker_repo_prefix}/{cache_name}", AWS_ACCESS_KEY_ID=aws_access_key_id, AWS_SECRET_ACCESS_KEY=aws_secret_access_key, + AWS_SESSION_TOKEN=aws_session_token, NAMESPACE=NAMESPACE, ) yml = yaml.safe_load(job) @@ -214,7 +270,13 @@ def start_build_job( pip_conf_base64 = b64encode(pip_conf_data.encode("utf-8")).decode("utf-8") data = {"data": {"codeartifact_pip_conf": pip_conf_base64}} subprocess.check_output( - ["kubectl", "patch", "secret", "codeartifact-pip-conf", f"-p={json.dumps(data)}"] + [ + "kubectl", + "patch", + "secret", + "codeartifact-pip-conf", + f"-p={json.dumps(data)}", + ] ).decode("utf-8") print(f"Executing Kaniko build command:\n{container_spec}") @@ -293,7 +355,13 @@ def build_remote( ignore_file=ignore_file, ) return start_build_job( - s3_file_name, dockerfile, repotags, use_cache, cache_name, build_args, custom_tags + s3_file_name, + dockerfile, + repotags, + use_cache, + cache_name, + build_args, + custom_tags, ) diff --git a/model-engine/model_engine_server/db/migrations/run_database_migration.sh b/model-engine/model_engine_server/db/migrations/run_database_migration.sh index 8b25f20e5..2f4a5e2bd 100755 --- a/model-engine/model_engine_server/db/migrations/run_database_migration.sh +++ b/model-engine/model_engine_server/db/migrations/run_database_migration.sh @@ -1,10 +1,11 @@ #!/bin/bash -# Get the directory of this script -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# Get the directory of this script without relying on external coreutils. +SCRIPT_PATH="${BASH_SOURCE[0]}" +DIR="$(cd -- "${SCRIPT_PATH%/*}" >/dev/null 2>&1 && pwd)" # Change directory to the directory of this script -cd $DIR +cd "$DIR" # Runs database migration -alembic upgrade head \ No newline at end of file +alembic upgrade head diff --git a/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py b/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py index f20ee6edc..7ce835f6c 100644 --- a/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py +++ b/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py @@ -1,4 +1,6 @@ -from typing import Optional +import os +from pathlib import Path +from typing import Dict, Optional from model_engine_server.common.config import hmi_config from model_engine_server.common.dtos.docker_repository import BuildImageRequest, BuildImageResponse @@ -13,6 +15,28 @@ class ECRDockerRepository(DockerRepository): + @staticmethod + def _normalize_build_args(base_path: str, build_args: Dict[str, str]) -> Dict[str, str]: + normalized = dict(build_args) + base_path_abs = Path(base_path).resolve() + updates: Dict[str, str] = {} + + for key, value in normalized.items(): + if not isinstance(value, str) or not os.path.isabs(value): + continue + + value_abs = Path(value).resolve() + try: + if value_abs == base_path_abs or not value_abs.is_relative_to(base_path_abs): + continue + except ValueError: + continue + + updates[key] = os.path.relpath(str(value_abs), str(base_path_abs)) + + normalized.update(updates) + return normalized + def image_exists( self, image_tag: str, repository_name: str, aws_profile: Optional[str] = None ) -> bool: @@ -43,7 +67,9 @@ def build_image(self, image_params: BuildImageRequest) -> BuildImageResponse: } if image_params.substitution_args: - build_args.update(image_params.substitution_args) + build_args.update( + self._normalize_build_args(image_params.base_path, image_params.substitution_args) + ) build_result = build_remote_block( context=image_params.base_path, @@ -54,7 +80,9 @@ def build_image(self, image_params: BuildImageRequest) -> BuildImageResponse: cache_name=hmi_config.docker_image_layer_cache_repository, ) return BuildImageResponse( - status=build_result.status, logs=build_result.logs, job_name=build_result.job_name + status=build_result.status, + logs=build_result.logs, + job_name=build_result.job_name, ) def get_latest_image_tag(self, repository_name: str) -> str: diff --git a/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py b/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py index 275ba89cc..1a8b7e6f5 100644 --- a/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py +++ b/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py @@ -79,6 +79,7 @@ GIT_TAG: str = os.getenv("GIT_TAG") # type: ignore ENV: str = os.getenv("DD_ENV") # type: ignore WORKSPACE_PATH = os.getenv("WORKSPACE", ".") +BUILD_CONTEXT_TEMP_ROOT = os.path.join(WORKSPACE_PATH, "model-engine", ".build-context") INITIAL_K8S_CACHE_TTL_SECONDS: int = 180 MAX_IMAGE_TAG_LEN = 128 @@ -557,11 +558,9 @@ def _get_user_image_params( # The context should be whatever WORKDIR is in the container running the build app itself. inference_folder = "model-engine/model_engine_server/inference" - requirements_folder = os.path.join(WORKSPACE_PATH, f"requirements_{requirements_hash}") - try: - os.mkdir(requirements_folder) - except FileExistsError: - pass + requirements_folder = self._create_build_context_dir( + prefix=f"requirements_{requirements_hash}_" + ) requirements_file = os.path.join(requirements_folder, "requirements.txt") with open(requirements_file, "w") as f: @@ -610,11 +609,7 @@ def _get_inject_bundle_image_params( # The context should be whatever WORKDIR is in the container running the build app itself. dockerfile = "inject_bundle.Dockerfile" inference_folder = "model-engine/model_engine_server/inference" - bundle_folder = os.path.join(WORKSPACE_PATH, f"bundle_{service_image_hash}") - try: - os.mkdir(bundle_folder) - except FileExistsError: - pass + bundle_folder = self._create_build_context_dir(prefix=f"bundle_{service_image_hash}_") _, model_bundle_path = tempfile.mkstemp(dir=bundle_folder, suffix=".zip") bundle_url = model_bundle.location logger_adapter.info( @@ -819,6 +814,11 @@ def _get_requirements_hash(requirements: List[str]) -> str: usedforsecurity=False, ).hexdigest()[:6] + @staticmethod + def _create_build_context_dir(prefix: str) -> str: + os.makedirs(BUILD_CONTEXT_TEMP_ROOT, exist_ok=True) + return tempfile.mkdtemp(prefix=prefix, dir=BUILD_CONTEXT_TEMP_ROOT) + @staticmethod def _get_image_tag(base_image_tag: str, git_tag: str, requirements_hash: str) -> str: """An identifier from an endpoint's base Docker image & git tag, plus the identify of its diff --git a/model-engine/requirements.in b/model-engine/requirements.in index 3dba1c96a..3dec6909c 100644 --- a/model-engine/requirements.in +++ b/model-engine/requirements.in @@ -12,8 +12,8 @@ azure-storage-blob~=12.19.0 # GCP dependencies gcloud-aio-storage~=9.6 google-auth~=2.25.0 -google-cloud-artifact-registry~=1.13.0 -google-cloud-secret-manager>=2.20 +google-cloud-artifact-registry~=1.21.0 +google-cloud-secret-manager>=2.24.0 google-cloud-storage~=2.14.0 aioboto3==15.5.0 # 10.4.0 forced urllib3<1.27 (CVE-2023-43804/45803/2024-37891); 15.x uses aiobotocore 2.25.x (urllib3<3, 2.x compatible) boto3-stubs[essential]>=1.40.46,<1.40.62 @@ -28,8 +28,8 @@ cryptography>=44.0.0 # not used directly, but needs to be pinned for Microsoft s dataclasses-json>=0.5.7 datadog-api-client==2.11.0 datadog~=0.47.0 -ddtrace>=2.0,<3.0 # 1.8.3 is incompatible with Python 3.12; 2.x has full Py3.12 support -numpy>=1.26.4,<2.3 # Python 3.12/3.13 wheels start at 1.26.0; 2.3+ requires Python 3.11+ +ddtrace>=4.7.1,<5.0 # 4.7.1 publishes CPython 3.14 wheels; needed for public Chainguard latest +numpy>=2.4.4,<2.5 # 2.4.4 publishes CPython 3.14 wheels deprecation~=2.1 docker~=5.0 fastapi>=0.115.8 # bumped to allow starlette>=0.49.1 (CVE-2025-62727 fix) @@ -42,10 +42,11 @@ kubernetes-asyncio==25.11.0 kubernetes~=25.3.0 orjson>=3.10.15 protobuf>=4.25.0 -psycopg2-binary==2.9.10 +psycopg2-binary==2.9.11 py-xid==0.3.0 pycurl~=7.44 # For celery[sqs] -pydantic==2.8.2 +pytz>=2024.1 +pydantic==2.12.5 python-multipart>=0.0.18 quart~=0.19.9 werkzeug>=3.0.6 # CVE-2024-34069, CVE-2024-49766, CVE-2024-49767 diff --git a/model-engine/requirements.txt b/model-engine/requirements.txt index b3f8fb43d..370fe778b 100644 --- a/model-engine/requirements.txt +++ b/model-engine/requirements.txt @@ -146,7 +146,7 @@ datadog==0.47.0 # via -r requirements.in datadog-api-client==2.11.0 # via -r requirements.in -ddtrace==2.21.12 +ddtrace==4.7.1 # via -r requirements.in deprecation==2.1.0 # via -r requirements.in @@ -154,7 +154,7 @@ docker==5.0.3 # via -r requirements.in docutils==0.20.1 # via readme-renderer -envier==0.5.2 +envier==0.6.1 # via ddtrace fastapi==0.135.1 # via -r requirements.in @@ -196,13 +196,13 @@ google-auth==2.25.2 # google-cloud-secret-manager # google-cloud-storage # kubernetes -google-cloud-artifact-registry==1.13.1 +google-cloud-artifact-registry==1.21.0 # via -r requirements.in google-cloud-core==2.5.0 # via # google-cloud-secret-manager # google-cloud-storage -google-cloud-secret-manager==2.21.0 +google-cloud-secret-manager==2.24.0 # via -r requirements.in google-cloud-storage==2.14.0 # via -r requirements.in @@ -223,16 +223,15 @@ greenlet==3.3.2 # -r requirements.in # sqlalchemy grpc-google-iam-v1==0.14.3 - # via - # google-cloud-artifact-registry - # google-cloud-secret-manager -grpcio==1.74.0 + # via google-cloud-artifact-registry +grpcio==1.75.1 # via # google-api-core # googleapis-common-protos # grpc-google-iam-v1 # grpcio-status -grpcio-status==1.71.2 + # google-cloud-secret-manager +grpcio-status==1.75.1 # via # google-api-core # google-cloud-secret-manager @@ -357,7 +356,7 @@ mypy-boto3-sqs==1.40.61 # via boto3-stubs mypy-extensions==1.0.0 # via typing-inspect -numpy==2.2.6 +numpy==2.4.4 # via # -r requirements.in # transformers @@ -398,7 +397,7 @@ proto-plus==1.27.1 # google-api-core # google-cloud-artifact-registry # google-cloud-secret-manager -protobuf==5.29.6 +protobuf==6.33.5 # via # -r requirements.in # ddtrace @@ -409,7 +408,7 @@ protobuf==5.29.6 # grpc-google-iam-v1 # grpcio-status # proto-plus -psycopg2-binary==2.9.10 +psycopg2-binary==2.9.11 # via -r requirements.in py-xid==0.3.0 # via -r requirements.in @@ -426,11 +425,13 @@ pycparser==2.21 # via cffi pycurl==7.45.2 # via -r requirements.in -pydantic==2.8.2 +pytz==2025.2 + # via -r requirements.in +pydantic==2.12.5 # via # -r requirements.in # fastapi -pydantic-core==2.20.1 +pydantic-core==2.41.5 # via pydantic pygments==2.15.1 # via diff --git a/model-engine/tests/unit/core/docker/test_remote_build.py b/model-engine/tests/unit/core/docker/test_remote_build.py new file mode 100644 index 000000000..7b25c1715 --- /dev/null +++ b/model-engine/tests/unit/core/docker/test_remote_build.py @@ -0,0 +1,274 @@ +from io import BytesIO +from pathlib import Path +from types import SimpleNamespace +from unittest import mock + +import pytest +from botocore.exceptions import ClientError +from model_engine_server.core.docker import remote_build + + +def test_read_ignore_patterns_handles_missing_file(tmp_path, capsys): + patterns = remote_build._read_ignore_patterns(tmp_path, ".dockerignore") + + assert patterns == [] + assert "does not exist" in capsys.readouterr().out + + +def test_read_ignore_patterns_skips_comments_and_blank_lines(tmp_path): + ignore_file = tmp_path / ".dockerignore" + ignore_file.write_text("\n# comment\n./foo\nbar/\n") + + patterns = remote_build._read_ignore_patterns(tmp_path, ".dockerignore") + + assert patterns == ["foo", "bar/"] + + +def test_normalize_path_for_archive_relative_path(tmp_path): + folder = tmp_path / "subdir" + folder.mkdir() + + resolved_path, archive_root = remote_build._normalize_path_for_archive(tmp_path, "subdir") + + assert resolved_path == folder.resolve() + assert archive_root == "subdir" + + +def test_normalize_path_for_archive_rejects_path_outside_context(tmp_path): + outside = tmp_path.parent / "outside" + outside.mkdir(exist_ok=True) + + with pytest.raises(ValueError, match="is not contained within context"): + remote_build._normalize_path_for_archive(tmp_path, str(outside)) + + +@pytest.mark.parametrize( + ("member_name", "patterns", "nested_archive_roots", "should_keep"), + [ + ("pkg/file.py", ["pkg"], [], False), + ("pkg/file.py", ["*.py"], [], True), + ("file.py", ["*.py"], [], False), + ("pkg/file.py", ["other"], [], True), + ( + "model-engine/.build-context/reqs/file.txt", + [], + ["model-engine/.build-context/reqs"], + False, + ), + ], +) +def test_filter_archive_member(member_name, patterns, nested_archive_roots, should_keep): + tar_info = mock.Mock() + tar_info.name = member_name + + result = remote_build._filter_archive_member(tar_info, patterns, nested_archive_roots) + + assert (result is tar_info) is should_keep + + +def test_zip_context_uploads_filtered_archive(tmp_path): + context = tmp_path / "context" + include_dir = context / "pkg" + include_dir.mkdir(parents=True) + (context / "root.log").write_text("root") + (include_dir / "keep.txt").write_text("keep") + (include_dir / "drop.log").write_text("drop") + (context / ".dockerignore").write_text("*.log\npkg/*.log\n") + + uploaded = BytesIO() + + class UploadSink: + def __enter__(self): + return uploaded + + def __exit__(self, exc_type, exc, tb): + uploaded.seek(0) + return False + + with mock.patch.object(remote_build.storage_client, "open", return_value=UploadSink()): + remote_build.zip_context( + s3_file_name="bundle.tar.gz", + context=str(context), + folders_to_include=["pkg"], + ignore_file=".dockerignore", + ) + + archive_path = tmp_path / "uploaded.tar.gz" + archive_path.write_bytes(uploaded.getvalue()) + import tarfile + + with tarfile.open(archive_path, mode="r:gz") as tar: + names = tar.getnames() + + assert "pkg/keep.txt" in names + assert "root.log" not in names + assert "pkg/drop.log" not in names + + +def test_zip_context_reraises_storage_errors(tmp_path): + context = tmp_path / "context" + folder = context / "pkg" + folder.mkdir(parents=True) + (folder / "keep.txt").write_text("keep") + error_response = {"Error": {"Code": "AccessDenied", "Message": "denied"}} + + with mock.patch.object( + remote_build.storage_client, + "open", + side_effect=ClientError(error_response, "PutObject"), + ): + with pytest.raises(ClientError): + remote_build.zip_context( + s3_file_name="bundle.tar.gz", + context=str(context), + folders_to_include=["pkg"], + ) + + +def test_zip_context_excludes_nested_explicit_roots_from_parent_archive(tmp_path): + context = tmp_path / "context" + nested_dir = context / "model-engine" / ".build-context" / "reqs" + nested_dir.mkdir(parents=True) + (nested_dir / "requirements.txt").write_text("pkg==1.0") + (context / "model-engine" / "app.py").write_text("print('ok')") + + uploaded = BytesIO() + + class UploadSink: + def __enter__(self): + return uploaded + + def __exit__(self, exc_type, exc, tb): + uploaded.seek(0) + return False + + with mock.patch.object(remote_build.storage_client, "open", return_value=UploadSink()): + remote_build.zip_context( + s3_file_name="bundle.tar.gz", + context=str(context), + folders_to_include=["model-engine", "model-engine/.build-context/reqs"], + ) + + archive_path = tmp_path / "uploaded_nested.tar.gz" + archive_path.write_bytes(uploaded.getvalue()) + import tarfile + + with tarfile.open(archive_path, mode="r:gz") as tar: + names = tar.getnames() + + assert "model-engine/app.py" in names + assert "model-engine/.build-context/reqs/requirements.txt" in names + assert names.count("model-engine/.build-context/reqs/requirements.txt") == 1 + + +def test_start_build_job_uses_boto_credentials_for_circleci(tmp_path): + template_file = tmp_path / "kaniko_template.yaml" + template_file.write_text( + """ +apiVersion: batch/v1 +kind: Job +metadata: + name: $NAME +spec: + template: + spec: + containers: + - name: kaniko + args: [] + env: + - name: AWS_ACCESS_KEY_ID + value: "$AWS_ACCESS_KEY_ID" + - name: AWS_SECRET_ACCESS_KEY + value: "$AWS_SECRET_ACCESS_KEY" + - name: AWS_SESSION_TOKEN + value: "$AWS_SESSION_TOKEN" +""" + ) + captured = {} + + def fake_check_output(args, cwd=None, shell=False): + if shell: + return b"" + if args[:3] == ["kubectl", "patch", "secret"]: + captured["patch_args"] = args + return b"patched" + if args[:3] == ["kubectl", "apply", "-f"]: + captured["apply_args"] = args + captured["apply_yaml"] = Path(args[3]).read_text() + return b"applied" + raise AssertionError(f"unexpected subprocess call: {args}") + + frozen_credentials = SimpleNamespace( + access_key="access", + secret_key="secret", + token="token", + ) + credentials = SimpleNamespace(get_frozen_credentials=lambda: frozen_credentials) + + with ( + mock.patch.object(remote_build, "TEMPLATE_FILE", str(template_file)), + mock.patch.object( + remote_build, + "infra_config", + return_value=SimpleNamespace( + docker_repo_prefix="repo-prefix", + profile_ml_worker="default", + ), + ), + mock.patch.dict(remote_build.os.environ, {"CIRCLECI": "true"}, clear=False), + mock.patch.object( + remote_build.boto3, + "Session", + return_value=mock.Mock(get_credentials=mock.Mock(return_value=credentials)), + ), + mock.patch.object(remote_build.subprocess, "check_output", side_effect=fake_check_output), + ): + job_name = remote_build.start_build_job( + s3_file_name="tmp/context.tar.gz", + path_to_dockerfile="./Dockerfile", + repotags=["repo/image:tag"], + use_cache=True, + cache_name="cache-repo", + build_args={"ARG1": "VALUE1"}, + custom_tags={"team": "ml"}, + ) + + assert job_name.startswith("kaniko-") + assert captured["patch_args"][:4] == ["kubectl", "patch", "secret", "codeartifact-pip-conf"] + assert "--destination=repo-prefix/repo/image:tag" in captured["apply_yaml"] + assert "--build-arg=ARG1=VALUE1" in captured["apply_yaml"] + assert "name: AWS_ACCESS_KEY_ID" in captured["apply_yaml"] + assert "value: access" in captured["apply_yaml"] + assert "name: AWS_SECRET_ACCESS_KEY" in captured["apply_yaml"] + assert "value: secret" in captured["apply_yaml"] + assert "name: AWS_SESSION_TOKEN" in captured["apply_yaml"] + assert "value: token" in captured["apply_yaml"] + + +def test_build_remote_with_explicit_folders_calls_zip_and_start(tmp_path): + dockerfile = tmp_path / "Dockerfile" + dockerfile.write_text("FROM scratch\n") + + with ( + mock.patch.object(remote_build, "zip_context") as mock_zip_context, + mock.patch.object( + remote_build, "start_build_job", return_value="kaniko-job" + ) as mock_start_build_job, + ): + result = remote_build.build_remote( + context=str(tmp_path), + dockerfile=str(dockerfile), + repotags="repo/image:tag", + folders_to_include=["model-engine"], + build_args={"ARG1": "VALUE1"}, + ) + + assert result == "kaniko-job" + mock_zip_context.assert_called_once() + zip_kwargs = mock_zip_context.call_args.kwargs + assert zip_kwargs["context"] == str(tmp_path) + assert zip_kwargs["folders_to_include"] == ["model-engine"] + mock_start_build_job.assert_called_once() + start_args = mock_start_build_job.call_args.args + assert start_args[1] == "./Dockerfile" + assert start_args[2] == ["repo/image:tag"] diff --git a/model-engine/tests/unit/infra/repositories/test_ecr_docker_repository.py b/model-engine/tests/unit/infra/repositories/test_ecr_docker_repository.py new file mode 100644 index 000000000..ce35836b3 --- /dev/null +++ b/model-engine/tests/unit/infra/repositories/test_ecr_docker_repository.py @@ -0,0 +1,115 @@ +from unittest import mock + +from model_engine_server.common.dtos.docker_repository import BuildImageRequest +from model_engine_server.infra.repositories.ecr_docker_repository import ECRDockerRepository + + +def test_normalize_build_args_rewrites_only_paths_inside_base(tmp_path): + base = tmp_path / "repo" + base.mkdir() + inside = base / "nested" / "requirements.txt" + inside.parent.mkdir() + inside.write_text("x") + outside = tmp_path / "outside.txt" + outside.write_text("y") + + normalized = ECRDockerRepository._normalize_build_args( + str(base), + { + "INSIDE": str(inside), + "OUTSIDE": str(outside), + "RELATIVE": "already/relative.txt", + "NON_STRING": 1, + }, + ) + + assert normalized["INSIDE"] == "nested/requirements.txt" + assert normalized["OUTSIDE"] == str(outside) + assert normalized["RELATIVE"] == "already/relative.txt" + assert normalized["NON_STRING"] == 1 + + +def test_normalize_build_args_does_not_rewrite_base_path_itself(tmp_path): + base = tmp_path / "repo" + base.mkdir() + + normalized = ECRDockerRepository._normalize_build_args( + str(base), + { + "CONTEXT_ROOT": str(base), + }, + ) + + assert normalized["CONTEXT_ROOT"] == str(base) + + +def test_build_image_includes_requirements_and_dockerfile_root(tmp_path): + repo = ECRDockerRepository() + base = tmp_path / "repo" + base.mkdir() + requirements = base / "model-engine" / ".build-context" / "reqs" + requirements.mkdir(parents=True) + abs_build_arg = base / "model-engine" / ".build-context" / "reqs" / "requirements.txt" + abs_build_arg.write_text("x") + + image_request = BuildImageRequest( + repo="hosted-model-inference/test", + image_tag="tag", + aws_profile="default", + base_path=str(base), + dockerfile="model-engine/model_engine_server/inference/pytorch_or_tf.user.Dockerfile", + base_image="python:3.8-slim", + requirements_folder="model-engine/.build-context/reqs", + substitution_args={"REQUIREMENTS_FILE": str(abs_build_arg)}, + ) + + build_result = mock.Mock(status=True, logs="ok", job_name="job-1") + + with mock.patch( + "model_engine_server.infra.repositories.ecr_docker_repository.build_remote_block", + return_value=build_result, + ) as mock_build_remote_block: + response = repo.build_image(image_request) + + assert response.status is True + assert response.logs == "ok" + assert response.job_name == "job-1" + + mock_build_remote_block.assert_called_once() + _, kwargs = mock_build_remote_block.call_args + assert kwargs["folders_to_include"] == [ + "model-engine", + "model-engine/.build-context/reqs", + ] + assert kwargs["build_args"] == { + "BASE_IMAGE": "python:3.8-slim", + "REQUIREMENTS_FILE": "model-engine/.build-context/reqs/requirements.txt", + } + + +def test_build_image_without_substitution_args_keeps_base_image_only(tmp_path): + repo = ECRDockerRepository() + base = tmp_path / "repo" + base.mkdir() + + image_request = BuildImageRequest( + repo="hosted-model-inference/test", + image_tag="tag", + aws_profile="default", + base_path=str(base), + dockerfile="model-engine/Dockerfile", + base_image="python:3.13-slim", + ) + + build_result = mock.Mock(status=True, logs="ok", job_name="job-2") + + with mock.patch( + "model_engine_server.infra.repositories.ecr_docker_repository.build_remote_block", + return_value=build_result, + ) as mock_build_remote_block: + response = repo.build_image(image_request) + + assert response.status is True + _, kwargs = mock_build_remote_block.call_args + assert kwargs["folders_to_include"] == ["model-engine"] + assert kwargs["build_args"] == {"BASE_IMAGE": "python:3.13-slim"} diff --git a/model-engine/tests/unit/infra/services/test_live_endpoint_builder_service.py b/model-engine/tests/unit/infra/services/test_live_endpoint_builder_service.py index a0e876eb7..5b22acceb 100644 --- a/model-engine/tests/unit/infra/services/test_live_endpoint_builder_service.py +++ b/model-engine/tests/unit/infra/services/test_live_endpoint_builder_service.py @@ -103,9 +103,13 @@ def set_env_vars(): live_endpoint_builder_service.GIT_TAG = "test_tag" live_endpoint_builder_service.ENV = "test_env" live_endpoint_builder_service.WORKSPACE_PATH = ".." + live_endpoint_builder_service.BUILD_CONTEXT_TEMP_ROOT = "../model-engine/.build-context" live_endpoint_builder_service.open = mock_open() - live_endpoint_builder_service.os.mkdir = Mock() + live_endpoint_builder_service.os.makedirs = Mock() live_endpoint_builder_service.open_wrapper = mock_open() + live_endpoint_builder_service.tempfile.mkdtemp = Mock( + return_value="../model-engine/.build-context/tmpdir" + ) live_endpoint_builder_service.tempfile.mkstemp = Mock(return_value=["", ""])