Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
1c42de3
feat: harden model-engine runtime on chainguard
scale-ballen Apr 16, 2026
e2c5fb3
fix: restore runtime kubectl assets
scale-ballen Apr 16, 2026
54f41f0
fix: tighten runtime binary handling
scale-ballen Apr 16, 2026
a79a067
fix: eliminate remaining runtime binary highs
scale-ballen Apr 16, 2026
b479214
fix: honor target architecture for runtime binaries
scale-ballen Apr 17, 2026
5f5840f
fix: restore CI test compatibility
scale-ballen Apr 17, 2026
8ea29f9
fix: remove coreutils dependency from migration script
scale-ballen Apr 17, 2026
8eabec0
fix: use shell-based readiness probes
scale-ballen Apr 17, 2026
84a1045
fix: remove endpoint builder shell dependencies
scale-ballen Apr 17, 2026
f871d5e
style: format remote build helper
scale-ballen Apr 17, 2026
fea67a9
fix: normalize endpoint build context paths
scale-ballen Apr 17, 2026
b1d2c94
fix: use writable build context temp dirs
scale-ballen Apr 17, 2026
13f827f
fix: unblock simple bundle endpoint builds
scale-ballen Apr 17, 2026
56e57a0
test: cover remote build diff paths
scale-ballen Apr 18, 2026
6637e6e
test: fix remote build credential assertion
scale-ballen Apr 18, 2026
1da90cb
fix: address review feedback on build context handling
scale-ballen Apr 18, 2026
f4261c8
fix: keep temp build contexts out of archives
scale-ballen Apr 18, 2026
42699f1
fix: avoid archiving temp build contexts
scale-ballen Apr 18, 2026
9b4686e
fix: address runtime library and ignore matching reviews
scale-ballen Apr 18, 2026
86f894a
fix: restore root-only ignore glob behavior
scale-ballen Apr 20, 2026
b2cb597
test: align archive ignore coverage with matcher semantics
scale-ballen Apr 20, 2026
823f556
fix: skip rewriting build context root args
scale-ballen Apr 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions charts/model-engine/templates/cacher_deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ spec:
readinessProbe:
exec:
command:
- cat
- /tmp/readyz
- bash
- -c
- test -f /tmp/readyz
command:
- dumb-init
- --
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ spec:
readinessProbe:
exec:
command:
- cat
- /tmp/readyz
- bash
- -c
- test -f /tmp/readyz
command:
- dumb-init
- --
Expand Down
107 changes: 49 additions & 58 deletions model-engine/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,79 +1,70 @@
# syntax = docker/dockerfile:experimental
# syntax = docker/dockerfile:1

# --- Builder: compile C extensions (pycurl, etc.) and install Python packages ---
FROM python:3.13-slim AS builder
FROM cgr.dev/chainguard/python:latest-dev AS builder

USER root
WORKDIR /workspace
ARG TARGETARCH

RUN apt-get update && apt-get install -y --no-install-recommends \
RUN apk add --no-cache \
bash \
build-base \
curl \
curl-dev \
dumb-init \
git \
gcc \
build-essential \
libssl-dev \
libcurl4-openssl-dev \
&& rm -rf /var/lib/apt/lists/*
go \
openssl-dev \
rsync

RUN pip install pip==24.2 setuptools
RUN pip install awscli==1.34.28 --no-cache-dir
RUN python -m venv /workspace/venv
ENV PATH="/workspace/venv/bin:/usr/sbin:/usr/bin:/sbin:/bin"

WORKDIR /workspace/model-engine/
COPY model-engine/requirements-test.txt requirements-test.txt
WORKDIR /workspace/model-engine
COPY model-engine/requirements.txt requirements.txt
COPY model-engine/requirements_override.txt requirements_override.txt
RUN pip install -r requirements-test.txt --no-cache-dir
RUN pip install -r requirements.txt --no-cache-dir
# NOTE: aioboto3==10.4.0 -> aiobotocore==2.4.2 -> urllib3<1.27, which downgrades urllib3
# from 2.x back to 1.26.x. CVE-2023-43804, CVE-2023-45803, CVE-2024-37891 remain.
# Fix: upgrade aioboto3 to >=15.x (separate PR — breaking API changes).
RUN pip install --upgrade pip==24.2 setuptools cmake setuptools-rust
RUN pip install -r requirements.txt --no-cache-dir --no-build-isolation
RUN pip install -r requirements_override.txt --no-cache-dir
COPY model-engine/setup.py setup.py
COPY model-engine/model_engine_server model_engine_server
COPY model-engine/service_configs service_configs
RUN pip install -e .

# --- Runtime: no build tools (eliminates linux-libc-dev and python3.13 CVEs) ---
FROM python:3.13-slim AS model-engine

WORKDIR /workspace

# Runtime-only system deps (vim omitted: multiple unpatched HIGH CVEs in Debian 13.4)
RUN apt-get update && apt-get install -y --no-install-recommends \
dumb-init \
git \
openssh-client \
curl \
procps \
htop \
libcurl4 \
&& rm -rf /var/lib/apt/lists/*

# Install aws-iam-authenticator (architecture-aware)
RUN ARCH=$(uname -m) && \
if [ "$ARCH" = "aarch64" ]; then \
curl -fLo /bin/aws-iam-authenticator https://github.com/kubernetes-sigs/aws-iam-authenticator/releases/download/v0.7.11/aws-iam-authenticator_0.7.11_linux_arm64; \
else \
curl -fLo /bin/aws-iam-authenticator https://github.com/kubernetes-sigs/aws-iam-authenticator/releases/download/v0.7.11/aws-iam-authenticator_0.7.11_linux_amd64; \
fi && \
chmod +x /bin/aws-iam-authenticator
RUN mkdir -p /tmp/runtime-bin /tmp/runtime-libs && \
cp /bin/bash /tmp/runtime-bin/bash && \
cp /usr/bin/dumb-init /tmp/runtime-bin/dumb-init && \
cp /usr/bin/git /tmp/runtime-bin/git && \
cp -R /usr/libexec/git-core /tmp/runtime-bin/git-core && \
cp /usr/lib/libpcre2-8.so.0* /tmp/runtime-libs/ && \
cp /usr/lib/libcurl.so.4* /tmp/runtime-libs/ && \
cp /usr/lib/libreadline.so.8* /tmp/runtime-libs/ && \
cp /usr/lib/libtinfo.so.6* /tmp/runtime-libs/ && \
cp /usr/lib/libz.so.1* /tmp/runtime-libs/ && \
Comment thread
greptile-apps[bot] marked this conversation as resolved.
Comment thread
greptile-apps[bot] marked this conversation as resolved.
git clone --depth 1 --branch v1.35.3 https://github.com/kubernetes/kubernetes.git /tmp/k8s && \
cd /tmp/k8s && \
GOTOOLCHAIN=local KUBE_BUILD_PLATFORMS=linux/${TARGETARCH} make WHAT=cmd/kubectl && \
cp _output/local/bin/linux/${TARGETARCH}/kubectl /tmp/runtime-bin/kubectl && \
GOBIN=/tmp/runtime-bin GOOS=linux GOARCH=${TARGETARCH} go install sigs.k8s.io/aws-iam-authenticator/cmd/aws-iam-authenticator@v0.7.11

# Install kubectl (architecture-aware)
RUN ARCH=$(uname -m | sed 's/x86_64/amd64/' | sed 's/aarch64/arm64/') && \
curl -fLO "https://dl.k8s.io/release/v1.35.3/bin/linux/${ARCH}/kubectl" && \
chmod +x kubectl && \
mv kubectl /usr/local/bin/kubectl
FROM cgr.dev/chainguard/python:latest AS model-engine

# Copy Python packages, entry-point scripts, and source tree from builder
COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/python3.13/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin
COPY --from=builder /workspace/model-engine /workspace/model-engine

RUN useradd --create-home --shell /bin/bash nonroot && \
chown -R nonroot:nonroot /workspace
USER root
WORKDIR /workspace

COPY integration_tests /workspace/integration_tests
COPY --from=builder --chown=nonroot:nonroot /workspace/venv /workspace/venv
COPY --from=builder --chown=nonroot:nonroot /workspace/model-engine /workspace/model-engine
COPY --from=builder /tmp/runtime-bin/bash /bin/bash
COPY --from=builder /tmp/runtime-bin/dumb-init /usr/bin/dumb-init
COPY --from=builder /tmp/runtime-bin/git /usr/bin/git
COPY --from=builder /tmp/runtime-bin/git-core /usr/libexec/git-core
COPY --from=builder /tmp/runtime-bin/kubectl /usr/local/bin/kubectl
COPY --from=builder /tmp/runtime-bin/aws-iam-authenticator /usr/local/bin/aws-iam-authenticator
COPY --from=builder /tmp/runtime-libs/ /usr/lib/

WORKDIR /workspace
ENV PYTHONPATH /workspace
ENV WORKSPACE /workspace
ENV PATH="/workspace/venv/bin:/usr/local/bin:/usr/libexec/git-core:/usr/bin:/bin"
ENV PYTHONPATH=/workspace
ENV WORKSPACE=/workspace

USER nonroot
Comment thread
greptile-apps[bot] marked this conversation as resolved.
EXPOSE 5000
4 changes: 2 additions & 2 deletions model-engine/model_engine_server/common/dtos/llms/vllm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Union, cast

from model_engine_server.common.pydantic_types import BaseModel, Field
from model_engine_server.common.types.gen.openai import (
Expand Down Expand Up @@ -275,7 +275,7 @@ class VLLMSamplingParams(BaseModel):
(canonical beam search algorithm).""",
)
stop_token_ids: Optional[List[int]] = Field(
default_factory=list,
default_factory=lambda: cast(List[int], []),
description="""List of tokens that stop the generation when they are
generated. The returned output will contain the stop tokens unless
the stop tokens are special tokens.""",
Expand Down
162 changes: 115 additions & 47 deletions model-engine/model_engine_server/core/docker/remote_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@
import os
import shutil
import subprocess
import tarfile
import tempfile
import uuid
from base64 import b64encode
from contextlib import ExitStack
from dataclasses import dataclass
from fnmatch import fnmatchcase
from pathlib import Path
from string import Template
from subprocess import PIPE
from typing import Dict, Iterable, List, Optional, Union

import boto3
import click
import tenacity
import yaml
Expand Down Expand Up @@ -74,49 +76,102 @@ def zip_context(
s3_uri = f"s3://{S3_BUCKET}/{s3_file_name}"
print(f"Uploading to s3 at: {s3_uri}")
try:
# Need to gimme_okta_aws_creds (you can export AWS_PROFILE='ml-admin' right after)
tar_command = _build_tar_cmd(context, ignore_file, folders_to_include)
print(f"Creating archive: {' '.join(tar_command)}")

with subprocess.Popen(
tar_command,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
) as proc:
assert proc.stdout is not None
with storage_client.open(
s3_uri,
"wb",
) as out_file:
shutil.copyfileobj(proc.stdout, out_file)
context_path = Path(context).resolve()
ignore_patterns = _read_ignore_patterns(context_path, ignore_file)
archive_roots = [
_normalize_path_for_archive(context_path, folder)[1] for folder in folders_to_include
]
with tempfile.NamedTemporaryFile(suffix=".tar.gz") as archive:
print(f"Creating archive: {archive.name}")
with tarfile.open(archive.name, mode="w:gz") as tar:
for folder, archive_root in zip(folders_to_include, archive_roots):
resolved_path, _ = _normalize_path_for_archive(context_path, folder)
nested_archive_roots = [
root
for root in archive_roots
if root != archive_root and root.startswith(f"{archive_root}/")
]
tar.add(
resolved_path,
arcname=archive_root,
filter=lambda tar_info, nested_archive_roots=nested_archive_roots: _filter_archive_member(
tar_info, ignore_patterns, nested_archive_roots
),
)

with (
open(archive.name, "rb") as archive_in,
storage_client.open(
s3_uri,
"wb",
) as out_file,
):
shutil.copyfileobj(archive_in, out_file)
print("Done uploading!")
except (ClientError, ProfileNotFound):
print("Did you gimme_okta_aws_creds and then export AWS_PROFILE='ml-admin'? Try doing both")
raise


def _build_tar_cmd(
context: str, ignore_file: Optional[str], folders_to_include: List[str]
) -> List[str]:
assert len(folders_to_include) > 0, "Need at least one folder to create a tar archive from!"
def _read_ignore_patterns(context_path: Path, ignore_file: Optional[str]) -> List[str]:
if ignore_file is None:
return []

tar_command = ["tar", "-C", context]

if ignore_file is not None:
ignore_file = os.path.join(context, ignore_file)
if not os.path.isfile(ignore_file):
print(
f"WARNING: File {ignore_file} does not exist in calling context, not using any file as a .dockerignore"
)
ignore_path = context_path / ignore_file
if not ignore_path.is_file():
print(
f"WARNING: File {ignore_path} does not exist in calling context, not using any file as a .dockerignore"
)
return []

patterns: List[str] = []
for raw_line in ignore_path.read_text().splitlines():
line = raw_line.strip()
if not line or line.startswith("#"):
continue
patterns.append(line.removeprefix("./"))
return patterns


def _normalize_path_for_archive(context_path: Path, folder_to_include: str) -> tuple[Path, str]:
    """Resolve an include path and compute its name inside the archive.

    Args:
        context_path: Root directory of the build context.
        folder_to_include: Path to archive, given either as an absolute path
            or relative to ``context_path``.

    Returns:
        A ``(resolved_path, archive_root)`` pair. ``resolved_path`` is the
        absolute, symlink-resolved location on disk; ``archive_root`` is that
        location relative to the context, always written with ``/``
        separators (``"."`` when the include path is the context itself).

    Raises:
        ValueError: If the resolved path is not located inside the context.
    """
    # Resolve the context as well, so a relative or symlinked context still
    # compares correctly against the fully resolved include path.
    context_root = context_path.resolve()
    # Joining an absolute path onto another path yields the absolute path
    # unchanged, so absolute and relative inputs share a single code path.
    resolved_path = (context_root / folder_to_include).resolve()
    try:
        relative_path = resolved_path.relative_to(context_root)
    except ValueError as exc:
        raise ValueError(
            f"{folder_to_include=} is not contained within context {context_path}"
        ) from exc
    # Tar member names use forward slashes; as_posix() keeps the archive root
    # comparable with "/"-based prefix checks regardless of the host OS.
    return resolved_path, relative_path.as_posix()


def _filter_archive_member(
tar_info: tarfile.TarInfo,
ignore_patterns: List[str],
nested_archive_roots: Optional[List[str]] = None,
) -> Optional[tarfile.TarInfo]:
normalized_name = tar_info.name.removeprefix("./")
nested_archive_roots = nested_archive_roots or []

for nested_root in nested_archive_roots:
if normalized_name == nested_root or normalized_name.startswith(f"{nested_root}/"):
return None

for pattern in ignore_patterns:
normalized_pattern = pattern.rstrip("/")
if "/" in normalized_pattern:
pattern_matches = fnmatchcase(normalized_name, normalized_pattern)
else:
tar_command.append("--exclude-from")
tar_command.append(ignore_file)

tar_command.append("-cf")
tar_command.append("-")
tar_command.extend(folders_to_include)

return tar_command
pattern_matches = "/" not in normalized_name and fnmatchcase(
normalized_name, normalized_pattern
)
if pattern_matches or normalized_name.startswith(f"{normalized_pattern}/"):
return None
return tar_info


def start_build_job(
Expand Down Expand Up @@ -154,18 +209,18 @@ def start_build_job(
f = stack.enter_context(tempfile.NamedTemporaryFile("wt", suffix=".yaml"))
template_f = stack.enter_context(open(TEMPLATE_FILE, "rt"))

# In Circle CI we need to retrieve the AWS access key to attach to kaniko
# Keep these values available for any template using explicit env creds, but do not
# shell out to the AWS CLI from the endpoint-builder image.
aws_access_key_id = ""
aws_secret_access_key = ""
aws_session_token = ""
if os.getenv("CIRCLECI"):
aws_access_key_id_result = subprocess.run(
["aws", "configure", "get", "aws_access_key_id"], check=False, stdout=PIPE
)
aws_access_key_id = aws_access_key_id_result.stdout.decode().strip()
aws_secret_access_key_result = subprocess.run(
["aws", "configure", "get", "aws_secret_access_key"], check=False, stdout=PIPE
)
aws_secret_access_key = aws_secret_access_key_result.stdout.decode().strip()
credentials = boto3.Session().get_credentials()
if credentials is not None:
frozen_credentials = credentials.get_frozen_credentials()
aws_access_key_id = frozen_credentials.access_key or ""
aws_secret_access_key = frozen_credentials.secret_key or ""
aws_session_token = frozen_credentials.token or ""
job = Template(template_f.read()).substitute(
NAME=job_name,
CUSTOM_TAGS=json.dumps(custom_tags_serialized),
Expand All @@ -176,6 +231,7 @@ def start_build_job(
CACHE_REPO=f"{infra_config().docker_repo_prefix}/{cache_name}",
AWS_ACCESS_KEY_ID=aws_access_key_id,
AWS_SECRET_ACCESS_KEY=aws_secret_access_key,
AWS_SESSION_TOKEN=aws_session_token,
NAMESPACE=NAMESPACE,
)
yml = yaml.safe_load(job)
Expand Down Expand Up @@ -214,7 +270,13 @@ def start_build_job(
pip_conf_base64 = b64encode(pip_conf_data.encode("utf-8")).decode("utf-8")
data = {"data": {"codeartifact_pip_conf": pip_conf_base64}}
subprocess.check_output(
["kubectl", "patch", "secret", "codeartifact-pip-conf", f"-p={json.dumps(data)}"]
[
"kubectl",
"patch",
"secret",
"codeartifact-pip-conf",
f"-p={json.dumps(data)}",
]
).decode("utf-8")

print(f"Executing Kaniko build command:\n{container_spec}")
Expand Down Expand Up @@ -293,7 +355,13 @@ def build_remote(
ignore_file=ignore_file,
)
return start_build_job(
s3_file_name, dockerfile, repotags, use_cache, cache_name, build_args, custom_tags
s3_file_name,
dockerfile,
repotags,
use_cache,
cache_name,
build_args,
custom_tags,
)


Expand Down
Loading