From 615f48759f90bbf9f9d85da8299938a0c8facdaf Mon Sep 17 00:00:00 2001 From: mrgemy95 Date: Thu, 14 May 2026 13:24:43 -0400 Subject: [PATCH] Switch container registry to Google Artifact Registry and honour uv.lock at build time - docker.yml: auth via WIF, push to GAR. Registry coordinates come from GCP_AR_REGION/GCP_PROJECT_ID/GCP_AR_REPOSITORY variables and GCP_WIF_PROVIDER/GCP_WIF_SERVICE_ACCOUNT secrets. - vllm.Dockerfile, sglang.Dockerfile: install pinned to uv.lock via 'uv export --frozen | uv pip install --no-deps' (uv pip install alone ignores the lockfile). Adds a build-time import canary. - README and docs/index: point to GAR. --- .github/workflows/docker.yml | 58 +++++++++++++++++++++++++++++------- README.md | 13 +++++++- docs/index.md | 2 +- sglang.Dockerfile | 16 ++++++++-- vllm.Dockerfile | 27 +++++++++++++++-- 5 files changed, 98 insertions(+), 18 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index ecfb058a..1bdf8eff 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -21,6 +21,13 @@ on: - .github/workflows/docker.yml - uv.lock +# Required for Workload Identity Federation to GCP. +# `id-token: write` lets the runner mint an OIDC token that GCP exchanges for +# short-lived credentials. `contents: read` is needed for actions/checkout. +permissions: + contents: read + id-token: write + jobs: push_to_registry: name: Build and push Docker images @@ -29,6 +36,17 @@ jobs: strategy: matrix: backend: [vllm, sglang] + env: + # These are read from GitHub Actions repository variables (Settings -> + # Secrets and variables -> Actions -> Variables). Set them once for the + # repo and they apply to every workflow run. + # + # GCP_AR_REGION e.g. us-central1, northamerica-northeast1 + # GCP_PROJECT_ID e.g. my-gcp-project-123456 + # GCP_AR_REPOSITORY e.g. 
vector-inference (must already exist in GAR) + GCP_AR_REGION: ${{ vars.GCP_AR_REGION }} + GCP_PROJECT_ID: ${{ vars.GCP_PROJECT_ID }} + GCP_AR_REPOSITORY: ${{ vars.GCP_AR_REPOSITORY }} steps: - name: Checkout repository uses: actions/checkout@v6.0.2 @@ -39,20 +57,23 @@ jobs: VERSION=$(grep -A 1 "name = \"${{ matrix.backend }}\"" uv.lock | grep version | cut -d '"' -f 2) echo "version=$VERSION" >> $GITHUB_OUTPUT + - name: Compute image base path + id: image + run: | + BASE="${GCP_AR_REGION}-docker.pkg.dev/${GCP_PROJECT_ID}/${GCP_AR_REPOSITORY}/vector-inference-${{ matrix.backend }}" + echo "base=${BASE}" >> $GITHUB_OUTPUT + - name: Maximize build space run: | echo "Disk space before cleanup:" df -h - # Remove unnecessary pre-installed software sudo rm -rf /usr/share/dotnet sudo rm -rf /usr/local/lib/android sudo rm -rf /opt/ghc sudo rm -rf /opt/hostedtoolcache/CodeQL sudo rm -rf /usr/local/share/boost sudo rm -rf "$AGENT_TOOLSDIRECTORY" - # Clean apt cache sudo apt-get clean - # Remove docker images docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true echo "Disk space after cleanup:" df -h @@ -60,17 +81,34 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 - - name: Log in to Docker Hub - uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 + # Authenticate to Google Cloud using Workload Identity Federation. + # No long-lived service-account JSON key is stored in GitHub. + # + # Required secrets: + # GCP_WIF_PROVIDER Full resource name of the Workload Identity + # Provider, e.g. + # projects/123456789/locations/global/workloadIdentityPools/github-pool/providers/github-provider + # GCP_WIF_SERVICE_ACCOUNT Email of the service account to impersonate, + # e.g. 
gh-actions-pusher@my-project.iam.gserviceaccount.com + - name: Authenticate to Google Cloud + id: gcp-auth + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.GCP_WIF_PROVIDER }} + service_account: ${{ secrets.GCP_WIF_SERVICE_ACCOUNT }} + + - name: Log in to Google Artifact Registry + uses: docker/login-action@v3 with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + registry: ${{ env.GCP_AR_REGION }}-docker.pkg.dev + username: oauth2accesstoken + password: ${{ steps.gcp-auth.outputs.access_token }} - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf with: - images: vectorinstitute/vector-inference-${{ matrix.backend }} + images: ${{ steps.image.outputs.base }} - name: Build and push Docker image uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 @@ -80,6 +118,6 @@ jobs: push: true tags: | ${{ steps.meta.outputs.tags }} - vectorinstitute/vector-inference-${{ matrix.backend }}:${{ steps.backend-version.outputs.version }} - vectorinstitute/vector-inference-${{ matrix.backend }}:latest + ${{ steps.image.outputs.base }}:${{ steps.backend-version.outputs.version }} + ${{ steps.image.outputs.base }}:latest labels: ${{ steps.meta.outputs.labels }} diff --git a/README.md b/README.md index 3aba6eb1..5fafa219 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,18 @@ If you are using the Vector cluster environment, and you don't need any customiz ```bash pip install vec-inf ``` -Otherwise, we recommend using the provided [`vllm.Dockerfile`](vllm.Dockerfile) and [`sglang.Dockerfile`](sglang.Dockerfile) to set up your own environment with the package. 
The built images are available through [Docker Hub](https://hub.docker.com/orgs/vectorinstitute/repositories) +Otherwise, we recommend using the provided [`vllm.Dockerfile`](vllm.Dockerfile) and [`sglang.Dockerfile`](sglang.Dockerfile) to set up your own environment with the package. The built images are published to **Google Artifact Registry** at: + +``` +<REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/vector-inference-vllm +<REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/vector-inference-sglang +``` + +Pull an image with (after `gcloud auth configure-docker <REGION>-docker.pkg.dev`): + +```bash +docker pull <REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/vector-inference-vllm:latest +``` If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it: * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`. diff --git a/docs/index.md b/docs/index.md index 5992c8b9..06626b00 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,7 +11,7 @@ If you are using the Vector cluster environment, and you don't need any customiz pip install vec-inf ``` -Otherwise, we recommend using the provided [`vllm.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/vllm.Dockerfile) and [`sglang.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/sglang.Dockerfile) to set up your own environment with the package. The built images are available through [Docker Hub](https://hub.docker.com/orgs/vectorinstitute/repositories) +Otherwise, we recommend using the provided [`vllm.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/vllm.Dockerfile) and [`sglang.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/sglang.Dockerfile) to set up your own environment with the package. The built images are published to **Google Artifact Registry** at `<REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/vector-inference-{vllm,sglang}`. 
Run `gcloud auth configure-docker <REGION>-docker.pkg.dev` once, then `docker pull` the image you want. If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it: diff --git a/sglang.Dockerfile b/sglang.Dockerfile index 8aab2f9a..eb087f16 100644 --- a/sglang.Dockerfile +++ b/sglang.Dockerfile @@ -56,11 +56,21 @@ ENV NCCL_DEBUG=INFO WORKDIR /vec-inf COPY . /vec-inf -# Install project dependencies with sglang backend and inference group -# Use --no-cache to prevent uv from storing both downloaded and extracted packages -RUN uv pip install --system -e .[sglang] --group inference --prerelease=allow --no-cache && \ +# Install project dependencies pinned to uv.lock. +# See vllm.Dockerfile for the full rationale; same logic, sglang extra. +RUN uv export --frozen --no-emit-project --no-hashes \ + --extra sglang --group inference \ + -o /tmp/requirements.txt && \ + uv pip install --system --no-cache --no-deps --prerelease=allow \ + -r /tmp/requirements.txt && \ + uv pip install --system --no-cache --no-deps -e . && \ + rm -f /tmp/requirements.txt && \ rm -rf /root/.cache/uv /tmp/* +# Build-time canary: fail the build if the locked deps cannot be imported. +RUN python3.12 -c "import sglang, torch; \ + print('sglang', sglang.__version__, '/ torch', torch.__version__)" + # Install a single, system NCCL (from NVIDIA CUDA repo in base image) RUN apt-get update && apt-get install -y --allow-change-held-packages\ libnccl2 libnccl-dev \ diff --git a/vllm.Dockerfile b/vllm.Dockerfile index 88679ac7..3727593b 100644 --- a/vllm.Dockerfile +++ b/vllm.Dockerfile @@ -56,11 +56,32 @@ ENV NCCL_DEBUG=INFO WORKDIR /vec-inf COPY . 
/vec-inf -# Install project dependencies with vllm backend and inference group -# Use --no-cache to prevent uv from storing both downloaded and extracted packages -RUN uv pip install --system -e .[vllm] --group inference --prerelease=allow --no-cache && \ +# Install project dependencies pinned to uv.lock. +# +# `uv pip install` does NOT consult uv.lock -- only `uv sync` does, and +# `uv sync` requires a venv (incompatible with --system). Without this, +# every image build does fresh PyPI resolution and may pick a different +# transitive set than what the lockfile records (this is how :0.19.0 +# shipped with the pyarrow/datasets ABI mismatch). Instead: +# 1. Export uv.lock to a fully-pinned requirements.txt (no resolver). +# 2. Install transitives with --no-deps so nothing is re-resolved. +# 3. Install the project itself editable, also --no-deps. +RUN uv export --frozen --no-emit-project --no-hashes \ + --extra vllm --group inference \ + -o /tmp/requirements.txt && \ + uv pip install --system --no-cache --no-deps --prerelease=allow \ + -r /tmp/requirements.txt && \ + uv pip install --system --no-cache --no-deps -e . && \ + rm -f /tmp/requirements.txt && \ rm -rf /root/.cache/uv /tmp/* +# Build-time canary: fail the build if the locked deps cannot be imported +# together. This is the check that would have caught the pyarrow/datasets +# ABI mismatch in :0.19.0 at build time instead of at job start. +RUN python3.12 -c "import vllm, datasets, pyarrow, transformers, torch; \ + print('vllm', vllm.__version__, '/ datasets', datasets.__version__, \ + '/ pyarrow', pyarrow.__version__, '/ torch', torch.__version__)" + # Install a single, system NCCL (from NVIDIA CUDA repo in base image) RUN apt-get update && apt-get install -y --allow-change-held-packages\ libnccl2 libnccl-dev \