From 615f48759f90bbf9f9d85da8299938a0c8facdaf Mon Sep 17 00:00:00 2001 From: mrgemy95 Date: Thu, 14 May 2026 13:24:43 -0400 Subject: [PATCH] Switch container registry to Google Artifact Registry and honour uv.lock at build time - docker.yml: auth via WIF, push to GAR. Registry coordinates come from GCP_AR_REGION/GCP_PROJECT_ID/GCP_AR_REPOSITORY variables and GCP_WIF_PROVIDER/GCP_WIF_SERVICE_ACCOUNT secrets. - vllm.Dockerfile, sglang.Dockerfile: install pinned to uv.lock via 'uv export --frozen | uv pip install --no-deps' (uv pip install alone ignores the lockfile). Adds a build-time import canary. - README and docs/index: point to GAR. --- .github/workflows/docker.yml | 58 +++++++++++++++++++++++++++++------- README.md | 13 +++++++- docs/index.md | 2 +- sglang.Dockerfile | 16 ++++++++-- vllm.Dockerfile | 27 +++++++++++++++-- 5 files changed, 98 insertions(+), 18 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index ecfb058a..1bdf8eff 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -21,6 +21,13 @@ on: - .github/workflows/docker.yml - uv.lock +# Required for Workload Identity Federation to GCP. +# `id-token: write` lets the runner mint an OIDC token that GCP exchanges for +# short-lived credentials. `contents: read` is needed for actions/checkout. +permissions: + contents: read + id-token: write + jobs: push_to_registry: name: Build and push Docker images @@ -29,6 +36,17 @@ jobs: strategy: matrix: backend: [vllm, sglang] + env: + # These are read from GitHub Actions repository variables (Settings -> + # Secrets and variables -> Actions -> Variables). Set them once for the + # repo and they apply to every workflow run. + # + # GCP_AR_REGION e.g. us-central1, northamerica-northeast1 + # GCP_PROJECT_ID e.g. my-gcp-project-123456 + # GCP_AR_REPOSITORY e.g. 
vector-inference (must already exist in GAR) + GCP_AR_REGION: ${{ vars.GCP_AR_REGION }} + GCP_PROJECT_ID: ${{ vars.GCP_PROJECT_ID }} + GCP_AR_REPOSITORY: ${{ vars.GCP_AR_REPOSITORY }} steps: - name: Checkout repository uses: actions/checkout@v6.0.2 @@ -39,20 +57,23 @@ jobs: VERSION=$(grep -A 1 "name = \"${{ matrix.backend }}\"" uv.lock | grep version | cut -d '"' -f 2) echo "version=$VERSION" >> $GITHUB_OUTPUT + - name: Compute image base path + id: image + run: | + BASE="${GCP_AR_REGION}-docker.pkg.dev/${GCP_PROJECT_ID}/${GCP_AR_REPOSITORY}/vector-inference-${{ matrix.backend }}" + echo "base=${BASE}" >> $GITHUB_OUTPUT + - name: Maximize build space run: | echo "Disk space before cleanup:" df -h - # Remove unnecessary pre-installed software sudo rm -rf /usr/share/dotnet sudo rm -rf /usr/local/lib/android sudo rm -rf /opt/ghc sudo rm -rf /opt/hostedtoolcache/CodeQL sudo rm -rf /usr/local/share/boost sudo rm -rf "$AGENT_TOOLSDIRECTORY" - # Clean apt cache sudo apt-get clean - # Remove docker images docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true echo "Disk space after cleanup:" df -h @@ -60,17 +81,34 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 - - name: Log in to Docker Hub - uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 + # Authenticate to Google Cloud using Workload Identity Federation. + # No long-lived service-account JSON key is stored in GitHub. + # + # Required secrets: + # GCP_WIF_PROVIDER Full resource name of the Workload Identity + # Provider, e.g. + # projects/123456789/locations/global/workloadIdentityPools/github-pool/providers/github-provider + # GCP_WIF_SERVICE_ACCOUNT Email of the service account to impersonate, + # e.g. 
gh-actions-pusher@my-project.iam.gserviceaccount.com + - name: Authenticate to Google Cloud + id: gcp-auth + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.GCP_WIF_PROVIDER }} + service_account: ${{ secrets.GCP_WIF_SERVICE_ACCOUNT }} + + - name: Log in to Google Artifact Registry + uses: docker/login-action@v3 with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + registry: ${{ env.GCP_AR_REGION }}-docker.pkg.dev + username: oauth2accesstoken + password: ${{ steps.gcp-auth.outputs.access_token }} - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf with: - images: vectorinstitute/vector-inference-${{ matrix.backend }} + images: ${{ steps.image.outputs.base }} - name: Build and push Docker image uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 @@ -80,6 +118,6 @@ jobs: push: true tags: | ${{ steps.meta.outputs.tags }} - vectorinstitute/vector-inference-${{ matrix.backend }}:${{ steps.backend-version.outputs.version }} - vectorinstitute/vector-inference-${{ matrix.backend }}:latest + ${{ steps.image.outputs.base }}:${{ steps.backend-version.outputs.version }} + ${{ steps.image.outputs.base }}:latest labels: ${{ steps.meta.outputs.labels }} diff --git a/README.md b/README.md index 3aba6eb1..5fafa219 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,18 @@ If you are using the Vector cluster environment, and you don't need any customiz ```bash pip install vec-inf ``` -Otherwise, we recommend using the provided [`vllm.Dockerfile`](vllm.Dockerfile) and [`sglang.Dockerfile`](sglang.Dockerfile) to set up your own environment with the package. 
The built images are available through [Docker Hub](https://hub.docker.com/orgs/vectorinstitute/repositories) +Otherwise, we recommend using the provided [`vllm.Dockerfile`](vllm.Dockerfile) and [`sglang.Dockerfile`](sglang.Dockerfile) to set up your own environment with the package. The built images are published to **Google Artifact Registry** at: + +``` +<REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/vector-inference-vllm +<REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/vector-inference-sglang +``` + +Pull an image with (after `gcloud auth configure-docker <REGION>-docker.pkg.dev`): + +```bash +docker pull <REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/vector-inference-vllm:latest +``` If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it: * Clone the repository and update the `environment.yaml` and the `models.yaml` file in [`vec_inf/config`](vec_inf/config/), then install from source by running `pip install .`. diff --git a/docs/index.md b/docs/index.md index 5992c8b9..06626b00 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,7 +11,7 @@ If you are using the Vector cluster environment, and you don't need any customiz pip install vec-inf ``` -Otherwise, we recommend using the provided [`vllm.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/vllm.Dockerfile) and [`sglang.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/sglang.Dockerfile) to set up your own environment with the package. The built images are available through [Docker Hub](https://hub.docker.com/orgs/vectorinstitute/repositories) +Otherwise, we recommend using the provided [`vllm.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/vllm.Dockerfile) and [`sglang.Dockerfile`](https://github.com/VectorInstitute/vector-inference/blob/main/sglang.Dockerfile) to set up your own environment with the package. The built images are published to **Google Artifact Registry** at `<REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/vector-inference-{vllm,sglang}`. 
Run `gcloud auth configure-docker <REGION>-docker.pkg.dev` once, then `docker pull` the image you want. If you'd like to use `vec-inf` on your own Slurm cluster, you would need to update the configuration files, there are 3 ways to do it: diff --git a/sglang.Dockerfile b/sglang.Dockerfile index 8aab2f9a..eb087f16 100644 --- a/sglang.Dockerfile +++ b/sglang.Dockerfile @@ -56,11 +56,21 @@ ENV NCCL_DEBUG=INFO WORKDIR /vec-inf COPY . /vec-inf -# Install project dependencies with sglang backend and inference group -# Use --no-cache to prevent uv from storing both downloaded and extracted packages -RUN uv pip install --system -e .[sglang] --group inference --prerelease=allow --no-cache && \ +# Install project dependencies pinned to uv.lock. +# See vllm.Dockerfile for the full rationale; same logic, sglang extra. +RUN uv export --frozen --no-emit-project --no-hashes \ + --extra sglang --group inference \ + -o /tmp/requirements.txt && \ + uv pip install --system --no-cache --no-deps --prerelease=allow \ + -r /tmp/requirements.txt && \ + uv pip install --system --no-cache --no-deps -e . && \ + rm -f /tmp/requirements.txt && \ rm -rf /root/.cache/uv /tmp/* +# Build-time canary: fail the build if the locked deps cannot be imported. +RUN python3.12 -c "import sglang, torch; \ + print('sglang', sglang.__version__, '/ torch', torch.__version__)" + # Install a single, system NCCL (from NVIDIA CUDA repo in base image) RUN apt-get update && apt-get install -y --allow-change-held-packages\ libnccl2 libnccl-dev \ diff --git a/vllm.Dockerfile b/vllm.Dockerfile index 88679ac7..3727593b 100644 --- a/vllm.Dockerfile +++ b/vllm.Dockerfile @@ -56,11 +56,32 @@ ENV NCCL_DEBUG=INFO WORKDIR /vec-inf COPY . 
/vec-inf -# Install project dependencies with vllm backend and inference group -# Use --no-cache to prevent uv from storing both downloaded and extracted packages -RUN uv pip install --system -e .[vllm] --group inference --prerelease=allow --no-cache && \ +# Install project dependencies pinned to uv.lock. +# +# `uv pip install` does NOT consult uv.lock -- only `uv sync` does, and +# `uv sync` requires a venv (incompatible with --system). Without this, +# every image build does fresh PyPI resolution and may pick a different +# transitive set than what the lockfile records (this is how :0.19.0 +# shipped with the pyarrow/datasets ABI mismatch). Instead: +# 1. Export uv.lock to a fully-pinned requirements.txt (no resolver). +# 2. Install transitives with --no-deps so nothing is re-resolved. +# 3. Install the project itself editable, also --no-deps. +RUN uv export --frozen --no-emit-project --no-hashes \ + --extra vllm --group inference \ + -o /tmp/requirements.txt && \ + uv pip install --system --no-cache --no-deps --prerelease=allow \ + -r /tmp/requirements.txt && \ + uv pip install --system --no-cache --no-deps -e . && \ + rm -f /tmp/requirements.txt && \ rm -rf /root/.cache/uv /tmp/* +# Build-time canary: fail the build if the locked deps cannot be imported +# together. This is the check that would have caught the pyarrow/datasets +# ABI mismatch in :0.19.0 at build time instead of at job start. +RUN python3.12 -c "import vllm, datasets, pyarrow, transformers, torch; \ + print('vllm', vllm.__version__, '/ datasets', datasets.__version__, \ + '/ pyarrow', pyarrow.__version__, '/ torch', torch.__version__)" + # Install a single, system NCCL (from NVIDIA CUDA repo in base image) RUN apt-get update && apt-get install -y --allow-change-held-packages\ libnccl2 libnccl-dev \