Changes from all commits (201 commits)
127fb84
Plumb packed sequence length through local training backends
FurtherAI Apr 8, 2026
2ef7969
Add Megatron trainability runtime and service flow
FurtherAI Apr 8, 2026
c52bff6
Fix minor regressions
FurtherAI Apr 8, 2026
0199fc1
Merge remote-tracking branch 'origin/main' into austin/deepep_compile…
FurtherAI Apr 8, 2026
3d6e892
Install nvshmem and remove patches
FurtherAI Apr 8, 2026
16fd201
Update CI to sm_90 for DeepEP
FurtherAI Apr 8, 2026
9dfc106
Fix CI uv cache upload hangs
FurtherAI Apr 9, 2026
4481357
Add megatron model support phase 1 scaffolding
FurtherAI Apr 9, 2026
c0d308b
Extract provider hooks into qwen model handler
FurtherAI Apr 9, 2026
78d07e8
Move megatron lora traversal into model handlers
FurtherAI Apr 9, 2026
e356dfb
Add canonical megatron adapter export helpers
FurtherAI Apr 9, 2026
8a9672d
Add megatron param name canonicalization helpers
FurtherAI Apr 9, 2026
906f6ef
Add dedicated megatron merged runtime flow
FurtherAI Apr 9, 2026
654698b
Add split vllm runtime package
FurtherAI Apr 9, 2026
04cebfa
Add megatron model support discovery scaffold
FurtherAI Apr 9, 2026
b2ce459
Add non-zero oracle signal checks
FurtherAI Apr 9, 2026
549f73d
Improve architecture coverage recommendations
FurtherAI Apr 9, 2026
0ae31ce
Add minimal layer coverage workflow API
FurtherAI Apr 9, 2026
1b293e5
Remove duplicate oracle replay suite variant
FurtherAI Apr 9, 2026
9dc5cdc
Add SFT HF parity scaffolding
FurtherAI Apr 9, 2026
c2bec58
Extract megatron weight export helpers
FurtherAI Apr 9, 2026
4da6ab9
Use real HF parity deltas
FurtherAI Apr 9, 2026
60bc3f1
Achieve Qwen3.5 HF parity
FurtherAI Apr 9, 2026
7076db9
Remove flex attention compile disable plumbing
FurtherAI Apr 10, 2026
2727104
Wire HF parity into validation workflow
FurtherAI Apr 13, 2026
e835237
Stabilize megatron HF parity runtime
FurtherAI Apr 13, 2026
84d59e0
Drop HF parity delta checks
FurtherAI Apr 13, 2026
362160a
Wire lora coverage and correctness into workflow
FurtherAI Apr 13, 2026
8e43cdd
Wire merged vllm serving into workflow
FurtherAI Apr 13, 2026
3580730
Isolate workflow stages in subprocesses
FurtherAI Apr 14, 2026
95b07e6
Add model support trainability workflow stages
FurtherAI Apr 14, 2026
592d99e
Add realistic packed-position validation and runtime cleanup
FurtherAI Apr 15, 2026
0cf988b
Use real preprocess in packed position validation
FurtherAI Apr 15, 2026
1db721a
Move megatron preprocess patching into model handlers
FurtherAI Apr 15, 2026
9b4c2ac
Replace chat template rollout with conformance suite
FurtherAI Apr 16, 2026
d0a3198
Wait for dedicated vLLM health before serving
FurtherAI Apr 16, 2026
8dd17f6
Fix Qwen3.5 trainability and packed position handling
FurtherAI Apr 16, 2026
faeca8a
Log correctness runs and narrow DeepEP gating
FurtherAI Apr 16, 2026
5ac1f0c
WIP snapshot current megatron bridge/model support state
FurtherAI Apr 21, 2026
c15075f
Split Megatron runtime trainable modes for HF parity
FurtherAI Apr 21, 2026
0f96868
Restore Qwen3.5 text-only SP embedding scatter
FurtherAI Apr 21, 2026
aa708cc
Restore oracle flex attention eager path
FurtherAI Apr 21, 2026
cad8003
Fix Qwen3.5 GDN LoRA TP shard ordering
FurtherAI Apr 22, 2026
383f0aa
Gate DeepEP to supported runtime dtypes
FurtherAI Apr 22, 2026
1144295
Revert invalid flex attention compile toggle
FurtherAI Apr 22, 2026
1cd848e
Restore oracle-only DeepEP fp32 override
FurtherAI Apr 22, 2026
df39090
Generalize LoRA shard manifests and pin block mask compile backend
FurtherAI Apr 22, 2026
5a9388f
Fix sensitivity harness for Qwen3.5 workflow
FurtherAI Apr 22, 2026
6eb6d91
Validate packed position ids with oracle metric
FurtherAI Apr 24, 2026
c307576
Add vllm separation integration test harness
FurtherAI Apr 27, 2026
cb9fa84
Cut over ART core to external vLLM runtime
FurtherAI Apr 27, 2026
740c79e
Add vLLM separation integration checks
FurtherAI Apr 27, 2026
c29563f
Update lockfile for vLLM separation
FurtherAI Apr 27, 2026
31e430d
Fix vLLM separation test package imports
FurtherAI Apr 27, 2026
ae73761
Resolve vLLM separation test repo root via git
FurtherAI Apr 27, 2026
74f3c44
Fix runtime project root resolution in worktrees
FurtherAI Apr 27, 2026
f0888ec
Add service import smoke for vLLM-free ART env
FurtherAI Apr 27, 2026
c7ac04a
Fix service import smoke command
FurtherAI Apr 27, 2026
686285b
Implement multi-rank Megatron merged sync orchestration
FurtherAI Apr 27, 2026
9785444
Fix concurrent init assertion in merged sync tests
FurtherAI Apr 27, 2026
983a2d0
Add runtime boundary service checks
FurtherAI Apr 27, 2026
84ae38b
Add opt-in live local backend runtime smoke
FurtherAI Apr 27, 2026
db39cec
Add direct runtime live smoke
FurtherAI Apr 27, 2026
5c1f4bb
Fix runtime sleep route pause mode import
FurtherAI Apr 27, 2026
6f9d2d7
Add live Megatron separation smokes
FurtherAI Apr 27, 2026
8262767
Fix merged NCCL bootstrap across split runtimes
FurtherAI Apr 27, 2026
1e8f6a2
Normalize raw NCCL ids in runtime wrapper
FurtherAI Apr 27, 2026
42c9237
Fix runtime normalization regression test
FurtherAI Apr 27, 2026
b2006a8
Load full runtime patches in vLLM worker plugins
FurtherAI Apr 27, 2026
8ebb936
Fail fast when Megatron job worker exits
FurtherAI Apr 27, 2026
a7fa7ac
Keep NCCL bootstrap store alive during sync
FurtherAI Apr 27, 2026
f4747fa
Add workflow-style trainability validation matrix
FurtherAI Apr 27, 2026
3e8c61f
Add EP LoRA localization in runtime
FurtherAI Apr 27, 2026
42cecd5
Fix EP MoE LoRA alignment in runtime
FurtherAI Apr 27, 2026
54a8217
Fix runtime EP alignment test harness
FurtherAI Apr 27, 2026
d27afb8
Fix runtime EP LoRA align expert map handling
FurtherAI Apr 27, 2026
f72fff1
Add Qwen3 MoE DeepEP compile workaround
FurtherAI Apr 27, 2026
03506c8
Fix unsloth yes-no trainability config
FurtherAI Apr 27, 2026
b748494
Import unsloth during art startup
FurtherAI Apr 27, 2026
824943d
Tune unsloth yes-no validation defaults
FurtherAI Apr 27, 2026
579cc27
Stabilize unsloth yes-no validation
FurtherAI Apr 27, 2026
f0f772c
Handle unsloth banner in import tests
FurtherAI Apr 27, 2026
670d120
Use default trainability logprob settings
FurtherAI Apr 27, 2026
09fe7eb
Release GPU state between trainability tests
FurtherAI Apr 27, 2026
e831345
Use 1024 packed sequence validation defaults
FurtherAI Apr 27, 2026
513ff43
Stabilize live yes-no validation defaults
FurtherAI Apr 27, 2026
cda94a5
Retry GPU memory recovery in live validation
FurtherAI Apr 27, 2026
69d540a
Add longer Megatron separation live smokes
FurtherAI Apr 28, 2026
9456acb
Remove Megatron auto-setup fallback
FurtherAI Apr 28, 2026
663c6d8
Launch Megatron worker in active env
FurtherAI Apr 28, 2026
9fb5650
Launch vLLM runtime from dedicated env
FurtherAI Apr 28, 2026
b63af40
Fix runtime launcher regression test
FurtherAI Apr 28, 2026
70bd723
Add GDN shared-prefix packed sequence support
FurtherAI Apr 30, 2026
4d17742
Handle sparse Qwen3 MoE expert parity grads
FurtherAI Apr 30, 2026
1fdda3b
Fix GDN sequence-parallel output shapes
FurtherAI Apr 30, 2026
26ae3b8
Respect rollout mode in yes-no trainability
FurtherAI Apr 30, 2026
a5a0446
Cast GDN bucket outputs before scatter
FurtherAI Apr 30, 2026
2ffcb65
Add GDN layout planning support
FurtherAI May 1, 2026
96cdf53
Package vLLM runtime as managed bundle
FurtherAI May 1, 2026
b4a570e
Add ART service lifecycle cleanup
FurtherAI May 2, 2026
e251187
Fix lifecycle cleanup edge cases
FurtherAI May 2, 2026
8f0fcb3
Run Megatron trainability tests out of process
FurtherAI May 2, 2026
a72638d
Allow slow actor startup imports
FurtherAI May 2, 2026
3824036
Fix merged trainability model list assertion
FurtherAI May 2, 2026
133adba
Avoid managed process signal wait deadlock
FurtherAI May 2, 2026
1161211
Stop managed children when wrapper dies
FurtherAI May 2, 2026
77fecd1
Restore dedicated Unsloth SFT guard
FurtherAI May 2, 2026
068c9ce
Address remaining vLLM separation review findings
FurtherAI May 2, 2026
243ef8c
Add Qwen3.5/3.6 native vLLM LoRA support path
FurtherAI May 2, 2026
20cc5ea
Update vLLM runtime to official 0.19.1
FurtherAI May 2, 2026
3dd13ad
Wire native LoRA support through handlers
FurtherAI May 2, 2026
986cb6e
Adapt runtime routes to vLLM 0.19 app API
FurtherAI May 2, 2026
3fc3120
Fix dense Qwen35 text-only validation path
FurtherAI May 3, 2026
5c6a8d9
Add env gate for workflow sensitivity stage
FurtherAI May 3, 2026
44f88d5
Prepare native vLLM MoE LoRA checkpoints
FurtherAI May 3, 2026
6ee8f27
Relax packed position id MoE tolerance
FurtherAI May 3, 2026
ea8bf50
Mark Qwen3.5 MoE native LoRA as validated
FurtherAI May 3, 2026
423224f
Enable Qwen3.5/3.6 LoRA rollout defaults
FurtherAI May 3, 2026
ec9fcb3
Lazy-load tinker server export
FurtherAI May 3, 2026
4485f45
Stub tinker in renderer unit tests
FurtherAI May 3, 2026
aa4b825
Lazy-load tinker native backend export
FurtherAI May 3, 2026
4f8781b
Gate shared expert parallel by model family
FurtherAI May 3, 2026
9c95945
Split dense and MoE shared config expectations
FurtherAI May 3, 2026
c4f46ce
Revert "Lazy-load tinker server export"
FurtherAI May 3, 2026
9dc95d3
Revert "Lazy-load tinker native backend export"
FurtherAI May 3, 2026
293758e
Remove shared FC1 LoRA shape fallback
FurtherAI May 3, 2026
0825421
Revert runtime LoRA checkpoint rewriting
FurtherAI May 3, 2026
61755c4
Add native vLLM LoRA layout probe
FurtherAI May 3, 2026
58508ca
Expand native vLLM LoRA layout probe
FurtherAI May 3, 2026
84b9861
Make Megatron LoRA disk checkpoints vLLM canonical
FurtherAI May 3, 2026
f445bb3
Keep Megatron LoRA shards native
FurtherAI May 3, 2026
133da5e
Avoid redundant identity LoRA config save
FurtherAI May 3, 2026
4c7ef23
Split Megatron dense and MoE model support
FurtherAI May 3, 2026
ee53c05
Gate Megatron model support registry
FurtherAI May 3, 2026
15f70c3
Filter oracle variants by visible GPUs
FurtherAI May 3, 2026
16ccb57
Add Qwen3 dense probe handler
FurtherAI May 3, 2026
9c77732
Use registry for Megatron model support gating
FurtherAI May 3, 2026
40b1391
Remove qwen bridge fakes from provider tests
FurtherAI May 3, 2026
c1cc9d9
Canonicalize dense TP gate-up traces
FurtherAI May 3, 2026
24ca82c
Allow tiny absolute oracle loss drift
FurtherAI May 3, 2026
2ded12d
Rename unsupported_arch to unvalidated_arch. And remove loss threshol…
FurtherAI May 4, 2026
72ae53f
Fold oracle extended topologies into defaults
FurtherAI May 4, 2026
b03f70d
Use real CP size for shared-prefix GDN
FurtherAI May 4, 2026
64030f9
Allow full GDN specs with sequence parallel shards
FurtherAI May 4, 2026
75d5e86
Trace GDN modules in oracle forward reports
FurtherAI May 4, 2026
d222600
Canonicalize componentwise LoRA trace outputs
FurtherAI May 4, 2026
a968ab6
Slightly bump oracle correctness threshold for loss
FurtherAI May 4, 2026
cb85c5e
Validate Qwen3 native vLLM LoRA mode
FurtherAI May 4, 2026
c178ac5
Remove unsourced Qwen3.6 pricing
FurtherAI May 4, 2026
eda42b1
Remove Megatron optional fallback paths
FurtherAI May 4, 2026
38f4faf
Make selected Megatron paths strict
FurtherAI May 4, 2026
d6c129d
Update provider recompute test fixture
FurtherAI May 4, 2026
4bcf909
Fix provider recompute test model
FurtherAI May 4, 2026
a5e1915
Correct provider support fixture models
FurtherAI May 4, 2026
c56d89d
Fix model support stage worker arch flag
FurtherAI May 4, 2026
8df90dd
Parallelize yes-no eval prompts
FurtherAI May 4, 2026
c785024
Make native vLLM LoRA a quick serving gate
FurtherAI May 4, 2026
1ff559f
Use fresh native LoRA serving artifacts
FurtherAI May 4, 2026
57eddc1
Propagate unvalidated model validation flag
FurtherAI May 4, 2026
8fd8aa4
Delegate GDN projections to Megatron modules
FurtherAI May 4, 2026
7948e6a
Canonicalize GDN forward traces
FurtherAI May 4, 2026
05c6164
Keep GDN trace metadata in test harness
FurtherAI May 4, 2026
1225b08
Use dense topology for dense trainability
FurtherAI May 5, 2026
3c5cd55
Disable Qwen35 DeepEP permute compile
FurtherAI May 5, 2026
7a9917b
Test Qwen35 DeepEP compile workaround
FurtherAI May 5, 2026
674c256
Lower yes-no trainability reward gate
FurtherAI May 5, 2026
5b520e3
Validate native vLLM LoRA for Qwen3 dense
FurtherAI May 5, 2026
d70ab2c
Promote dense Qwen models to validated support
FurtherAI May 5, 2026
3d77ba3
Avoid eager model support workflow imports
FurtherAI May 5, 2026
3663266
Use compact packed GDN kernels for local buckets
FurtherAI May 5, 2026
5d32ac0
Use chunked FLA GDN kernel
FurtherAI May 6, 2026
697f392
Use fused Megatron cross entropy
FurtherAI May 6, 2026
632eefb
Remove legacy GDN executor path
FurtherAI May 6, 2026
4d60c94
Add harness CE fusion override worker
FurtherAI May 6, 2026
d57b48e
Add GDN timing hooks to harness wrapper
FurtherAI May 6, 2026
02f221b
Organize Megatron modules and integration tests
FurtherAI May 7, 2026
06814b0
Fix HF parity invariant handler call
FurtherAI May 7, 2026
df52d07
Port main dependency and lifecycle updates
FurtherAI May 8, 2026
4c1fde1
Update Qwen handler for newer bridge mappings
FurtherAI May 8, 2026
6c66d67
Validate Qwen3.5 vLLM LoRA layout
FurtherAI May 8, 2026
470f966
Remove flex attention compile tuning options
FurtherAI May 8, 2026
6b43ef0
Ignore train inference mismatch artifacts
FurtherAI May 8, 2026
5fe1f1b
Avoid assert bytecode in flex attention forward
FurtherAI May 8, 2026
70e9db4
Report flex attention bias type mismatches
FurtherAI May 8, 2026
f79e63e
Propagate Qwen3.5 MTP shared-prefix attention
FurtherAI May 8, 2026
1506236
Forward Qwen3.5 MTP attention bias to layers
FurtherAI May 8, 2026
dd16e0a
Avoid checkpointing Qwen3.5 MTP attention state
FurtherAI May 8, 2026
5bf2c87
Disable Qwen3.5 MTP in ART Megatron
FurtherAI May 8, 2026
e9b869d
Drop MTP diagnostic flex attention changes
FurtherAI May 8, 2026
d26ecb7
Assert Qwen3.5 ART training has no MTP
FurtherAI May 8, 2026
6b40e71
Clean PR artifacts and fix type checks
FurtherAI May 8, 2026
aafedae
Merge remote-tracking branch 'origin/main' into austin/vllm_separation
FurtherAI May 8, 2026
7edba06
Unify runtime process supervision
FurtherAI May 9, 2026
a31a581
Model asyncio subprocess contract in runtime tests
FurtherAI May 9, 2026
815d577
Defer supervised wait coroutine creation
FurtherAI May 9, 2026
f662370
Prune oracle topology artifacts by default
FurtherAI May 9, 2026
7434fdf
Handle vLLM EP dummy LoRA warmup
FurtherAI May 9, 2026
e84cc4c
Keep vLLM MoE LoRA stacking idempotent
FurtherAI May 9, 2026
ef2c7b9
Add train inference mismatch workflow stage
FurtherAI May 10, 2026
a0c071b
Update workflow test oracle artifact mocks
FurtherAI May 10, 2026
cee9112
Preserve recent Unsloth training fixes
FurtherAI May 11, 2026
2 changes: 1 addition & 1 deletion .github/workflows/package-install.yml
@@ -27,7 +27,7 @@ jobs:
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
- name: Build wheel
run: uv build --wheel --out-dir dist
run: python scripts/build_package.py --wheel

- name: Smoke test uv add + sync for backend extra
run: |
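Both this workflow and `release.yml` below now route builds through `scripts/build_package.py`, whose body is not part of this diff. A minimal sketch of what such a wrapper could look like, assuming it simply shells out to the `uv build` invocation it replaces (`--wheel` is the only flag the workflows here exercise; everything else is an assumption):

```python
#!/usr/bin/env python3
"""Hypothetical sketch of scripts/build_package.py (not shown in this PR section)."""
import argparse
import subprocess


def main() -> None:
    parser = argparse.ArgumentParser(description="Build openpipe-art distributions")
    # --wheel is the only flag exercised by the workflows in this diff.
    parser.add_argument("--wheel", action="store_true", help="build only the wheel")
    args = parser.parse_args()

    # Default invocation builds both sdist and wheel, matching `hatch build`.
    cmd = ["uv", "build", "--out-dir", "dist"]
    if args.wheel:
        cmd.insert(2, "--wheel")
    subprocess.run(cmd, check=True)


if __name__ == "__main__":
    main()
```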
107 changes: 89 additions & 18 deletions .github/workflows/release.yml
@@ -10,9 +10,11 @@ permissions:
id-token: write

jobs:
release:
build-package:
runs-on: ubuntu-latest
if: github.event.pull_request.merged == true && startsWith(github.event.pull_request.head.ref, 'release/')
outputs:
version: ${{ steps.get_version.outputs.VERSION }}
steps:
- uses: actions/checkout@v4
with:
@@ -21,52 +23,121 @@
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
python-version: "3.11"

- name: Install uv
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.cargo/bin" >> $GITHUB_PATH

- name: Install dependencies
run: |
uv venv
uv pip install -e .
uv pip install hatch
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

- name: Build package
run: uv run hatch build
run: python scripts/build_package.py

- name: Get version from pyproject.toml
id: get_version
run: |
VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml', 'rb'))['project']['version'])")
echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"

- name: Upload package artifact
uses: actions/upload-artifact@v4
with:
name: python-distributions
path: dist/*

runtime-smoke:
runs-on: art-large-runner
needs: build-package
steps:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Install uv
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

- name: Download package artifact
uses: actions/download-artifact@v4
with:
name: python-distributions
path: dist

- name: Smoke test managed vLLM runtime install
run: |
export ART_VLLM_RUNTIME_CACHE_DIR="${RUNNER_TEMP}/art-vllm-runtime-cache"
export UV_LINK_MODE=copy
wheel_path="$(python - <<'PY'
from pathlib import Path

print(next(Path("dist").glob("openpipe_art-*.whl")).resolve())
PY
)"

project_dir="$(mktemp -d)"
cd "$project_dir"
uv init --name art-runtime-smoke --python 3.11 --bare
uv add "openpipe-art[backend] @ file://${wheel_path}"
uv sync
uv run python - <<'PY'
from pathlib import Path
import subprocess

from art.vllm_runtime import ensure_vllm_runtime

runtime_bin = ensure_vllm_runtime()
runtime_python = Path(runtime_bin).parent / "python"
subprocess.run([str(runtime_bin), "--help"], check=True)
subprocess.run(
[
str(runtime_python),
"-c",
"import art_vllm_runtime, torch, vllm; print('runtime imports ok')",
],
check=True,
)
print(runtime_bin)
PY
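The smoke step above pins down the contract introduced by the vLLM separation work: an ART environment with no vLLM installed must still be able to provision the packaged runtime and reach a working interpreter inside it. A consumer-side sketch of that contract follows; only `ensure_vllm_runtime()`, the sibling `python` interpreter, and `ART_VLLM_RUNTIME_CACHE_DIR` are confirmed by this workflow, while the provision-on-first-call behavior is an assumption:

```python
import os
import subprocess
from pathlib import Path

from art.vllm_runtime import ensure_vllm_runtime

# Optional: pin where the runtime bundle lands (CI points this at RUNNER_TEMP
# so runs stay hermetic).
os.environ.setdefault("ART_VLLM_RUNTIME_CACHE_DIR", "/tmp/art-vllm-runtime-cache")

# Assumed to download/install the managed bundle on first call and return
# the path to its executable entry point.
runtime_bin = Path(ensure_vllm_runtime())
runtime_python = runtime_bin.parent / "python"

# vLLM is importable only inside the runtime env, never in the ART env itself.
subprocess.run(
    [str(runtime_python), "-c", "import vllm; print(vllm.__version__)"],
    check=True,
)
```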

publish:
runs-on: ubuntu-latest
needs: [build-package, runtime-smoke]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Download package artifact
uses: actions/download-artifact@v4
with:
name: python-distributions
path: dist

- name: Create git tag
run: |
git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
git tag v${{ steps.get_version.outputs.VERSION }}
git push origin v${{ steps.get_version.outputs.VERSION }}
git tag v${{ needs.build-package.outputs.version }}
git push origin v${{ needs.build-package.outputs.version }}

- name: Publish draft release
env:
GH_TOKEN: ${{ github.token }}
run: |
# Check if draft release exists and publish it
if gh release view v${{ steps.get_version.outputs.VERSION }} --json isDraft | jq -r '.isDraft' | grep -q true; then
gh release edit v${{ steps.get_version.outputs.VERSION }} --draft=false
if gh release view v${{ needs.build-package.outputs.version }} --json isDraft | jq -r '.isDraft' | grep -q true; then
gh release edit v${{ needs.build-package.outputs.version }} --draft=false
else
echo "::error::No draft release found for v${{ steps.get_version.outputs.VERSION }}"
echo "::error::No draft release found for v${{ needs.build-package.outputs.version }}"
exit 1
fi

- name: Upload assets to release
env:
GH_TOKEN: ${{ github.token }}
run: |
gh release upload v${{ steps.get_version.outputs.VERSION }} dist/*
gh release upload v${{ needs.build-package.outputs.version }} dist/*

- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
3 changes: 2 additions & 1 deletion .gitignore
@@ -20,4 +20,5 @@ trajectories/
.ruff_cache/
!/src/art/wandb/
!/src/art/wandb/**
/src/art/wandb/__pycache__/
/src/art/wandb/__pycache__/
scratch/
2 changes: 1 addition & 1 deletion dev/bench_cute_grouped_lora.py
@@ -11,7 +11,7 @@
from pydantic import BaseModel, ConfigDict, Field, field_validator
import torch

from art.megatron.cute_grouped_lora_quack import quack_grouped_lora
from art.megatron.kernels.cute_grouped_lora_quack import quack_grouped_lora

GroupedLoraFn = Callable[
[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
19 changes: 14 additions & 5 deletions pyproject.toml
@@ -40,11 +40,11 @@ backend = [
"gql<4",
"nvidia-cudnn-frontend<1.21 ; sys_platform == 'linux'",
"nvidia-resiliency-ext<0.5 ; sys_platform == 'linux'",
"vllm @ https://github.com/vivekkalyan/vllm/releases/download/v0.17.0-art1/vllm-0.17.0%2Bart1-cp38-abi3-manylinux_2_31_x86_64.whl ; sys_platform == 'linux'",
]
megatron = [
"numpy<2",
"torch==2.10.0",
"quack-kernels==0.2.5",
"apex @ git+https://github.com/NVIDIA/apex.git@25.09",
"transformer-engine==2.11.0",
"transformer-engine-cu12==2.11.0",
@@ -83,9 +83,6 @@ tinker = [
[project.scripts]
art = "art.cli:app"

[project.entry-points."vllm.general_plugins"]
art = "art.vllm.patches:patch_transformers_v5_compat"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
@@ -103,17 +100,30 @@ packages = ["src/art", "src/mp_actors"]
sources = ["src"]

[tool.hatch.build.targets.sdist]
sources = []
only-include = [
".agents/skills",
"LICENSE",
"README.md",
"THIRD-PARTY-NOTICES",
"pyproject.toml",
"src",
]
exclude = [
"/dev",
"/wandb",
"/.art",
"/.local",
"/.ruff_cache",
"/.venv",
"/dist",
"/scratch",
"/unsloth_compiled_cache",
"/.git",
"/.github",
"/examples/*/data",
"/examples/*/wandb",
"/tests/unsloth_compiled_cache",
"**/__pycache__",
"**/*.pyc",
]
@@ -217,7 +227,6 @@ allowed-unresolved-imports = [
"unsloth.**",
"unsloth_zoo.**",
"uvicorn.**",
"vllm.**",
"wandb.**",
# langgraph deps
"langchain_core.**",
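With the sdist target switching from blanket source inclusion to an explicit `only-include` list plus a broader `exclude` list, a quick post-build check can confirm that scratch directories and bytecode stay out of the published artifact. A sketch, assuming the build produces a normalized `dist/openpipe_art-*.tar.gz` (the `openpipe_art-*.whl` glob in the release smoke test suggests this naming):

```python
import tarfile
from pathlib import Path

# Assumes `python scripts/build_package.py` has already produced an sdist.
sdist = next(Path("dist").glob("openpipe_art-*.tar.gz"))
with tarfile.open(sdist) as tf:
    names = tf.getnames()

# only-include should keep the packaging roots...
assert any(name.endswith("/pyproject.toml") for name in names)
assert any("/src/art/" in name for name in names)
# ...while exclude should drop scratch dirs, caches, and bytecode.
assert not any("scratch/" in name for name in names)
assert not any(name.endswith(".pyc") or "__pycache__" in name for name in names)
print(f"{sdist.name}: {len(names)} files, no excluded artifacts")
```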