From a79f6c646e946c030e8438cece76011f465771a9 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Fri, 22 May 2026 21:17:04 +0000 Subject: [PATCH 01/12] Add Python 3.15 to the build/test matrix Backport of #2108 to the 12.9.x branch so that cuda-bindings 12.9.x wheels are available for Python 3.15, which the main branch needs in order to build cuda-core. - Add 3.15 / 3.15t to the build matrix - Upgrade cibuildwheel to v4.0.0rc1 (required for cpython-prerelease) - Enable cpython-prerelease in CIBW_ENABLE - Add amd64-only test entries for 3.15 / 3.15t (CUDA 12.9.1) --- .github/workflows/build-wheel.yml | 10 ++++++---- ci/test-matrix.json | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index 5c04afea290..de4be611429 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -32,6 +32,8 @@ jobs: - "3.13t" - "3.14" - "3.14t" + - "3.15" + - "3.15t" name: py${{ matrix.python-version }} runs-on: ${{ (inputs.host-platform == 'linux-64' && 'linux-amd64-cpu8') || (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') || @@ -111,7 +113,7 @@ jobs: if-no-files-found: error - name: Build cuda.core wheel - uses: pypa/cibuildwheel@8d2b08b68458a16aeb24b64e68a09ab1c8e82084 # v3.4.1 + uses: pypa/cibuildwheel@54327ab9d35de03b359ac25c97de9417d94639c0 # v4.0.0rc1 env: CIBW_BUILD: ${{ env.CIBW_BUILD }} CIBW_ARCHS_LINUX: "native" @@ -120,7 +122,7 @@ jobs: CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair --namespace-pkg cuda -w {dest_dir} {wheel}" CIBW_ENVIRONMENT: > CUDA_PYTHON_PARALLEL_LEVEL=${{ env.CUDA_PYTHON_PARALLEL_LEVEL }} - CIBW_ENABLE: "cpython-freethreading" + CIBW_ENABLE: "cpython-freethreading cpython-prerelease" with: package-dir: ./cuda_core/ output-dir: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} @@ -154,7 +156,7 @@ jobs: cuda-version: ${{ inputs.cuda-version }} - name: Build cuda.bindings wheel - uses: 
pypa/cibuildwheel@8d2b08b68458a16aeb24b64e68a09ab1c8e82084 # v3.4.1 + uses: pypa/cibuildwheel@54327ab9d35de03b359ac25c97de9417d94639c0 # v4.0.0rc1 env: CIBW_BUILD: ${{ env.CIBW_BUILD }} CIBW_ARCHS_LINUX: "native" @@ -168,7 +170,7 @@ jobs: CUDA_PYTHON_PARALLEL_LEVEL=${{ env.CUDA_PYTHON_PARALLEL_LEVEL }} CIBW_BEFORE_BUILD_WINDOWS: "pip install delvewheel" CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair --namespace-pkg cuda -w {dest_dir} {wheel}" - CIBW_ENABLE: "cpython-freethreading" + CIBW_ENABLE: "cpython-freethreading cpython-prerelease" with: package-dir: ./cuda_bindings/ output-dir: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 45ae4299a4c..ada640fd6e2 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -15,6 +15,8 @@ { "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.15", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.15t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.10", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, From 25989945faf891c93942219d4a7976e0a40ed01f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Fri, 22 May 2026 21:40:24 +0000 Subject: [PATCH 02/12] Drop cpython-freethreading from CIBW_ENABLE (default in v4.0) --- .github/workflows/build-wheel.yml | 4 ++-- 1 file changed, 2 insertions(+), 
2 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index de4be611429..a27b291f52c 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -122,7 +122,7 @@ jobs: CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair --namespace-pkg cuda -w {dest_dir} {wheel}" CIBW_ENVIRONMENT: > CUDA_PYTHON_PARALLEL_LEVEL=${{ env.CUDA_PYTHON_PARALLEL_LEVEL }} - CIBW_ENABLE: "cpython-freethreading cpython-prerelease" + CIBW_ENABLE: "cpython-prerelease" with: package-dir: ./cuda_core/ output-dir: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} @@ -170,7 +170,7 @@ jobs: CUDA_PYTHON_PARALLEL_LEVEL=${{ env.CUDA_PYTHON_PARALLEL_LEVEL }} CIBW_BEFORE_BUILD_WINDOWS: "pip install delvewheel" CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair --namespace-pkg cuda -w {dest_dir} {wheel}" - CIBW_ENABLE: "cpython-freethreading cpython-prerelease" + CIBW_ENABLE: "cpython-prerelease" with: package-dir: ./cuda_bindings/ output-dir: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} From d64eefdd45bfd197bb5798d8eda82816ae33020d Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Fri, 22 May 2026 22:13:04 +0000 Subject: [PATCH 03/12] Drop 3.13t from matrix; fix GNU link.exe on Windows - Remove 3.13t from build matrix and all test matrices (already dropped on main). - Work around Meson finding GNU link.exe (from Git for Windows) instead of MSVC link.exe when building numpy from source for pre-release Python on Windows. 
--- .github/workflows/build-wheel.yml | 8 +++++++- ci/test-matrix.json | 10 ++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index a27b291f52c..101aa0e6d8b 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -29,7 +29,6 @@ jobs: - "3.11" - "3.12" - "3.13" - - "3.13t" - "3.14" - "3.14t" - "3.15" @@ -251,6 +250,13 @@ jobs: run: | pip install cuda_pathfinder/*.whl + - name: Hide GNU link.exe so Meson finds MSVC link.exe + if: ${{ startsWith(inputs.host-platform, 'win') }} + run: | + if [ -f "/c/Program Files/Git/usr/bin/link.exe" ]; then + mv "/c/Program Files/Git/usr/bin/link.exe" "/c/Program Files/Git/usr/bin/link.exe.bak" + fi + - name: Build cuda.bindings Cython tests run: | pip install $(ls ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl)[test] diff --git a/ci/test-matrix.json b/ci/test-matrix.json index ada640fd6e2..59b8f8c90fe 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -12,7 +12,6 @@ { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.15", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, @@ -25,7 +24,6 @@ { "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": 
"3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.13t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" } ], @@ -82,17 +80,13 @@ { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" } + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" } ], "nightly": [ { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": 
"13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" } + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" } ] } } From 38ec741e35c58f5fcb0b880e15c7df7360b735f6 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Fri, 22 May 2026 22:59:32 +0000 Subject: [PATCH 04/12] Expand test matrix: arm64 3.15, Windows 3.10-3.15 - Add arm64 entries for 3.15 / 3.15t (linux pull-request) - Add Windows pull-request entries for 3.10, 3.11, 3.14, 3.14t, 3.15, 3.15t to keep in sync with main --- ci/test-matrix.json | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index 59b8f8c90fe..d17f37b8c81 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -25,7 +25,9 @@ { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, { "ARCH": "arm64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }, - { "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" } + { "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, + { "ARCH": "arm64", "PY_VER": "3.15", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" }, + { "ARCH": "arm64", "PY_VER": "3.15t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" } ], "nightly": [ { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "earliest" }, @@ -77,10 +79,16 @@ }, "windows": { "pull-request": [ - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "t4", 
"DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" } + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.15", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.15t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" } ], "nightly": [ { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, From 8a2c9c336608c5d369f101a4c83a198479c0723c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Fri, 22 May 2026 23:02:38 +0000 Subject: [PATCH 05/12] Windows test matrix: dual LOCAL_CTK for 3.10/3.11, use 12.9.1 for 3.13 --- ci/test-matrix.json | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ci/test-matrix.json b/ci/test-matrix.json index d17f37b8c81..16690b04ff5 100644 --- a/ci/test-matrix.json +++ b/ci/test-matrix.json @@ -80,11 +80,13 @@ "windows": { 
"pull-request": [ { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.10", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.11", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, - { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, + { "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }, { "ARCH": "amd64", "PY_VER": "3.15", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" }, From a7692b402982a2ffd1099f9409d01cc5d2f1ea4c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Fri, 22 May 2026 23:24:26 +0000 Subject: [PATCH 06/12] Pre-build numpy wheel for Python 3.15 (no pre-built wheel yet) NumPy does not ship pre-built wheels for Python 3.15 yet. 
Build the wheel from source during the build stage (where compilers are available), upload it as an artifact, and download/install it in the test stage before running cuda.bindings tests. --- .github/workflows/build-wheel.yml | 17 +++++++++++++++++ .github/workflows/test-wheel-linux.yml | 11 +++++++++++ .github/workflows/test-wheel-windows.yml | 11 +++++++++++ 3 files changed, 39 insertions(+) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index 101aa0e6d8b..35d40e36638 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -257,6 +257,23 @@ jobs: mv "/c/Program Files/Git/usr/bin/link.exe" "/c/Program Files/Git/usr/bin/link.exe.bak" fi + - name: Build numpy wheel (pre-release Python) + if: ${{ startsWith(matrix.python-version, '3.15') }} + run: | + pip wheel "numpy>=1.21.1" --no-binary numpy -w numpy-wheel/ + + - name: Upload numpy wheel + if: ${{ startsWith(matrix.python-version, '3.15') }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: numpy-python${{ env.PYTHON_VERSION_FORMATTED }}-${{ inputs.host-platform }} + path: numpy-wheel/*.whl + if-no-files-found: error + + - name: Install numpy wheel + if: ${{ startsWith(matrix.python-version, '3.15') }} + run: pip install numpy-wheel/*.whl + - name: Build cuda.bindings Cython tests run: | pip install $(ls ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl)[test] diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 5db0f8caa7f..41c8f0630d0 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -240,6 +240,17 @@ jobs: - name: Set up compute-sanitizer run: setup-sanitizer + - name: Download numpy wheel (pre-release Python) + if: ${{ startsWith(matrix.PY_VER, '3.15') }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: numpy-python${{ env.PYTHON_VERSION_FORMATTED }}-${{ 
inputs.host-platform }} + path: numpy-wheel + + - name: Install numpy wheel (pre-release Python) + if: ${{ startsWith(matrix.PY_VER, '3.15') }} + run: pip install numpy-wheel/*.whl + - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} env: diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index f97e63417a8..876bf399595 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -215,6 +215,17 @@ jobs: host-platform: ${{ inputs.host-platform }} cuda-version: ${{ matrix.CUDA_VER }} + - name: Download numpy wheel (pre-release Python) + if: ${{ startsWith(matrix.PY_VER, '3.15') }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: numpy-python${{ env.PYTHON_VERSION_FORMATTED }}-${{ inputs.host-platform }} + path: numpy-wheel + + - name: Install numpy wheel (pre-release Python) + if: ${{ startsWith(matrix.PY_VER, '3.15') }} + run: pip install numpy-wheel/*.whl + - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} env: From ee8db0410929ebb09733fb366bfbe27745b33c2a Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 30 May 2026 14:31:03 +0000 Subject: [PATCH 07/12] Build numpy in manylinux container for glibc compat; fix Windows glob MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux: build numpy via cibuildwheel inside the manylinux_2_28 container so the wheel is compatible with the ubuntu:22.04 test container (glibc 2.35). Previously, pip wheel on the host produced a linux_x86_64 wheel linked against glibc 2.38+, causing ImportError at test time. Windows: add explicit bash shell to the numpy install step in the test workflow — PowerShell does not expand *.whl globs. 
Co-Authored-By: Claude Opus 4.6 --- .github/workflows/build-wheel.yml | 23 +++++++++++++++++++++-- .github/workflows/test-wheel-windows.yml | 1 + 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index 35d40e36638..b04de5d65ab 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -257,8 +257,27 @@ jobs: mv "/c/Program Files/Git/usr/bin/link.exe" "/c/Program Files/Git/usr/bin/link.exe.bak" fi - - name: Build numpy wheel (pre-release Python) - if: ${{ startsWith(matrix.python-version, '3.15') }} + - name: Download numpy sdist (pre-release Python) + if: ${{ startsWith(matrix.python-version, '3.15') && startsWith(inputs.host-platform, 'linux') }} + run: | + pip download --no-binary numpy --no-deps "numpy>=1.21.1" -d numpy-sdist/ + cd numpy-sdist && tar xf numpy-*.tar.gz && rm numpy-*.tar.gz + echo "NUMPY_SRC_DIR=$(pwd)/$(ls -d numpy-*/)" >> $GITHUB_ENV + + - name: Build numpy wheel (pre-release Python, Linux) + if: ${{ startsWith(matrix.python-version, '3.15') && startsWith(inputs.host-platform, 'linux') }} + uses: pypa/cibuildwheel@54327ab9d35de03b359ac25c97de9417d94639c0 # v4.0.0rc1 + env: + CIBW_BUILD: ${{ env.CIBW_BUILD }} + CIBW_ARCHS_LINUX: "native" + CIBW_BUILD_VERBOSITY: 1 + CIBW_ENABLE: "cpython-prerelease" + with: + package-dir: ${{ env.NUMPY_SRC_DIR }} + output-dir: numpy-wheel/ + + - name: Build numpy wheel (pre-release Python, Windows) + if: ${{ startsWith(matrix.python-version, '3.15') && startsWith(inputs.host-platform, 'win') }} run: | pip wheel "numpy>=1.21.1" --no-binary numpy -w numpy-wheel/ diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 876bf399595..4ea595325ca 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -224,6 +224,7 @@ jobs: - name: Install numpy wheel (pre-release Python) if: ${{ startsWith(matrix.PY_VER, '3.15') }} 
+ shell: bash --noprofile --norc -xeuo pipefail {0} run: pip install numpy-wheel/*.whl - name: Run cuda.bindings tests From 3f1f3c0f86d7fc02852a10bb474634e95f5180a7 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 30 May 2026 15:02:41 +0000 Subject: [PATCH 08/12] Fix numpy cibuildwheel: patch cpython-freethreading, unify platforms - Patch numpy 2.4.x pyproject.toml to remove the "cpython-freethreading" enable group, which is invalid in cibuildwheel v4.0 (fixed on numpy main but not yet released). - Consolidate Linux/Windows numpy builds into a single cibuildwheel step, matching how cuda.core and cuda.bindings are built. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/build-wheel.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index b04de5d65ab..6c6d41c0783 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -257,30 +257,31 @@ jobs: mv "/c/Program Files/Git/usr/bin/link.exe" "/c/Program Files/Git/usr/bin/link.exe.bak" fi - - name: Download numpy sdist (pre-release Python) - if: ${{ startsWith(matrix.python-version, '3.15') && startsWith(inputs.host-platform, 'linux') }} + - name: Download and patch numpy sdist (pre-release Python) + if: ${{ startsWith(matrix.python-version, '3.15') }} run: | pip download --no-binary numpy --no-deps "numpy>=1.21.1" -d numpy-sdist/ cd numpy-sdist && tar xf numpy-*.tar.gz && rm numpy-*.tar.gz + # WAR: numpy 2.4.x ships enable=["cpython-freethreading", ...] which + # is invalid in cibuildwheel v4.0 (freethreading is now on by default). + # Fixed on numpy main (a5df4859) but not yet released. 
+ sed -i 's/"cpython-freethreading", //' numpy-*/pyproject.toml echo "NUMPY_SRC_DIR=$(pwd)/$(ls -d numpy-*/)" >> $GITHUB_ENV - - name: Build numpy wheel (pre-release Python, Linux) - if: ${{ startsWith(matrix.python-version, '3.15') && startsWith(inputs.host-platform, 'linux') }} + - name: Build numpy wheel (pre-release Python) + if: ${{ startsWith(matrix.python-version, '3.15') }} uses: pypa/cibuildwheel@54327ab9d35de03b359ac25c97de9417d94639c0 # v4.0.0rc1 env: CIBW_BUILD: ${{ env.CIBW_BUILD }} CIBW_ARCHS_LINUX: "native" CIBW_BUILD_VERBOSITY: 1 + CIBW_BEFORE_BUILD_WINDOWS: "pip install delvewheel" + CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair -w {dest_dir} {wheel}" CIBW_ENABLE: "cpython-prerelease" with: package-dir: ${{ env.NUMPY_SRC_DIR }} output-dir: numpy-wheel/ - - name: Build numpy wheel (pre-release Python, Windows) - if: ${{ startsWith(matrix.python-version, '3.15') && startsWith(inputs.host-platform, 'win') }} - run: | - pip wheel "numpy>=1.21.1" --no-binary numpy -w numpy-wheel/ - - name: Upload numpy wheel if: ${{ startsWith(matrix.python-version, '3.15') }} uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 From b808899614e370cfab7199df01ff7caa1e23c58b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 30 May 2026 15:29:41 +0000 Subject: [PATCH 09/12] Strip numpy cibuildwheel config; build without BLAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit numpy 2.4.x ships [tool.cibuildwheel] config (OpenBLAS scripts, cpython-freethreading, ILP64 settings) incompatible with cibuildwheel v4.0. Strip the entire section from the sdist and build with -Dallow-noblas=true — we only need numpy for testing, not optimized linear algebra. 
Co-Authored-By: Claude Opus 4.6 --- .github/workflows/build-wheel.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index 6c6d41c0783..adcc0aab5b1 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -262,10 +262,17 @@ jobs: run: | pip download --no-binary numpy --no-deps "numpy>=1.21.1" -d numpy-sdist/ cd numpy-sdist && tar xf numpy-*.tar.gz && rm numpy-*.tar.gz - # WAR: numpy 2.4.x ships enable=["cpython-freethreading", ...] which - # is invalid in cibuildwheel v4.0 (freethreading is now on by default). - # Fixed on numpy main (a5df4859) but not yet released. - sed -i 's/"cpython-freethreading", //' numpy-*/pyproject.toml + # WAR: numpy 2.4.x ships [tool.cibuildwheel] config that is + # incompatible with cibuildwheel v4.0 (cpython-freethreading enable + # group, OpenBLAS before-build scripts, etc.). Strip the entire + # section — we only need a basic numpy wheel for testing. 
+ python -c " + import glob, re + for f in glob.glob('numpy-*/pyproject.toml'): + txt = open(f).read() + txt = re.sub(r'\n\[tool\.cibuildwheel\].*', '', txt, flags=re.DOTALL) + open(f, 'w').write(txt) + " echo "NUMPY_SRC_DIR=$(pwd)/$(ls -d numpy-*/)" >> $GITHUB_ENV - name: Build numpy wheel (pre-release Python) @@ -275,6 +282,8 @@ jobs: CIBW_BUILD: ${{ env.CIBW_BUILD }} CIBW_ARCHS_LINUX: "native" CIBW_BUILD_VERBOSITY: 1 + CIBW_CONFIG_SETTINGS: "setup-args=-Dallow-noblas=true" + CIBW_CONFIG_SETTINGS_WINDOWS: "setup-args=--vsenv setup-args=-Dallow-noblas=true" CIBW_BEFORE_BUILD_WINDOWS: "pip install delvewheel" CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair -w {dest_dir} {wheel}" CIBW_ENABLE: "cpython-prerelease" From bd3e9904f8c581fb69b82d00d7d35a5e7d67f21d Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 30 May 2026 16:06:17 +0000 Subject: [PATCH 10/12] Fix numpy pyproject.toml patch to preserve [tool.meson-python] The previous regex stripped everything from [tool.cibuildwheel] to EOF, which also removed [tool.meson-python] with its vendored meson path. Use a line-by-line approach to only remove cibuildwheel sections. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/build-wheel.yml | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index adcc0aab5b1..9f29839825c 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -264,14 +264,23 @@ jobs: cd numpy-sdist && tar xf numpy-*.tar.gz && rm numpy-*.tar.gz # WAR: numpy 2.4.x ships [tool.cibuildwheel] config that is # incompatible with cibuildwheel v4.0 (cpython-freethreading enable - # group, OpenBLAS before-build scripts, etc.). Strip the entire - # section — we only need a basic numpy wheel for testing. + # group, OpenBLAS before-build scripts, etc.). Strip the cibuildwheel + # sections but preserve [tool.meson-python] (vendored meson path). 
python -c " - import glob, re + import glob for f in glob.glob('numpy-*/pyproject.toml'): - txt = open(f).read() - txt = re.sub(r'\n\[tool\.cibuildwheel\].*', '', txt, flags=re.DOTALL) - open(f, 'w').write(txt) + lines, skip = open(f).readlines(), False + out = [] + for line in lines: + hdr = line.strip() + if hdr.startswith('[tool.cibuildwheel') or hdr.startswith('[[tool.cibuildwheel'): + skip = True + continue + if skip and hdr.startswith('[') and 'cibuildwheel' not in hdr: + skip = False + if not skip: + out.append(line) + open(f, 'w').writelines(out) " echo "NUMPY_SRC_DIR=$(pwd)/$(ls -d numpy-*/)" >> $GITHUB_ENV From 4898bde5cf62bac8061cb4129839e9ee4348757f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 30 May 2026 17:45:39 +0000 Subject: [PATCH 11/12] Fix test_graphics_api_smoketest on py3.10/3.11 Windows On py3.10/3.11, pyglet raises FileNotFoundError (an OSError subclass) when opengl32.dll is missing, rather than AttributeError as on py3.12+. Add OSError to the except clause so the test skips gracefully. 
Co-Authored-By: Claude Opus 4.6 --- cuda_bindings/tests/test_graphics_apis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_bindings/tests/test_graphics_apis.py b/cuda_bindings/tests/test_graphics_apis.py index e5e9d818142..e45c210685b 100644 --- a/cuda_bindings/tests/test_graphics_apis.py +++ b/cuda_bindings/tests/test_graphics_apis.py @@ -12,7 +12,7 @@ def test_graphics_api_smoketest(): import pyglet tex = pyglet.image.Texture.create(512, 512) - except (ImportError, AttributeError): + except (ImportError, AttributeError, OSError): pytest.skip("pyglet not available or could not create GL context") # return to make linters happy return From fe6d2bb721262e53ebf556e153ff784e972043b4 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 30 May 2026 18:38:13 +0000 Subject: [PATCH 12/12] Exclude Python 3.15 wheels from release publishing Co-Authored-By: Claude Opus 4.6 --- .github/workflows/release.yml | 4 ++++ ci/tools/download-wheels | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4d2656e88c0..8a650be0a43 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -163,6 +163,10 @@ jobs: if [[ "${p}" == *-tests ]]; then continue fi + # exclude pre-release Python (3.15) wheels from publishing + if [[ "${p}" == *python315* ]]; then + continue + fi mv ${p}/*.whl dist/ done rm -rf ${{ inputs.component }}* diff --git a/ci/tools/download-wheels b/ci/tools/download-wheels index a3141afb33b..1a98909a996 100755 --- a/ci/tools/download-wheels +++ b/ci/tools/download-wheels @@ -56,6 +56,12 @@ do continue fi + # exclude pre-release Python (3.15) wheels from publishing + if [[ "${p}" == *python315* ]]; then + echo "Skipping pre-release Python artifact: $p" + continue + fi + # If we're not downloading "all", only process matching component if [[ "$COMPONENT" != "all" && "$p" != ${COMPONENT}* ]]; then continue