From bd063ea88428a7bd4238f1f5c1d750d85f0ab4dd Mon Sep 17 00:00:00 2001 From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com> Date: Thu, 16 Apr 2026 11:13:09 +0200 Subject: [PATCH 1/2] ci: add CUDA 13.2 build and nightly test support - Add CUDA 13.2.0 to the build matrix (Linux + Windows) - Bump Jimver/cuda-toolkit from v0.2.29 to v0.2.35 for CUDA 13.2 Windows support - Add CUDA 13.2.0 nightly test with torch 2.12+cu132 (nightly only, not PR tests) - Add torch_nightly input to test-runner for pre-release torch installs Ref: https://github.com/pytorch/pytorch/issues/178665 Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/python-package.yml | 4 ++-- .github/workflows/test-runner.yml | 13 +++++++++++-- .github/workflows/tests-nightly.yml | 7 ++++++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index b16a9ab11..9b73e4094 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -74,12 +74,12 @@ jobs: - os: windows-2025 arch: x86_64 cuda_version: - ["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1", "13.0.2"] + ["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 # Windows: We install Cuda on the agent (slow) - - uses: Jimver/cuda-toolkit@6008063726ffe3309d1b22e413d9e88fed91a2f2 # v0.2.29 + - uses: Jimver/cuda-toolkit@3d45d157f327c09c04b50ee6ccdea2d9d017ec76 # v0.2.35 if: startsWith(matrix.os, 'windows') id: cuda-toolkit with: diff --git a/.github/workflows/test-runner.yml b/.github/workflows/test-runner.yml index 9a9c42fda..69f618eb6 100644 --- a/.github/workflows/test-runner.yml +++ b/.github/workflows/test-runner.yml @@ -32,6 +32,10 @@ on: type: string default: "" description: "CPU architecture for testing: icelake, cascadelake (default: platform default runner)" + torch_nightly: + type: boolean + default: false + description: "Install nightly torch (--pre, no version pin) instead of a stable release" env: BNB_SKIP_CMAKE: 1 @@ -144,7 +148,7 @@ jobs: # Windows + CUDA: Install CUDA Toolkit - name: Install CUDA Toolkit if: inputs.backend == 'cuda' && inputs.platform == 'windows' - uses: Jimver/cuda-toolkit@6008063726ffe3309d1b22e413d9e88fed91a2f2 # v0.2.29 + uses: Jimver/cuda-toolkit@3d45d157f327c09c04b50ee6ccdea2d9d017ec76 # v0.2.35 with: cuda: ${{ inputs.cuda_version }} method: "network" @@ -208,9 +212,14 @@ jobs: - name: Install dependencies run: | - pip install torch==${{ inputs.torch_version }} --index-url ${{ inputs.pypi_index }} + if [[ "${{ inputs.torch_nightly }}" == "true" ]]; then + pip install --pre torch --index-url ${{ inputs.pypi_index }} + else + pip install torch==${{ inputs.torch_version }} --index-url ${{ inputs.pypi_index }} + fi pip install -e ".[test]" -v pip install pytest-cov + shell: bash # Windows: Downgrade NumPy for torch<2.4.1 compatibility # See: https://github.com/pytorch/pytorch/issues/131668 diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index ea541ad29..24eb4e0d4 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -57,7 +57,7 @@ jobs: # Linux x64 cross-product platform: [linux-x64] gpu_type: [T4, A10, L40S] - cuda_version: ["11.8.0", "12.6.3", "12.8.1", "13.0.2"] + cuda_version: ["11.8.0", "12.6.3", "12.8.1", "13.0.2", "13.2.0"] include: # Map CUDA version to torch version and PyPI index @@ -73,6 +73,10 @@ jobs: - cuda_version: "13.0.2" torch_version: "2.11.0" pypi_index: "https://download.pytorch.org/whl/cu130" + - cuda_version: "13.2.0" + torch_version: "2.12.0" + torch_nightly: true + pypi_index: "https://download.pytorch.org/whl/nightly/cu132" # Windows CUDA Tests - T4 GPU (CUDA 11.8 only, multiple torch versions) - platform: windows @@ -98,4 +102,5 @@ jobs: cuda_version: ${{ matrix.cuda_version }} gpu_type: ${{ matrix.gpu_type }} torch_version: ${{ matrix.torch_version }} + torch_nightly: ${{ matrix.torch_nightly || false }} pypi_index: ${{ matrix.pypi_index }} From 7c7ee0b4c06c16c16f00c99f5945de5098a32a9b Mon Sep 17 00:00:00 2001 From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com> Date: Thu, 16 Apr 2026 14:16:37 +0200 Subject: [PATCH 2/2] fix: add /Zc:preprocessor for MSVC with CUDA 13.2+ CUDA 13.2's CCCL headers require MSVC's standard conforming preprocessor. Pass /Zc:preprocessor to cl.exe via -Xcompiler. Co-Authored-By: Claude Opus 4.6 (1M context) --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 39473eff1..ea61fbdf3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,6 +194,11 @@ if(BUILD_CUDA) string(APPEND CMAKE_CUDA_FLAGS " --compress-mode=size") endif() + # CUDA 13.2+ CCCL headers require MSVC's standard conforming preprocessor. + if(MSVC AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "13.2") + string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=/Zc:preprocessor") + endif() + if(PTXAS_VERBOSE) string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=-v") endif()