From 3389933d4b75398bfb4bf4f4a6d299def084a20d Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Thu, 16 Apr 2026 11:44:47 +0200
Subject: [PATCH] ci: add nightly integration tests for transformers,
 diffusers, peft, axolotl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Run each downstream library's own bnb-specific test suite against the
latest main bnb wheel (from the continuous-release_main pre-release).
Catches downstream breakage before it reaches users.

Test jobs (these run in parallel):
- test-transformers (A10G): tests/quantization/bnb/ from transformers v5.x
- test-transformers-multigpu (4×L4): same, multi-GPU tests only
  [disabled until runner is provisioned]
- test-diffusers (L40S, Docker): pytest -m bitsandbytes across
  quantization + model tests, using diffusers/diffusers-pytorch-cuda
  container
- test-axolotl (A10G): kernel-level bnb tests (dequantize, LoRA, QLoRA)
- test-peft (L4): PeftBnbGPUExampleTests single-GPU per Benjamin's
  recommendation
- test-peft-multigpu (4×L4): same, multi-GPU [disabled until runner]

After all test jobs finish:
- report (ubuntu): consolidates JUnit XMLs, posts to #bnb-daily-ci-collab
  via Slack bot token with threaded per-suite failure details

Triggers: workflow_dispatch + pull_request (scoped to workflow/script
changes). Cron schedule commented out — enable in a follow-up PR once
stable.

Also bumps the transformers test dep upper bound from <5 to <6, since
transformers v5.x is released and the bnb test suite there has fixes
(e.g. removed gated mosaicml/mpt-7b dependency).
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../workflows/tests-integration-nightly.yml | 436 ++++++++++++++++++ pyproject.toml | 2 +- scripts/integration_test_report.py | 336 ++++++++++++++ 3 files changed, 773 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/tests-integration-nightly.yml create mode 100644 scripts/integration_test_report.py diff --git a/.github/workflows/tests-integration-nightly.yml b/.github/workflows/tests-integration-nightly.yml new file mode 100644 index 000000000..82d528691 --- /dev/null +++ b/.github/workflows/tests-integration-nightly.yml @@ -0,0 +1,436 @@ +name: Integration Tests (Downstream) + +# Nightly smoke tests: run the bnb-specific test suites from transformers, +# accelerate, and peft against the latest main-branch bnb wheel. Catches +# downstream breakage before it reaches users. +# +# bnb is installed from the `continuous-release_main` pre-release which +# python-package.yml publishes on every push to main — no duplicate build. +# +# See agents/integration_tests_guide.md for background. + +on: + workflow_dispatch: + pull_request: + paths: + - '.github/workflows/tests-integration-nightly.yml' + - 'scripts/integration_test_report.py' + # schedule: + # - cron: "30 3 * * *" # enable once stable; runs after python-package + tests-nightly + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + PYTHON_VERSION: "3.10" + TORCH_VERSION: "2.9.1" + PYPI_INDEX: "https://download.pytorch.org/whl/cu128" + BNB_WHEEL_URL: "https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl" + CUDA_VISIBLE_DEVICES: "0,1" + +jobs: + # ─── Downstream test jobs ───────────────────────────────────────────────── + # Each job: + # 1. Installs torch, then bnb from the continuous-release wheel + # 2. Installs the downstream lib (latest release from PyPI) + # 3. 
Clones the matching version tag for the test files + # 4. Runs the library's bnb-specific tests with --junitxml + # 5. Uploads the XML + full log as an artifact for the report job + # + # Runner matching rationale (see integration_tests_guide.md): + # transformers CI runs on T4 → we use T4 + # accelerate / peft CI runs on L4 → closest bnb equivalent is A10 + # This reduces spurious failures from expected values calibrated on their runners. + + test-transformers: + name: Transformers bnb tests (single GPU) + if: github.repository == 'bitsandbytes-foundation/bitsandbytes' + runs-on: bandb-aws-g5-4xlarge-plus-use1-public-80 # A10G (matches transformers CI) + steps: + - name: Show GPU information + run: nvidia-smi + + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install torch + bnb (from continuous-release) + run: | + pip install torch==${TORCH_VERSION} --index-url ${PYPI_INDEX} + pip install "bitsandbytes[test] @ ${BNB_WHEEL_URL}" + + - name: Install transformers and clone matching tag + run: | + pip install -U transformers accelerate pytest-xdist + TRANSFORMERS_VERSION=$(pip show transformers | awk '/^Version:/ {print $2}') + echo "Installed transformers v${TRANSFORMERS_VERSION}" + git clone --depth=1 --branch "v${TRANSFORMERS_VERSION}" \ + https://github.com/huggingface/transformers.git /tmp/transformers + + - name: Show environment + run: | + pip list + python -m torch.utils.collect_env + + - name: Run transformers bnb tests + working-directory: /tmp/transformers + env: + RUN_SLOW: "1" + shell: bash -o pipefail {0} + run: | + mkdir -p ${GITHUB_WORKSPACE}/reports + python -m pytest tests/quantization/bnb/ \ + -v \ + -k "not MultiGpu and not multi_gpu" \ + --junitxml=${GITHUB_WORKSPACE}/reports/transformers.xml \ + -o junit_logging=all \ + 2>&1 | tee ${GITHUB_WORKSPACE}/reports/transformers.log + + - name: Upload JUnit XML and log + if: always() + uses: 
actions/upload-artifact@v4 + with: + name: reports-transformers + path: reports/ + retention-days: 7 + + test-transformers-multigpu: + name: Transformers bnb tests (multi GPU) + if: false # disabled until bandb-aws-g6-12xlarge-plus runner is provisioned + runs-on: bandb-aws-g6-12xlarge-plus-use1-public-80 # 4× L4 (2 used) + steps: + - name: Show GPU information + run: nvidia-smi + + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install torch + bnb (from continuous-release) + run: | + pip install torch==${TORCH_VERSION} --index-url ${PYPI_INDEX} + pip install "bitsandbytes[test] @ ${BNB_WHEEL_URL}" + + - name: Install transformers and clone matching tag + run: | + pip install -U transformers accelerate pytest-xdist + TRANSFORMERS_VERSION=$(pip show transformers | awk '/^Version:/ {print $2}') + echo "Installed transformers v${TRANSFORMERS_VERSION}" + git clone --depth=1 --branch "v${TRANSFORMERS_VERSION}" \ + https://github.com/huggingface/transformers.git /tmp/transformers + + - name: Show environment + run: | + pip list + python -m torch.utils.collect_env + + - name: Run transformers bnb tests (multi-GPU only) + working-directory: /tmp/transformers + env: + RUN_SLOW: "1" + shell: bash -o pipefail {0} + run: | + mkdir -p ${GITHUB_WORKSPACE}/reports + python -m pytest tests/quantization/bnb/ \ + -v \ + -k "MultiGpu or multi_gpu" \ + --junitxml=${GITHUB_WORKSPACE}/reports/transformers-multigpu.xml \ + -o junit_logging=all \ + 2>&1 | tee ${GITHUB_WORKSPACE}/reports/transformers-multigpu.log + + - name: Upload JUnit XML and log + if: always() + uses: actions/upload-artifact@v4 + with: + name: reports-transformers-multigpu + path: reports/ + retention-days: 7 + + test-diffusers: + name: Diffusers bnb tests + if: github.repository == 'bitsandbytes-foundation/bitsandbytes' + runs-on: bandb-aws-g6e-4xlarge-plus-use1-public-80 # L40S (matches diffusers CI) + container: + 
image: diffusers/diffusers-pytorch-cuda + options: --gpus all --shm-size "16gb" --ipc host + steps: + - name: Show GPU information + run: nvidia-smi + + - uses: actions/checkout@v4 + + - name: Install bnb + diffusers from PyPI (overriding image versions) + run: | + pip install "bitsandbytes[test] @ ${BNB_WHEEL_URL}" + pip install --force-reinstall --no-deps diffusers + + - name: Clone diffusers matching installed version + run: | + DIFFUSERS_VERSION=$(pip show diffusers | awk '/^Version:/ {print $2}') + echo "Installed diffusers v${DIFFUSERS_VERSION}" + git clone --depth=1 --branch "v${DIFFUSERS_VERSION}" \ + https://github.com/huggingface/diffusers.git /tmp/diffusers + + - name: Show environment + run: | + pip list + python -m torch.utils.collect_env + + - name: Run diffusers bnb tests + working-directory: /tmp/diffusers + env: + RUN_SLOW: "1" + CUBLAS_WORKSPACE_CONFIG: ":16:8" + shell: bash -o pipefail {0} + run: | + mkdir -p ${GITHUB_WORKSPACE}/reports + python -m pytest \ + -m bitsandbytes \ + tests/ \ + -v \ + --junitxml=${GITHUB_WORKSPACE}/reports/diffusers.xml \ + -o junit_logging=all \ + 2>&1 | tee ${GITHUB_WORKSPACE}/reports/diffusers.log + + - name: Upload JUnit XML and log + if: always() + uses: actions/upload-artifact@v4 + with: + name: reports-diffusers + path: reports/ + retention-days: 7 + + test-axolotl: + name: Axolotl bnb kernel tests + if: github.repository == 'bitsandbytes-foundation/bitsandbytes' + runs-on: bandb-aws-g5-4xlarge-plus-use1-public-80 # A10G + steps: + - name: Show GPU information + run: nvidia-smi + + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install torch + bnb (from continuous-release) + run: | + pip install torch==${TORCH_VERSION} --index-url ${PYPI_INDEX} + pip install "bitsandbytes[test] @ ${BNB_WHEEL_URL}" + + - name: Install axolotl from latest release tag + run: | + AXOLOTL_TAG=$(curl -s 
https://api.github.com/repos/axolotl-ai-cloud/axolotl/releases/latest | python -c "import json,sys; print(json.load(sys.stdin)['tag_name'])") + echo "Latest axolotl release: ${AXOLOTL_TAG}" + git clone --depth=1 --branch "${AXOLOTL_TAG}" \ + https://github.com/axolotl-ai-cloud/axolotl.git /tmp/axolotl + pip install /tmp/axolotl transformers accelerate peft + + - name: Show environment + run: | + pip list + python -m torch.utils.collect_env + + - name: Run axolotl bnb kernel + optimizer tests + working-directory: /tmp/axolotl + shell: bash -o pipefail {0} + run: | + mkdir -p ${GITHUB_WORKSPACE}/reports + python -m pytest \ + tests/e2e/kernels/test_quantize.py \ + tests/e2e/kernels/test_lora.py \ + "tests/e2e/kernels/test_lora_features.py::TestQuantizedModels" \ + "tests/e2e/test_llama.py::TestLlama::test_fft_trust_remote_code" \ + -v \ + --junitxml=${GITHUB_WORKSPACE}/reports/axolotl.xml \ + -o junit_logging=all \ + 2>&1 | tee ${GITHUB_WORKSPACE}/reports/axolotl.log + + - name: Upload JUnit XML and log + if: always() + uses: actions/upload-artifact@v4 + with: + name: reports-axolotl + path: reports/ + retention-days: 7 + + test-peft: + name: PEFT bnb tests (single GPU) + if: github.repository == 'bitsandbytes-foundation/bitsandbytes' + runs-on: bandb-aws-g6-4xlarge-plus-use1-public-80 # L4 (matches peft CI) + steps: + - name: Show GPU information + run: nvidia-smi + + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install torch + bnb (from continuous-release) + run: | + pip install torch==${TORCH_VERSION} --index-url ${PYPI_INDEX} + pip install "bitsandbytes[test] @ ${BNB_WHEEL_URL}" + + - name: Install peft and clone matching tag + run: | + pip install "peft[test]" transformers accelerate + PEFT_VERSION=$(pip show peft | awk '/^Version:/ {print $2}') + echo "Installed peft v${PEFT_VERSION}" + git clone --depth=1 --branch "v${PEFT_VERSION}" \ + 
https://github.com/huggingface/peft.git /tmp/peft + + - name: Show environment + run: | + pip list + python -m torch.utils.collect_env + + - name: Run peft bnb tests + working-directory: /tmp/peft + env: + IS_GITHUB_CI: "1" + shell: bash -o pipefail {0} + run: | + mkdir -p ${GITHUB_WORKSPACE}/reports + python -m pytest \ + -m single_gpu_tests \ + -k PeftBnbGPUExampleTests \ + tests/test_gpu_examples.py \ + -v \ + --junitxml=${GITHUB_WORKSPACE}/reports/peft.xml \ + -o junit_logging=all \ + 2>&1 | tee ${GITHUB_WORKSPACE}/reports/peft.log + + - name: Upload JUnit XML and log + if: always() + uses: actions/upload-artifact@v4 + with: + name: reports-peft + path: reports/ + retention-days: 7 + + test-peft-multigpu: + name: PEFT bnb tests (multi GPU) + if: false # disabled until bandb-aws-g6-12xlarge-plus runner is provisioned + runs-on: bandb-aws-g6-12xlarge-plus-use1-public-80 # 4× L4 + steps: + - name: Show GPU information + run: nvidia-smi + + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install torch + bnb (from continuous-release) + run: | + pip install torch==${TORCH_VERSION} --index-url ${PYPI_INDEX} + pip install "bitsandbytes[test] @ ${BNB_WHEEL_URL}" + + - name: Install peft and clone matching tag + run: | + pip install "peft[test]" transformers accelerate + PEFT_VERSION=$(pip show peft | awk '/^Version:/ {print $2}') + echo "Installed peft v${PEFT_VERSION}" + git clone --depth=1 --branch "v${PEFT_VERSION}" \ + https://github.com/huggingface/peft.git /tmp/peft + + - name: Show environment + run: | + pip list + python -m torch.utils.collect_env + + - name: Run peft bnb tests + working-directory: /tmp/peft + env: + IS_GITHUB_CI: "1" + shell: bash -o pipefail {0} + run: | + mkdir -p ${GITHUB_WORKSPACE}/reports + python -m pytest \ + -m multi_gpu_tests \ + -k PeftBnbGPUExampleTests \ + tests/test_gpu_examples.py \ + -v \ + 
--junitxml=${GITHUB_WORKSPACE}/reports/peft-multigpu.xml \ + -o junit_logging=all \ + 2>&1 | tee ${GITHUB_WORKSPACE}/reports/peft-multigpu.log + + - name: Upload JUnit XML and log + if: always() + uses: actions/upload-artifact@v4 + with: + name: reports-peft-multigpu + path: reports/ + retention-days: 7 + + # ─── Consolidated report ────────────────────────────────────────────────── + # Runs after all test jobs finish (success or failure). + # Downloads the JUnit XMLs, runs our report script, writes to the job + # summary, uploads artifacts, and posts a consolidated message to + # #bnb-daily-ci-collab on Slack. + + report: + name: Consolidated report + needs: [test-transformers, test-transformers-multigpu, test-diffusers, test-axolotl, test-peft, test-peft-multigpu] + if: always() && github.repository == 'bitsandbytes-foundation/bitsandbytes' + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Download all report artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts + pattern: reports-* + + - name: Consolidate XMLs into reports/ + run: | + mkdir -p reports + # Each artifact lands in artifacts/reports-/ — flatten to reports/.xml + find artifacts -name '*.xml' -exec cp {} reports/ \; + find artifacts -name '*.log' -exec cp {} reports/ \; + ls -la reports/ + + - name: Generate consolidated report + post to Slack + env: + SLACK_API_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} + run: | + pip install slack_sdk + python scripts/integration_test_report.py \ + --reports-dir reports/ \ + --output consolidated_report.md \ + --slack-channel bnb-daily-ci-collab + + - name: Write to job summary + if: always() + run: | + cat consolidated_report.md >> $GITHUB_STEP_SUMMARY + + - name: Upload consolidated report + if: always() + uses: actions/upload-artifact@v4 + with: + name: consolidated-report + path: | + consolidated_report.md 
+ reports/ + retention-days: 14 diff --git a/pyproject.toml b/pyproject.toml index f448a079e..2bfe1ba0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ test = [ "lion-pytorch==0.2.3", "pytest~=8.3", "scipy>=1.11.4,<2", - "transformers>=4.30.1,<5" + "transformers>=4.30.1,<6" ] [tool.setuptools] diff --git a/scripts/integration_test_report.py b/scripts/integration_test_report.py new file mode 100644 index 000000000..3286042ce --- /dev/null +++ b/scripts/integration_test_report.py @@ -0,0 +1,336 @@ +#!/usr/bin/env python +"""Parse JUnit XML test reports and post a consolidated summary to Slack. + +Designed for the bitsandbytes nightly integration tests that run downstream +test suites (transformers, accelerate, peft) against the current bnb build. + +Usage: + # Dry-run (print to stdout, no Slack): + python scripts/integration_test_report.py --reports-dir reports/ + + # Post to Slack: + python scripts/integration_test_report.py --reports-dir reports/ --slack-channel bnb-ci-nightly +""" + +import argparse +from datetime import date +import glob +import os +import sys +from xml.etree import ElementTree + + +def parse_junit_xml(file_path): + """Parse a JUnit XML file and return structured results.""" + tree = ElementTree.parse(file_path) + root = tree.getroot() + + # Handle both ... and bare ... 
+ if root.tag == "testsuites": + suites = root.findall("testsuite") + else: + suites = [root] + + tests = 0 + passed = 0 + failed = 0 + skipped = 0 + errors = 0 + total_time = 0.0 + failures = [] + + for suite in suites: + tests += int(suite.get("tests", 0)) + skipped += int(suite.get("skipped", 0)) + errors += int(suite.get("errors", 0)) + failed += int(suite.get("failures", 0)) + total_time += float(suite.get("time", 0)) + + for testcase in suite.findall("testcase"): + failure = testcase.find("failure") + error = testcase.find("error") + if failure is not None: + failures.append( + { + "test": f"{testcase.get('classname', '')}::{testcase.get('name', '')}", + "message": failure.get("message", ""), + } + ) + elif error is not None: + failures.append( + { + "test": f"{testcase.get('classname', '')}::{testcase.get('name', '')}", + "message": error.get("message", ""), + } + ) + + passed = tests - failed - skipped - errors + + return { + "tests": tests, + "passed": passed, + "failed": failed + errors, + "skipped": skipped, + "time": total_time, + "failures": failures, + } + + +def format_duration(seconds): + """Format seconds into a human-readable string.""" + m, s = divmod(int(seconds), 60) + if m > 0: + return f"{m}m{s:02d}s" + return f"{s}s" + + +def consolidate_reports(reports_dir): + """Find and parse all JUnit XML files in the reports directory.""" + xml_files = sorted(glob.glob(os.path.join(reports_dir, "**", "*.xml"), recursive=True)) + + if not xml_files: + print(f"No XML report files found in {reports_dir}", file=sys.stderr) + return {} + + results = {} + for xml_file in xml_files: + # Derive suite name from filename: "transformers.xml" -> "transformers" + suite_name = os.path.splitext(os.path.basename(xml_file))[0] + results[suite_name] = parse_junit_xml(xml_file) + + return results + + +def _success_rate(r): + """Success rate: passed / (passed + failed), ignoring skipped.""" + run = r["passed"] + r["failed"] + return (r["passed"] / run) if run > 0 else 1.0 
+ + +def generate_markdown(results): + """Generate a markdown summary report.""" + if not results: + return "No test results found." + + total_passed = sum(r["passed"] for r in results.values()) + total_failed = sum(r["failed"] for r in results.values()) + total_skipped = sum(r["skipped"] for r in results.values()) + total_time = sum(r["time"] for r in results.values()) + + lines = [] + lines.append("# BNB Integration Test Report") + lines.append("") + + total_run = total_passed + total_failed + if total_failed == 0: + lines.append(f"All {total_run} tests passed in {format_duration(total_time)}.") + else: + lines.append(f"**{total_failed} failures** out of {total_run} tests in {format_duration(total_time)}.") + if total_skipped > 0: + lines.append(f"({total_skipped} skipped)") + + lines.append("") + lines.append("| Suite | Tests | Passed | Failed | Skipped | Duration | Success Rate |") + lines.append("|-------|------:|-------:|-------:|--------:|---------:|-------------:|") + + # Sort by success rate ascending (worst first) + sorted_results = sorted(results.items(), key=lambda x: _success_rate(x[1])) + + for suite_name, r in sorted_results: + run = r["passed"] + r["failed"] + rate = f"{r['passed'] / run * 100:.1f}%" if run > 0 else "N/A" + lines.append( + f"| {suite_name} | {r['tests']} | {r['passed']} | {r['failed']} " + f"| {r['skipped']} | {format_duration(r['time'])} | {rate} |" + ) + + # Failure details + any_failures = any(r["failures"] for r in results.values()) + if any_failures: + lines.append("") + lines.append("## Failures") + for suite_name, r in sorted_results: + if r["failures"]: + lines.append(f"### {suite_name}") + lines.append("```") + for f in r["failures"]: + if f["message"]: + lines.append(f"FAILED {f['test']} - {f['message']}") + else: + lines.append(f"FAILED {f['test']}") + lines.append("```") + lines.append("") + + return "\n".join(lines) + + +def create_slack_payload(results): + """Create Slack Block Kit payload from results.""" + 
total_passed = sum(r["passed"] for r in results.values()) + total_failed = sum(r["failed"] for r in results.values()) + total_skipped = sum(r["skipped"] for r in results.values()) + + total_run = total_passed + total_failed + + if total_run == 0: + emoji = "⚠️" + rate_str = "N/A" + elif total_failed == 0: + emoji = "✅" + rate_str = "100%" + elif total_failed / total_run < 0.1: + emoji = "⚠️" + rate_str = f"{total_passed / total_run * 100:.1f}%" + else: + emoji = "❌" + rate_str = f"{total_passed / total_run * 100:.1f}%" + + summary = f"{emoji} *BNB Integration Tests:* {rate_str} success ({total_passed}/{total_run} tests" + if total_skipped > 0: + summary += f", {total_skipped} skipped" + if total_failed > 0: + summary += f", {total_failed} failed" + summary += ")" + + # Build table — sorted by success rate ascending (worst first) + sorted_results = sorted(results.items(), key=lambda x: _success_rate(x[1])) + + table_lines = ["```"] + header = f"{'Suite':<15} {'Tests':>6} {'Failed':>7} {'Duration':>10} {'Success':>8}" + table_lines.append(header) + table_lines.append("-" * len(header)) + + for suite_name, r in sorted_results: + run = r["passed"] + r["failed"] + rate = f"{r['passed'] / run * 100:.1f}%" if run > 0 else "N/A" + table_lines.append(f"{suite_name:<15} {run:>6} {r['failed']:>7} {format_duration(r['time']):>10} {rate:>8}") + + table_lines.append("```") + + payload = [ + {"type": "section", "text": {"type": "mrkdwn", "text": summary}}, + {"type": "section", "text": {"type": "mrkdwn", "text": "\n".join(table_lines)}}, + ] + + # GitHub Actions link + run_id = os.environ.get("GITHUB_RUN_ID") + repo = os.environ.get("GITHUB_REPOSITORY", "bitsandbytes-foundation/bitsandbytes") + if run_id: + payload.append( + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": f"**", + }, + } + ) + + payload.append( + { + "type": "context", + "elements": [{"type": "plain_text", "text": f"Nightly integration test results for {date.today()}"}], + } + ) + + return 
payload + + +def create_failure_thread_payloads(results): + """Create per-suite Slack thread replies for failures.""" + threads = [] + + for suite_name, r in results.items(): + if not r["failures"]: + continue + + run = r["passed"] + r["failed"] + rate = f"{r['passed'] / run * 100:.1f}%" if run > 0 else "N/A" + lines = [f"*{suite_name}* (Success Rate: {rate})"] + lines.append("```") + for f in r["failures"]: + if f["message"]: + lines.append(f"FAILED {f['test']}") + lines.append(f" {f['message'][:200]}") + else: + lines.append(f"FAILED {f['test']}") + lines.append("```") + + threads.append("\n".join(lines)) + + return threads + + +def post_to_slack(channel, payload, thread_payloads): + """Post the report to Slack.""" + from slack_sdk import WebClient + + token = os.environ.get("SLACK_API_TOKEN") + if not token: + print("SLACK_API_TOKEN not set, skipping Slack post", file=sys.stderr) + return + + client = WebClient(token=token) + + # Main message + response = client.chat_postMessage( + channel=f"#{channel}", + text="BNB Integration Test Results", + blocks=payload, + ) + print(f"Posted to #{channel}") + + # Threaded failure details + ts = response["ts"] + for thread_msg in thread_payloads: + client.chat_postMessage( + channel=f"#{channel}", + thread_ts=ts, + text=thread_msg, + ) + + if thread_payloads: + print(f"Posted {len(thread_payloads)} failure thread replies") + + +def main(): + parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--reports-dir", default="reports", help="Directory containing JUnit XML files") + parser.add_argument("--slack-channel", default=None, help="Slack channel name (omit to skip Slack)") + parser.add_argument("--output", default=None, help="Write markdown report to file") + parser.add_argument("--dry-run", action="store_true", help="Print Slack payload as JSON instead of posting") + args = parser.parse_args() + + results = consolidate_reports(args.reports_dir) + 
if not results: + sys.exit(1) + + # Markdown report + markdown = generate_markdown(results) + + if args.output: + with open(args.output, "w") as f: + f.write(markdown) + print(f"Report written to {args.output}") + + # Always print markdown (for $GITHUB_STEP_SUMMARY piping) + print(markdown) + + # Slack + payload = create_slack_payload(results) + thread_payloads = create_failure_thread_payloads(results) + + if args.dry_run: + import json + + print("\n--- Slack main payload ---") + print(json.dumps(payload, indent=2)) + for i, tp in enumerate(thread_payloads): + print(f"\n--- Thread reply {i + 1} ---") + print(tp) + elif args.slack_channel: + post_to_slack(args.slack_channel, payload, thread_payloads) + + +if __name__ == "__main__": + main()