From 8865107f478fc79abcc51a437d64ddebfceff7f7 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 15 Apr 2026 11:35:17 +0200 Subject: [PATCH 1/3] feat(gpu): pin device plugin image with WSL2 CDI spec fixes Use ghcr.io/nvidia/k8s-device-plugin:1bb36583 which includes upstream fixes for WSL2 CDI spec compatibility (cdiVersion and device naming), removing the need for any local spec transformation. See NVIDIA/k8s-device-plugin#1671. TODO: revert to chart-default image once a released version includes these fixes. Signed-off-by: Evan Lezar --- .../gpu-manifests/nvidia-device-plugin-helmchart.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml index 1cb0ca70a..ab7eaad92 100644 --- a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml +++ b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml @@ -16,6 +16,11 @@ # devices are injected via CDI hooks before container start. Sandbox pods only # need the nvidia.com/gpu resource request — no runtimeClassName is required. # +# The image is pinned to commit 1bb36583 which includes WSL2 CDI spec +# compatibility fixes (correct cdiVersion and device naming). +# TODO: switch back to the chart-default image once a released version includes +# these fixes and the version pin above is updated accordingly. +# # k3s auto-detects nvidia-container-runtime on PATH and registers the "nvidia" # RuntimeClass automatically, so no manual RuntimeClass manifest is needed. 
@@ -34,6 +39,9 @@ spec: runtimeClassName: nvidia deviceListStrategy: cdi-cri deviceIDStrategy: index + image: + repository: ghcr.io/nvidia/k8s-device-plugin + tag: "1bb36583" cdi: nvidiaHookPath: /usr/bin/nvidia-cdi-hook nvidiaDriverRoot: "/" From 482aae3a514408ea82d0653dec268cfff335b65f Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 15 Apr 2026 11:35:17 +0200 Subject: [PATCH 2/3] fix(sandbox): add WSL2 GPU device and library paths to Landlock baseline On WSL2, NVIDIA GPUs are exposed through the DXG kernel driver (/dev/dxg) rather than the native nvidia* devices. CDI injects /dev/dxg as the sole GPU device node, plus GPU libraries under /usr/lib/wsl/. has_gpu_devices() previously only checked for /dev/nvidiactl, which does not exist on WSL2, so GPU enrichment never ran. This meant /dev/dxg was never permitted by Landlock and /proc write access (required by CUDA for thread naming) was never granted. Fix by: - Extending has_gpu_devices() to also detect /dev/dxg - Adding /dev/dxg to GPU_BASELINE_READ_WRITE (device nodes need O_RDWR) - Adding /usr/lib/wsl to GPU_BASELINE_READ_ONLY for CDI-injected GPU library bind-mounts that may not be covered by the /usr parent rule across filesystem boundaries The existing path existence check in enrich_proto_baseline_paths() ensures all new entries are silently skipped on native Linux where these paths do not exist. --- crates/openshell-sandbox/src/lib.rs | 66 ++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index c2956b1e0..571ca9bce 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -1220,14 +1220,31 @@ const PROXY_BASELINE_READ_WRITE: &[&str] = &["/sandbox", "/tmp"]; /// socket at init time. If the directory exists but Landlock denies traversal /// (EACCES vs ECONNREFUSED), NVML returns `NVML_ERROR_INSUFFICIENT_PERMISSIONS` /// even though the daemon is optional. 
Only read/traversal access is needed. -const GPU_BASELINE_READ_ONLY: &[&str] = &["/run/nvidia-persistenced"]; +/// +/// `/usr/lib/wsl`: On WSL2, CDI bind-mounts GPU libraries (libdxcore.so, +/// libcuda.so.1.1, etc.) into paths under `/usr/lib/wsl/`. Although `/usr` +/// is already in `PROXY_BASELINE_READ_ONLY`, individual file bind-mounts may +/// not be covered by the parent-directory Landlock rule when the mount crosses +/// a filesystem boundary. Listing `/usr/lib/wsl` explicitly ensures traversal +/// is permitted regardless of Landlock's cross-mount behaviour. +const GPU_BASELINE_READ_ONLY: &[&str] = &[ + "/run/nvidia-persistenced", + "/usr/lib/wsl", // WSL2: CDI-injected GPU library directory +]; /// GPU read-write paths (static). /// /// `/dev/nvidiactl`, `/dev/nvidia-uvm`, `/dev/nvidia-uvm-tools`, -/// `/dev/nvidia-modeset`: control and UVM devices injected by CDI. -/// Landlock restricts `open(2)` on device files even when DAC allows it; -/// these need read-write because NVML/CUDA opens them with `O_RDWR`. +/// `/dev/nvidia-modeset`: control and UVM devices injected by CDI on native +/// Linux. Landlock restricts `open(2)` on device files even when DAC allows +/// it; these need read-write because NVML/CUDA opens them with `O_RDWR`. +/// These devices do not exist on WSL2 and will be skipped by the existence +/// check in `enrich_proto_baseline_paths()`. +/// +/// `/dev/dxg`: On WSL2, NVIDIA GPUs are exposed through the DXG kernel driver +/// (DirectX Graphics) rather than the native nvidia* devices. CDI injects +/// `/dev/dxg` as the sole GPU device node; it does not exist on native Linux +/// and will be skipped there by the existence check. /// /// `/proc`: CUDA writes to `/proc/<pid>/task/<tid>/comm` during `cuInit()` /// to set thread names. Without write access, `cuInit()` returns error 304. 
@@ -1241,12 +1258,17 @@ const GPU_BASELINE_READ_WRITE: &[&str] = &[ "/dev/nvidia-uvm", "/dev/nvidia-uvm-tools", "/dev/nvidia-modeset", + "/dev/dxg", // WSL2: DXG device (GPU via DirectX kernel driver, injected by CDI) "/proc", ]; /// Returns true if GPU devices are present in the container. +/// +/// Checks both the native Linux NVIDIA control device (`/dev/nvidiactl`) and +/// the WSL2 DXG device (`/dev/dxg`). CDI injects exactly one of these +/// depending on the host kernel; the other will not exist. fn has_gpu_devices() -> bool { - std::path::Path::new("/dev/nvidiactl").exists() + std::path::Path::new("/dev/nvidiactl").exists() || std::path::Path::new("/dev/dxg").exists() } /// Enumerate per-GPU device nodes (`/dev/nvidia0`, `/dev/nvidia1`, …). @@ -1479,6 +1501,40 @@ mod baseline_tests { ); } } + + #[test] + fn gpu_baseline_read_write_contains_dxg() { + // /dev/dxg must be present so WSL2 sandboxes get the Landlock + // read-write rule for the CDI-injected DXG device. The existence + // check in enrich_proto_baseline_paths() skips it on native Linux. + assert!( + GPU_BASELINE_READ_WRITE.contains(&"/dev/dxg"), + "/dev/dxg must be in GPU_BASELINE_READ_WRITE for WSL2 support" + ); + } + + #[test] + fn gpu_baseline_read_only_contains_usr_lib_wsl() { + // /usr/lib/wsl must be present so CDI-injected WSL2 GPU library + // bind-mounts are accessible under Landlock. Skipped on native Linux. + assert!( + GPU_BASELINE_READ_ONLY.contains(&"/usr/lib/wsl"), + "/usr/lib/wsl must be in GPU_BASELINE_READ_ONLY for WSL2 CDI library paths" + ); + } + + #[test] + fn has_gpu_devices_reflects_dxg_or_nvidiactl() { + // Verify the OR logic: result must match the manual disjunction of + // the two path checks. Passes in all environments. 
+ let nvidiactl = std::path::Path::new("/dev/nvidiactl").exists(); + let dxg = std::path::Path::new("/dev/dxg").exists(); + assert_eq!( + has_gpu_devices(), + nvidiactl || dxg, + "has_gpu_devices() should be true iff /dev/nvidiactl or /dev/dxg exists" + ); + } } /// Load sandbox policy from local files or gRPC. From 5c01d5bd8eca12e263e80370dfe21142c4e6c5a3 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 16 Apr 2026 01:42:49 -0700 Subject: [PATCH 3/3] ci(gpu): generate CDI spec before cluster bootstrap on WSL2 On WSL2 runners, no CDI spec exists by default. Add a step to generate /var/run/cdi/nvidia.yaml using nvidia-ctk from the already-pulled cluster image before the GPU cluster is bootstrapped. This ensures the NVIDIA device plugin can advertise GPU resources and CDI injection works correctly. --- .github/workflows/e2e-gpu-test.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/e2e-gpu-test.yaml b/.github/workflows/e2e-gpu-test.yaml index 9feda3769..1b4126759 100644 --- a/.github/workflows/e2e-gpu-test.yaml +++ b/.github/workflows/e2e-gpu-test.yaml @@ -66,6 +66,16 @@ jobs: - name: Install Python dependencies and generate protobuf stubs run: uv sync --frozen && mise run --no-prepare python:proto + - name: Generate CDI spec (WSL2) + if: matrix.name == 'wsl-amd64' + run: | + docker run --rm --privileged \ + -v /var/run/cdi:/var/run/cdi \ + -v /usr/lib/wsl:/usr/lib/wsl \ + --entrypoint nvidia-ctk \ + ghcr.io/nvidia/openshell/cluster:${{ inputs.image-tag }} \ + cdi generate --mode=wsl --output=/var/run/cdi/nvidia.yaml + - name: Bootstrap GPU cluster env: GATEWAY_HOST: host.docker.internal