From 6f1f1c69236964bdb42f5ee5c5f1d2b69518bc08 Mon Sep 17 00:00:00 2001
From: Aryan Putta <aryansputta@gmail.com>
Date: Fri, 22 May 2026 19:14:15 -0400
Subject: [PATCH 1/4] cuda.core: add docstrings to all public StrEnum types in
 typing.py

Each StrEnum class in cuda.core.typing now has a class-level docstring
that states its purpose and briefly describes every member value.
Sphinx and help() now show meaningful descriptions for CompilerBackendType,
GraphConditionalType, GraphMemoryType, ManagedMemoryLocationType,
ObjectCodeFormatType, PCHStatusType, SourceCodeType, and the
VirtualMemory* enums.

No logic changes.
---
 cuda_core/cuda/core/typing.py | 86 +++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py
index 1a6d377579d..33ae4140072 100644
--- a/cuda_core/cuda/core/typing.py
+++ b/cuda_core/cuda/core/typing.py
@@ -43,12 +43,28 @@
 
 
 class SourceCodeType(StrEnum):
+    """Source language passed to :class:`~cuda.core.Program`.
+
+    ``CXX`` selects CUDA C++, ``PTX`` selects PTX assembly text, and
+    ``NVVM`` selects NVVM IR (LLVM bitcode).
+    """
+
     CXX = "c++"
     PTX = "ptx"
     NVVM = "nvvm"
 
 
 class ObjectCodeFormatType(StrEnum):
+    """Output format produced by :meth:`~cuda.core.Program.compile`.
+
+    ``PTX`` — PTX assembly text.
+    ``CUBIN`` — device-native CUDA binary.
+    ``LTOIR`` — LTO (link-time optimization) IR for later linking.
+    ``FATBIN`` — fat binary bundling multiple device images.
+    ``OBJECT`` — relocatable device object.
+    ``LIBRARY`` — device code library.
+    """
+
     PTX = "ptx"
     CUBIN = "cubin"
     LTOIR = "ltoir"
@@ -58,6 +74,14 @@ class ObjectCodeFormatType(StrEnum):
 
 
 class CompilerBackendType(StrEnum):
+    """Compiler backend selected via :class:`~cuda.core.ProgramOptions`.
+
+    ``NVRTC`` — NVIDIA Runtime Compilation.
+    ``NVVM`` — NVVM LLVM backend.
+    ``NVJITLINK`` — nvJitLink device-side linker.
+    ``DRIVER`` — CUDA driver PTX JIT compiler.
+    """
+
     NVRTC = "NVRTC"
     NVVM = "NVVM"
     NVJITLINK = "nvJitLink"
@@ -65,36 +89,80 @@ class CompilerBackendType(StrEnum):
 
 
 class PCHStatusType(StrEnum):
+    """Precompiled-header (PCH) outcome reported by :meth:`~cuda.core.Program.compile`.
+
+    ``CREATED`` — PCH was successfully written.
+    ``NOT_ATTEMPTED`` — PCH creation was skipped (backend does not support it or
+    the option was not requested).
+    ``FAILED`` — PCH creation was attempted but failed.
+    """
+
     CREATED = "created"
     NOT_ATTEMPTED = "not_attempted"
     FAILED = "failed"
 
 
 class GraphConditionalType(StrEnum):
+    """Conditional node flavor for :class:`~cuda.core.graph.GraphBuilder`.
+
+    ``IF`` — body graph executes at most once based on a condition.
+    ``WHILE`` — body graph loops while the condition is true.
+    ``SWITCH`` — selects one child graph by an integer index.
+    """
+
     IF = "if"
     WHILE = "while"
     SWITCH = "switch"
 
 
 class GraphMemoryType(StrEnum):
+    """Memory space for a graph memory-allocation or free node.
+
+    ``DEVICE`` — GPU device memory.
+    ``HOST`` — pinned host memory.
+    ``MANAGED`` — CUDA managed (unified) memory.
+    """
+
     DEVICE = "device"
     HOST = "host"
     MANAGED = "managed"
 
 
 class ManagedMemoryLocationType(StrEnum):
+    """Destination type for managed-memory prefetch and advise operations.
+
+    ``DEVICE`` — target a GPU device.
+    ``HOST`` — target the CPU host (any NUMA node).
+    ``HOST_NUMA`` — target a specific host NUMA node (CUDA 13+ only).
+    """
+
     DEVICE = "device"
     HOST = "host"
     HOST_NUMA = "host_numa"
 
 
 class VirtualMemoryHandleType(StrEnum):
+    """OS handle type for exporting virtual memory allocations across processes.
+
+    ``POSIX_FD`` — POSIX file descriptor (Linux).
+    ``WIN32_KMT`` — Win32 D3DKMT handle (Windows).
+    ``FABRIC`` — NVLink/NVSwitch fabric handle for multi-node topologies.
+    """
+
     POSIX_FD = "posix_fd"
     WIN32_KMT = "win32_kmt"
     FABRIC = "fabric"
 
 
 class VirtualMemoryLocationType(StrEnum):
+    """Physical backing location for a virtual memory allocation.
+
+    ``DEVICE`` — GPU device memory.
+    ``HOST`` — pinned host memory.
+    ``HOST_NUMA`` — host memory pinned to a specific NUMA node.
+    ``HOST_NUMA_CURRENT`` — host memory on the calling thread's NUMA node.
+    """
+
     DEVICE = "device"
     HOST = "host"
     HOST_NUMA = "host_numa"
@@ -102,16 +170,34 @@ class VirtualMemoryLocationType(StrEnum):
 
 
 class VirtualMemoryGranularityType(StrEnum):
+    """Granularity query type for virtual memory allocations.
+
+    ``MINIMUM`` — smallest allocation size supported by the device.
+    ``RECOMMENDED`` — granularity that yields best performance on the device.
+    """
+
     MINIMUM = "minimum"
     RECOMMENDED = "recommended"
 
 
 class VirtualMemoryAccessType(StrEnum):
+    """Access permissions for a virtual memory mapping.
+
+    ``READ_WRITE`` — both read and write access.
+    ``READ`` — read-only access.
+    """
+
     READ_WRITE = "rw"
     READ = "r"
 
 
 class VirtualMemoryAllocationType(StrEnum):
+    """Physical memory type for a virtual memory backing allocation.
+
+    ``PINNED`` — page-locked (pinned) host memory.
+    ``MANAGED`` — CUDA managed (unified) memory.
+    """
+
     PINNED = "pinned"
     MANAGED = "managed"
 

From a76557d05f1e8d309ddbb49d781c3551651649b1 Mon Sep 17 00:00:00 2001
From: Aryan Putta <aryansputta@gmail.com>
Date: Fri, 22 May 2026 19:18:40 -0400
Subject: [PATCH 2/4] cuda.core: add property docstrings to Host.numa_id and
 Host.is_numa_current

Host is a new 1.1.0 API. The class-level docstring is complete but the two
public properties had no docstrings, leaving them blank in generated API docs
and in help().
---
 cuda_core/cuda/core/_host.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cuda_core/cuda/core/_host.py b/cuda_core/cuda/core/_host.py
index 79da81cae20..c4bef886495 100644
--- a/cuda_core/cuda/core/_host.py
+++ b/cuda_core/cuda/core/_host.py
@@ -62,10 +62,12 @@ def _get_or_create(cls, numa_id: int | None, is_numa_current: bool) -> Host:
 
     @property
     def numa_id(self) -> int | None:
+        """NUMA node ID, or ``None`` if not pinned to a specific NUMA node."""
         return self._numa_id
 
     @property
     def is_numa_current(self) -> bool:
+        """Whether this ``Host`` represents the calling thread's NUMA node (constructed via :meth:`numa_current`)."""
         return self._is_numa_current
 
     @classmethod

From 88e5576d0f6c4d70141655102b1cef100fd1f1b2 Mon Sep 17 00:00:00 2001
From: Aryan <aryansputta@gmail.com>
Date: Fri, 29 May 2026 12:47:14 -0400
Subject: [PATCH 3/4] cuda.core: fix is_numa_current phrasing and WIN32_KMT
 description

- is_numa_current: lead with the returned value (``True`` if ...) to
  match the bool-property docstring convention; keep the
  :meth:`numa_current` cross-ref
- WIN32_KMT: drop "D3DKMT" (DirectX-specific); CU_MEM_HANDLE_TYPE_WIN32_KMT
  maps to a general WDDM kernel-mode handle, not a D3D handle
---
 cuda_core/cuda/core/_host.py  | 2 +-
 cuda_core/cuda/core/typing.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cuda_core/cuda/core/_host.py b/cuda_core/cuda/core/_host.py
index c4bef886495..1898578b0b0 100644
--- a/cuda_core/cuda/core/_host.py
+++ b/cuda_core/cuda/core/_host.py
@@ -67,7 +67,7 @@ def numa_id(self) -> int | None:
 
     @property
     def is_numa_current(self) -> bool:
-        """Whether this ``Host`` represents the calling thread's NUMA node (constructed via :meth:`numa_current`)."""
+        """``True`` if this ``Host`` represents the calling thread's NUMA node (constructed via :meth:`numa_current`)."""
         return self._is_numa_current
 
     @classmethod
diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py
index 33ae4140072..327782950a3 100644
--- a/cuda_core/cuda/core/typing.py
+++ b/cuda_core/cuda/core/typing.py
@@ -145,7 +145,7 @@ class VirtualMemoryHandleType(StrEnum):
     """OS handle type for exporting virtual memory allocations across processes.
 
     ``POSIX_FD`` — POSIX file descriptor (Linux).
-    ``WIN32_KMT`` — Win32 D3DKMT handle (Windows).
+    ``WIN32_KMT`` — Win32 kernel-mode handle (Windows).
     ``FABRIC`` — NVLink/NVSwitch fabric handle for multi-node topologies.
     """
 

From fb80ccee511f862f340f35d97dc055df13c4523d Mon Sep 17 00:00:00 2001
From: Aryan <aryansputta@gmail.com>
Date: Wed, 3 Jun 2026 15:16:02 -0400
Subject: [PATCH 4/4] docs(typing): fix RST bullet format, accuracy issues, and
 version notes in StrEnum docstrings

- Convert all enum member descriptions to RST bullet list format
  (* ``MEMBER`` — description) so Sphinx renders them as a list
  rather than a run-on paragraph
- Fix VirtualMemoryAllocationType.PINNED: remove incorrect
  "page-locked host memory" wording; backing location is
  device by default and controlled by VirtualMemoryLocationType
- Fix CompilerBackendType class summary: backend is inferred from
  Program.code_type and exposed on Program.backend, not selected
  via ProgramOptions
- Move CUDA 13+ version note from ManagedMemoryLocationType.HOST_NUMA
  to VirtualMemoryAllocationType.MANAGED (the actual 13+-gated member;
  ManagedMemoryResource already requires CUDA 13 so the HOST_NUMA note
  was misleading)
- Fix ObjectCodeFormatType summary to mention Linker.link and
  Program.as_bytes alongside Program.compile
---
 cuda_core/cuda/core/typing.py | 82 +++++++++++++++++------------------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py
index 327782950a3..2ff3cd8e0be 100644
--- a/cuda_core/cuda/core/typing.py
+++ b/cuda_core/cuda/core/typing.py
@@ -45,8 +45,9 @@
 class SourceCodeType(StrEnum):
     """Source language passed to :class:`~cuda.core.Program`.
 
-    ``CXX`` selects CUDA C++, ``PTX`` selects PTX assembly text, and
-    ``NVVM`` selects NVVM IR (LLVM bitcode).
+    * ``CXX`` — CUDA C++ source.
+    * ``PTX`` — PTX assembly text.
+    * ``NVVM`` — NVVM IR (LLVM bitcode).
     """
 
     CXX = "c++"
@@ -55,14 +56,14 @@ class SourceCodeType(StrEnum):
 
 
 class ObjectCodeFormatType(StrEnum):
-    """Output format produced by :meth:`~cuda.core.Program.compile`.
-
-    ``PTX`` — PTX assembly text.
-    ``CUBIN`` — device-native CUDA binary.
-    ``LTOIR`` — LTO (link-time optimization) IR for later linking.
-    ``FATBIN`` — fat binary bundling multiple device images.
-    ``OBJECT`` — relocatable device object.
-    ``LIBRARY`` — device code library.
+    """Output format for :meth:`~cuda.core.Program.compile`, :meth:`~cuda.core.Linker.link`, and :meth:`~cuda.core.Program.as_bytes`.
+
+    * ``PTX`` — PTX assembly text.
+    * ``CUBIN`` — device-native CUDA binary.
+    * ``LTOIR`` — LTO (link-time optimization) IR for later linking.
+    * ``FATBIN`` — fat binary bundling multiple device images.
+    * ``OBJECT`` — relocatable device object.
+    * ``LIBRARY`` — device code library.
     """
 
     PTX = "ptx"
@@ -74,12 +75,12 @@ class ObjectCodeFormatType(StrEnum):
 
 
 class CompilerBackendType(StrEnum):
-    """Compiler backend selected via :class:`~cuda.core.ProgramOptions`.
+    """Compiler backend inferred from the program's code type and exposed on :attr:`~cuda.core.Program.backend`.
 
-    ``NVRTC`` — NVIDIA Runtime Compilation.
-    ``NVVM`` — NVVM LLVM backend.
-    ``NVJITLINK`` — nvJitLink device-side linker.
-    ``DRIVER`` — CUDA driver PTX JIT compiler.
+    * ``NVRTC`` — NVIDIA Runtime Compilation.
+    * ``NVVM`` — NVVM LLVM backend.
+    * ``NVJITLINK`` — nvJitLink device-side linker.
+    * ``DRIVER`` — CUDA driver PTX JIT compiler.
     """
 
     NVRTC = "NVRTC"
@@ -91,10 +92,9 @@ class CompilerBackendType(StrEnum):
 class PCHStatusType(StrEnum):
     """Precompiled-header (PCH) outcome reported by :meth:`~cuda.core.Program.compile`.
 
-    ``CREATED`` — PCH was successfully written.
-    ``NOT_ATTEMPTED`` — PCH creation was skipped (backend does not support it or
-    the option was not requested).
-    ``FAILED`` — PCH creation was attempted but failed.
+    * ``CREATED`` — PCH was successfully written.
+    * ``NOT_ATTEMPTED`` — PCH creation was skipped (backend does not support it or the option was not requested).
+    * ``FAILED`` — PCH creation was attempted but failed.
     """
 
     CREATED = "created"
@@ -105,9 +105,9 @@ class PCHStatusType(StrEnum):
 class GraphConditionalType(StrEnum):
     """Conditional node flavor for :class:`~cuda.core.graph.GraphBuilder`.
 
-    ``IF`` — body graph executes at most once based on a condition.
-    ``WHILE`` — body graph loops while the condition is true.
-    ``SWITCH`` — selects one child graph by an integer index.
+    * ``IF`` — body graph executes at most once based on a condition.
+    * ``WHILE`` — body graph loops while the condition is true.
+    * ``SWITCH`` — selects one child graph by an integer index.
     """
 
     IF = "if"
@@ -118,9 +118,9 @@ class GraphConditionalType(StrEnum):
 class GraphMemoryType(StrEnum):
     """Memory space for a graph memory-allocation or free node.
 
-    ``DEVICE`` — GPU device memory.
-    ``HOST`` — pinned host memory.
-    ``MANAGED`` — CUDA managed (unified) memory.
+    * ``DEVICE`` — GPU device memory.
+    * ``HOST`` — pinned host memory.
+    * ``MANAGED`` — CUDA managed (unified) memory.
     """
 
     DEVICE = "device"
@@ -131,9 +131,9 @@ class GraphMemoryType(StrEnum):
 class ManagedMemoryLocationType(StrEnum):
     """Destination type for managed-memory prefetch and advise operations.
 
-    ``DEVICE`` — target a GPU device.
-    ``HOST`` — target the CPU host (any NUMA node).
-    ``HOST_NUMA`` — target a specific host NUMA node (CUDA 13+ only).
+    * ``DEVICE`` — target a GPU device.
+    * ``HOST`` — target the CPU host (any NUMA node).
+    * ``HOST_NUMA`` — target a specific host NUMA node.
     """
 
     DEVICE = "device"
@@ -144,9 +144,9 @@ class ManagedMemoryLocationType(StrEnum):
 class VirtualMemoryHandleType(StrEnum):
     """OS handle type for exporting virtual memory allocations across processes.
 
-    ``POSIX_FD`` — POSIX file descriptor (Linux).
-    ``WIN32_KMT`` — Win32 kernel-mode handle (Windows).
-    ``FABRIC`` — NVLink/NVSwitch fabric handle for multi-node topologies.
+    * ``POSIX_FD`` — POSIX file descriptor (Linux).
+    * ``WIN32_KMT`` — Win32 kernel-mode handle (Windows).
+    * ``FABRIC`` — NVLink/NVSwitch fabric handle for multi-node topologies.
     """
 
     POSIX_FD = "posix_fd"
@@ -157,10 +157,10 @@ class VirtualMemoryHandleType(StrEnum):
 class VirtualMemoryLocationType(StrEnum):
     """Physical backing location for a virtual memory allocation.
 
-    ``DEVICE`` — GPU device memory.
-    ``HOST`` — pinned host memory.
-    ``HOST_NUMA`` — host memory pinned to a specific NUMA node.
-    ``HOST_NUMA_CURRENT`` — host memory on the calling thread's NUMA node.
+    * ``DEVICE`` — GPU device memory.
+    * ``HOST`` — pinned host memory.
+    * ``HOST_NUMA`` — host memory pinned to a specific NUMA node.
+    * ``HOST_NUMA_CURRENT`` — host memory on the calling thread's NUMA node.
     """
 
     DEVICE = "device"
@@ -172,8 +172,8 @@ class VirtualMemoryLocationType(StrEnum):
 class VirtualMemoryGranularityType(StrEnum):
     """Granularity query type for virtual memory allocations.
 
-    ``MINIMUM`` — smallest allocation size supported by the device.
-    ``RECOMMENDED`` — granularity that yields best performance on the device.
+    * ``MINIMUM`` — smallest allocation size supported by the device.
+    * ``RECOMMENDED`` — granularity that yields best performance on the device.
     """
 
     MINIMUM = "minimum"
@@ -183,8 +183,8 @@ class VirtualMemoryGranularityType(StrEnum):
 class VirtualMemoryAccessType(StrEnum):
     """Access permissions for a virtual memory mapping.
 
-    ``READ_WRITE`` — both read and write access.
-    ``READ`` — read-only access.
+    * ``READ_WRITE`` — both read and write access.
+    * ``READ`` — read-only access.
     """
 
     READ_WRITE = "rw"
@@ -194,8 +194,8 @@ class VirtualMemoryAccessType(StrEnum):
 class VirtualMemoryAllocationType(StrEnum):
     """Physical memory type for a virtual memory backing allocation.
 
-    ``PINNED`` — page-locked (pinned) host memory.
-    ``MANAGED`` — CUDA managed (unified) memory.
+    * ``PINNED`` — pinned/non-migratable physical allocation (placement via :class:`VirtualMemoryLocationType`).
+    * ``MANAGED`` — CUDA managed (unified) memory (CUDA 13+ only).
     """
 
     PINNED = "pinned"