From 6f1f1c69236964bdb42f5ee5c5f1d2b69518bc08 Mon Sep 17 00:00:00 2001 From: Aryan Putta Date: Fri, 22 May 2026 19:14:15 -0400 Subject: [PATCH 1/4] cuda.core: add docstrings to all public StrEnum types in typing.py Each StrEnum class in cuda.core.typing now has a class-level docstring that states its purpose and briefly describes every member value. Sphinx and help() now show meaningful descriptions for CompilerBackendType, GraphConditionalType, GraphMemoryType, ManagedMemoryLocationType, ObjectCodeFormatType, PCHStatusType, SourceCodeType, VirtualMemory*. No logic changes. --- cuda_core/cuda/core/typing.py | 86 +++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py index 1a6d377579d..33ae4140072 100644 --- a/cuda_core/cuda/core/typing.py +++ b/cuda_core/cuda/core/typing.py @@ -43,12 +43,28 @@ class SourceCodeType(StrEnum): + """Source language passed to :class:`~cuda.core.Program`. + + ``CXX`` selects CUDA C++, ``PTX`` selects PTX assembly text, and + ``NVVM`` selects NVVM IR (LLVM bitcode). + """ + CXX = "c++" PTX = "ptx" NVVM = "nvvm" class ObjectCodeFormatType(StrEnum): + """Output format produced by :meth:`~cuda.core.Program.compile`. + + ``PTX`` — PTX assembly text. + ``CUBIN`` — device-native CUDA binary. + ``LTOIR`` — LTO (link-time optimization) IR for later linking. + ``FATBIN`` — fat binary bundling multiple device images. + ``OBJECT`` — relocatable device object. + ``LIBRARY`` — device code library. + """ + PTX = "ptx" CUBIN = "cubin" LTOIR = "ltoir" @@ -58,6 +74,14 @@ class ObjectCodeFormatType(StrEnum): class CompilerBackendType(StrEnum): + """Compiler backend selected via :class:`~cuda.core.ProgramOptions`. + + ``NVRTC`` — NVIDIA Runtime Compilation. + ``NVVM`` — NVVM LLVM backend. + ``NVJITLINK`` — nvJitLink device-side linker. + ``DRIVER`` — CUDA driver PTX JIT compiler. 
+ """ + NVRTC = "NVRTC" NVVM = "NVVM" NVJITLINK = "nvJitLink" @@ -65,36 +89,80 @@ class CompilerBackendType(StrEnum): class PCHStatusType(StrEnum): + """Precompiled-header (PCH) outcome reported by :meth:`~cuda.core.Program.compile`. + + ``CREATED`` — PCH was successfully written. + ``NOT_ATTEMPTED`` — PCH creation was skipped (backend does not support it or + the option was not requested). + ``FAILED`` — PCH creation was attempted but failed. + """ + CREATED = "created" NOT_ATTEMPTED = "not_attempted" FAILED = "failed" class GraphConditionalType(StrEnum): + """Conditional node flavor for :class:`~cuda.core.graph.GraphBuilder`. + + ``IF`` — body graph executes at most once based on a condition. + ``WHILE`` — body graph loops while the condition is true. + ``SWITCH`` — selects one child graph by an integer index. + """ + IF = "if" WHILE = "while" SWITCH = "switch" class GraphMemoryType(StrEnum): + """Memory space for a graph memory-allocation or free node. + + ``DEVICE`` — GPU device memory. + ``HOST`` — pinned host memory. + ``MANAGED`` — CUDA managed (unified) memory. + """ + DEVICE = "device" HOST = "host" MANAGED = "managed" class ManagedMemoryLocationType(StrEnum): + """Destination type for managed-memory prefetch and advise operations. + + ``DEVICE`` — target a GPU device. + ``HOST`` — target the CPU host (any NUMA node). + ``HOST_NUMA`` — target a specific host NUMA node (CUDA 13+ only). + """ + DEVICE = "device" HOST = "host" HOST_NUMA = "host_numa" class VirtualMemoryHandleType(StrEnum): + """OS handle type for exporting virtual memory allocations across processes. + + ``POSIX_FD`` — POSIX file descriptor (Linux). + ``WIN32_KMT`` — Win32 D3DKMT handle (Windows). + ``FABRIC`` — NVLink/NVSwitch fabric handle for multi-node topologies. + """ + POSIX_FD = "posix_fd" WIN32_KMT = "win32_kmt" FABRIC = "fabric" class VirtualMemoryLocationType(StrEnum): + """Physical backing location for a virtual memory allocation. + + ``DEVICE`` — GPU device memory. 
+ ``HOST`` — pinned host memory. + ``HOST_NUMA`` — host memory pinned to a specific NUMA node. + ``HOST_NUMA_CURRENT`` — host memory on the calling thread's NUMA node. + """ + DEVICE = "device" HOST = "host" HOST_NUMA = "host_numa" @@ -102,16 +170,34 @@ class VirtualMemoryLocationType(StrEnum): class VirtualMemoryGranularityType(StrEnum): + """Granularity query type for virtual memory allocations. + + ``MINIMUM`` — smallest allocation size supported by the device. + ``RECOMMENDED`` — granularity that yields best performance on the device. + """ + MINIMUM = "minimum" RECOMMENDED = "recommended" class VirtualMemoryAccessType(StrEnum): + """Access permissions for a virtual memory mapping. + + ``READ_WRITE`` — both read and write access. + ``READ`` — read-only access. + """ + READ_WRITE = "rw" READ = "r" class VirtualMemoryAllocationType(StrEnum): + """Physical memory type for a virtual memory backing allocation. + + ``PINNED`` — page-locked (pinned) host memory. + ``MANAGED`` — CUDA managed (unified) memory. + """ + PINNED = "pinned" MANAGED = "managed" From a76557d05f1e8d309ddbb49d781c3551651649b1 Mon Sep 17 00:00:00 2001 From: Aryan Putta Date: Fri, 22 May 2026 19:18:40 -0400 Subject: [PATCH 2/4] cuda.core: add property docstrings to Host.numa_id and Host.is_numa_current Host is a new 1.1.0 API. The class-level docstring is complete but the two public properties had no docstrings, leaving them blank in generated API docs and in help(). 
--- cuda_core/cuda/core/_host.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cuda_core/cuda/core/_host.py b/cuda_core/cuda/core/_host.py index 79da81cae20..c4bef886495 100644 --- a/cuda_core/cuda/core/_host.py +++ b/cuda_core/cuda/core/_host.py @@ -62,10 +62,12 @@ def _get_or_create(cls, numa_id: int | None, is_numa_current: bool) -> Host: @property def numa_id(self) -> int | None: + """NUMA node ID, or ``None`` if not pinned to a specific NUMA node.""" return self._numa_id @property def is_numa_current(self) -> bool: + """Whether this ``Host`` represents the calling thread's NUMA node (constructed via :meth:`numa_current`).""" return self._is_numa_current @classmethod From 88e5576d0f6c4d70141655102b1cef100fd1f1b2 Mon Sep 17 00:00:00 2001 From: Aryan Date: Fri, 29 May 2026 12:47:14 -0400 Subject: [PATCH 3/4] cuda.core: fix is_numa_current phrasing and WIN32_KMT description - is_numa_current: lead with return type (``True`` if ...) to match bool-property docstring convention; keep :meth:`numa_current` cross-ref - WIN32_KMT: drop "D3DKMT" (DirectX-specific); CU_MEM_HANDLE_TYPE_WIN32_KMT maps to a general WDDM kernel-mode handle, not a D3D handle --- cuda_core/cuda/core/_host.py | 2 +- cuda_core/cuda/core/typing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/cuda/core/_host.py b/cuda_core/cuda/core/_host.py index c4bef886495..1898578b0b0 100644 --- a/cuda_core/cuda/core/_host.py +++ b/cuda_core/cuda/core/_host.py @@ -67,7 +67,7 @@ def numa_id(self) -> int | None: @property def is_numa_current(self) -> bool: - """Whether this ``Host`` represents the calling thread's NUMA node (constructed via :meth:`numa_current`).""" + """``True`` if this ``Host`` represents the calling thread's NUMA node (constructed via :meth:`numa_current`).""" return self._is_numa_current @classmethod diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py index 33ae4140072..327782950a3 100644 --- a/cuda_core/cuda/core/typing.py +++ 
b/cuda_core/cuda/core/typing.py @@ -145,7 +145,7 @@ class VirtualMemoryHandleType(StrEnum): """OS handle type for exporting virtual memory allocations across processes. ``POSIX_FD`` — POSIX file descriptor (Linux). - ``WIN32_KMT`` — Win32 D3DKMT handle (Windows). + ``WIN32_KMT`` — Win32 kernel-mode handle (Windows). ``FABRIC`` — NVLink/NVSwitch fabric handle for multi-node topologies. """ From fb80ccee511f862f340f35d97dc055df13c4523d Mon Sep 17 00:00:00 2001 From: Aryan Date: Wed, 3 Jun 2026 15:16:02 -0400 Subject: [PATCH 4/4] docs(typing): fix RST bullet format, accuracy issues, and version notes in StrEnum docstrings - Convert all enum member descriptions to RST bullet list format (* ``MEMBER`` — description) so Sphinx renders them as a list rather than a run-on paragraph - Fix VirtualMemoryAllocationType.PINNED: remove incorrect "page-locked host memory" wording; backing location is device by default and controlled by VirtualMemoryLocationType - Fix CompilerBackendType class summary: backend is inferred from Program.code_type and exposed on Program.backend, not selected via ProgramOptions - Move CUDA 13+ version note from ManagedMemoryLocationType.HOST_NUMA to VirtualMemoryAllocationType.MANAGED (the actual 13+-gated member; ManagedMemoryResource already requires CUDA 13 so the HOST_NUMA note was misleading) - Fix ObjectCodeFormatType summary to mention Linker.link and Program.as_bytes alongside Program.compile --- cuda_core/cuda/core/typing.py | 82 +++++++++++++++++------------------ 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py index 327782950a3..2ff3cd8e0be 100644 --- a/cuda_core/cuda/core/typing.py +++ b/cuda_core/cuda/core/typing.py @@ -45,8 +45,9 @@ class SourceCodeType(StrEnum): """Source language passed to :class:`~cuda.core.Program`. - ``CXX`` selects CUDA C++, ``PTX`` selects PTX assembly text, and - ``NVVM`` selects NVVM IR (LLVM bitcode). + * ``CXX`` — CUDA C++ source. 
+ * ``PTX`` — PTX assembly text. + * ``NVVM`` — NVVM IR (LLVM bitcode). """ CXX = "c++" @@ -55,14 +56,14 @@ class SourceCodeType(StrEnum): class ObjectCodeFormatType(StrEnum): - """Output format produced by :meth:`~cuda.core.Program.compile`. - - ``PTX`` — PTX assembly text. - ``CUBIN`` — device-native CUDA binary. - ``LTOIR`` — LTO (link-time optimization) IR for later linking. - ``FATBIN`` — fat binary bundling multiple device images. - ``OBJECT`` — relocatable device object. - ``LIBRARY`` — device code library. + """Output format for :meth:`~cuda.core.Program.compile`, :meth:`~cuda.core.Linker.link`, and :meth:`~cuda.core.Program.as_bytes`. + + * ``PTX`` — PTX assembly text. + * ``CUBIN`` — device-native CUDA binary. + * ``LTOIR`` — LTO (link-time optimization) IR for later linking. + * ``FATBIN`` — fat binary bundling multiple device images. + * ``OBJECT`` — relocatable device object. + * ``LIBRARY`` — device code library. """ PTX = "ptx" @@ -74,12 +75,12 @@ class ObjectCodeFormatType(StrEnum): class CompilerBackendType(StrEnum): - """Compiler backend selected via :class:`~cuda.core.ProgramOptions`. + """Compiler backend inferred from the program's code type and exposed on :attr:`~cuda.core.Program.backend`. - ``NVRTC`` — NVIDIA Runtime Compilation. - ``NVVM`` — NVVM LLVM backend. - ``NVJITLINK`` — nvJitLink device-side linker. - ``DRIVER`` — CUDA driver PTX JIT compiler. + * ``NVRTC`` — NVIDIA Runtime Compilation. + * ``NVVM`` — NVVM LLVM backend. + * ``NVJITLINK`` — nvJitLink device-side linker. + * ``DRIVER`` — CUDA driver PTX JIT compiler. """ NVRTC = "NVRTC" @@ -91,10 +92,9 @@ class CompilerBackendType(StrEnum): class PCHStatusType(StrEnum): """Precompiled-header (PCH) outcome reported by :meth:`~cuda.core.Program.compile`. - ``CREATED`` — PCH was successfully written. - ``NOT_ATTEMPTED`` — PCH creation was skipped (backend does not support it or - the option was not requested). - ``FAILED`` — PCH creation was attempted but failed. 
+ * ``CREATED`` — PCH was successfully written. + * ``NOT_ATTEMPTED`` — PCH creation was skipped (backend does not support it or the option was not requested). + * ``FAILED`` — PCH creation was attempted but failed. """ CREATED = "created" @@ -105,9 +105,9 @@ class PCHStatusType(StrEnum): class GraphConditionalType(StrEnum): """Conditional node flavor for :class:`~cuda.core.graph.GraphBuilder`. - ``IF`` — body graph executes at most once based on a condition. - ``WHILE`` — body graph loops while the condition is true. - ``SWITCH`` — selects one child graph by an integer index. + * ``IF`` — body graph executes at most once based on a condition. + * ``WHILE`` — body graph loops while the condition is true. + * ``SWITCH`` — selects one child graph by an integer index. """ IF = "if" @@ -118,9 +118,9 @@ class GraphConditionalType(StrEnum): class GraphMemoryType(StrEnum): """Memory space for a graph memory-allocation or free node. - ``DEVICE`` — GPU device memory. - ``HOST`` — pinned host memory. - ``MANAGED`` — CUDA managed (unified) memory. + * ``DEVICE`` — GPU device memory. + * ``HOST`` — pinned host memory. + * ``MANAGED`` — CUDA managed (unified) memory. """ DEVICE = "device" @@ -131,9 +131,9 @@ class GraphMemoryType(StrEnum): class ManagedMemoryLocationType(StrEnum): """Destination type for managed-memory prefetch and advise operations. - ``DEVICE`` — target a GPU device. - ``HOST`` — target the CPU host (any NUMA node). - ``HOST_NUMA`` — target a specific host NUMA node (CUDA 13+ only). + * ``DEVICE`` — target a GPU device. + * ``HOST`` — target the CPU host (any NUMA node). + * ``HOST_NUMA`` — target a specific host NUMA node. """ DEVICE = "device" @@ -144,9 +144,9 @@ class ManagedMemoryLocationType(StrEnum): class VirtualMemoryHandleType(StrEnum): """OS handle type for exporting virtual memory allocations across processes. - ``POSIX_FD`` — POSIX file descriptor (Linux). - ``WIN32_KMT`` — Win32 kernel-mode handle (Windows). 
- ``FABRIC`` — NVLink/NVSwitch fabric handle for multi-node topologies. + * ``POSIX_FD`` — POSIX file descriptor (Linux). + * ``WIN32_KMT`` — Win32 kernel-mode handle (Windows). + * ``FABRIC`` — NVLink/NVSwitch fabric handle for multi-node topologies. """ POSIX_FD = "posix_fd" @@ -157,10 +157,10 @@ class VirtualMemoryHandleType(StrEnum): class VirtualMemoryLocationType(StrEnum): """Physical backing location for a virtual memory allocation. - ``DEVICE`` — GPU device memory. - ``HOST`` — pinned host memory. - ``HOST_NUMA`` — host memory pinned to a specific NUMA node. - ``HOST_NUMA_CURRENT`` — host memory on the calling thread's NUMA node. + * ``DEVICE`` — GPU device memory. + * ``HOST`` — pinned host memory. + * ``HOST_NUMA`` — host memory pinned to a specific NUMA node. + * ``HOST_NUMA_CURRENT`` — host memory on the calling thread's NUMA node. """ DEVICE = "device" @@ -172,8 +172,8 @@ class VirtualMemoryLocationType(StrEnum): class VirtualMemoryGranularityType(StrEnum): """Granularity query type for virtual memory allocations. - ``MINIMUM`` — smallest allocation size supported by the device. - ``RECOMMENDED`` — granularity that yields best performance on the device. + * ``MINIMUM`` — smallest allocation size supported by the device. + * ``RECOMMENDED`` — granularity that yields best performance on the device. """ MINIMUM = "minimum" @@ -183,8 +183,8 @@ class VirtualMemoryGranularityType(StrEnum): class VirtualMemoryAccessType(StrEnum): """Access permissions for a virtual memory mapping. - ``READ_WRITE`` — both read and write access. - ``READ`` — read-only access. + * ``READ_WRITE`` — both read and write access. + * ``READ`` — read-only access. """ READ_WRITE = "rw" @@ -194,8 +194,8 @@ class VirtualMemoryAccessType(StrEnum): class VirtualMemoryAllocationType(StrEnum): """Physical memory type for a virtual memory backing allocation. - ``PINNED`` — page-locked (pinned) host memory. - ``MANAGED`` — CUDA managed (unified) memory. 
+ * ``PINNED`` — pinned/non-migratable physical allocation (placement via :class:`VirtualMemoryLocationType`). + * ``MANAGED`` — CUDA managed (unified) memory (CUDA 13+ only). """ PINNED = "pinned"