Video failed to render: problem with gsplat and the CUDA platform.

System: Windows 11
CUDA Toolkit: NVIDIA GPU Computing Toolkit, CUDA 12.8
PyTorch: torch 2.12.1+cu126
torchvision: 0.27.1+cu126
GPU: RTX 3060
| NVIDIA-SMI 610.62                 KMD Version: 610.62        CUDA UMD Version: 13.3     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|=========================================+========================+======================|
|   0  NVIDIA GeForce RTX 3060 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   52C    P0             23W /  140W |    1660MiB /   6144MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

```
--------------------------------------------------------------------------------------------------------
ERROR when import gsplat:
C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.8/include\cub/thread/thread_store.cuh(246): error: asm operand type size(8) does not match type/size implied by constraint 'r'
  template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, uint4*, uint4>(uint4 * ptr, uint4 val) { asm volatile("st." "cg" ".v4.u32 [%0], {%1, %2, %3, %4};" : : "r"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, ulonglong2*, ulonglong2>( ulonglong2 * ptr, ulonglong2 val) { asm volatile("st." "cg" ".v2.u64 [%0], {%1, %2};" : : "r"(ptr), "l"(val.x), "l"(val.y)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, ushort4*, ushort4>(ushort4 * ptr, ushort4 val) { asm volatile("st." "cg" ".v4.u16 [%0], {%1, %2, %3, %4};" : : "r"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, uint2*, uint2>(uint2 * ptr, uint2 val) { asm volatile("st." "cg" ".v2.u32 [%0], {%1, %2};" : : "r"(ptr), "r"(val.x), "r"(val.y)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, unsigned long long*, unsigned long long>( unsigned long long* ptr, unsigned long long val) { asm volatile("st." "cg" ".u64 [%0], %1;" : : "r"(ptr), "l"(val)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, unsigned int*, unsigned int>( unsigned int* ptr, unsigned int val) { asm volatile("st." "cg" ".u32 [%0], %1;" : : "r"(ptr), "r"(val)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, unsigned short*, unsigned short>( unsigned short* ptr, unsigned short val) { asm volatile("st." "cg" ".u16 [%0], %1;" : : "r"(ptr), "h"(val)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, unsigned char*, unsigned char>( unsigned char* ptr, unsigned char val) { asm volatile( "{" "   .reg .u8 datum;" "   cvt.u8.u16 datum, %1;" "   st." "cg" ".u8 [%0], datum;" "}" : : "r"(ptr), "h"((unsigned short) val)); }
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          ^

C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.8/include\cub/thread/thread_store.cuh(246): error: asm operand type size(8) does not match type/size implied by constraint 'r'
  template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, uint4*, uint4>(uint4 * ptr, uint4 val) { asm volatile("st." "cg" ".v4.u32 [%0], {%1, %2, %3, %4};" : : "r"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, ulonglong2*, ulonglong2>( ulonglong2 * ptr, ulonglong2 val) { asm volatile("st." "cg" ".v2.u64 [%0], {%1, %2};" : : "r"(ptr), "l"(val.x), "l"(val.y)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, ushort4*, ushort4>(ushort4 * ptr, ushort4 val) { asm volatile("st." "cg" ".v4.u16 [%0], {%1, %2, %3, %4};" : : "r"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, uint2*, uint2>(uint2 * ptr, uint2 val) { asm volatile("st." "cg" ".v2.u32 [%0], {%1, %2};" : : "r"(ptr), "r"(val.x), "r"(val.y)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, unsigned long long*, unsigned long long>( unsigned long long* ptr, unsigned long long val) { asm volatile("st." "cg" ".u64 [%0], %1;" : : "r"(ptr), "l"(val)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, unsigned int*, unsigned int>( unsigned int* ptr, unsigned int val) { asm volatile("st." "cg" ".u32 [%0], %1;" : : "r"(ptr), "r"(val)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, unsigned short*, unsigned short>( unsigned short* ptr, unsigned short val) { asm volatile("st." "cg" ".u16 [%0], %1;" : : "r"(ptr), "h"(val)); } template <> __declspec(__device__) __forceinline void ThreadStore<STORE_CG, unsigned char*, unsigned char>( unsigned char* ptr, unsigned char val) { asm volatile( "{" "   .reg .u8 datum;" "   cvt.u8.u16 datum, %1;" "   st." "cg" ".u8 [%0], datum;" "}" : : "r"(ptr), "h"((unsigned short) val)); }
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     ^

Error limit reached.
100 errors detected in the compilation of "C:/Users/Lenovo/.conda/envs/mlsharp/Lib/site-packages/gsplat/cuda/csrc/IntersectTile.cu".
Compilation terminated.
IntersectTile.cu
ninja: build stopped: subcommand failed.

```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Video failed to be rendered , problem with gsplat and CUDA platform. #89

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Uh oh!

Video failed to be rendered , problem with gsplat and CUDA platform. #89

Description

Metadata

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Issue actions