From e882b8e50cad4a22f64a4d0f016ee37c72ecbe8b Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 17 Apr 2026 11:36:03 +0200 Subject: [PATCH 1/8] Bump minimum version of CUDA and cuTENSOR --- Project.toml | 6 +++--- test/cuda/factorizations.jl | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 28088e3de..83ee37582 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "TensorKit" uuid = "07d1fe3e-3e46-537d-9eac-e9e13d0d4cec" -version = "0.16.3" +version = "0.17.0" authors = ["Jutho Haegeman, Lukas Devos"] [deps] @@ -41,7 +41,7 @@ projects = ["test", "docs"] [compat] Adapt = "4" AMDGPU = "2" -CUDA = "5.9" +CUDA = "6" ChainRulesCore = "1" Dictionaries = "0.4" FiniteDifferences = "0.12" @@ -58,5 +58,5 @@ TensorKitSectors = "0.3.7" TensorOperations = "5.1" TupleTools = "1.5" VectorInterface = "0.4.8, 0.5" -cuTENSOR = "2" +cuTENSOR = "6" julia = "1.10" diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl index 62e23c9df..d18672459 100644 --- a/test/cuda/factorizations.jl +++ b/test/cuda/factorizations.jl @@ -373,7 +373,7 @@ for V in spacelist d1, d2 = dim(codomain(t)), dim(domain(t)) r = rank(t) - @test r == min(d1, d2) + @test r ≈ min(d1, d2) @test typeof(r) == typeof(d1) M = left_null(t) @test @constinferred(rank(M)) + r ≈ d1 From 20fbbdc69acbd23b05850187958f5323c0d8a4a6 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 21 Apr 2026 09:26:14 -0400 Subject: [PATCH 2/8] Some import fixes --- ext/TensorKitCUDAExt/TensorKitCUDAExt.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl index f5efb98bb..f7e87d16e 100644 --- a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl +++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl @@ -1,9 +1,9 @@ module TensorKitCUDAExt -using CUDA, CUDA.CUBLAS, CUDA.CUSOLVER, LinearAlgebra +using CUDA, CUDA.cuBLAS, CUDA.cuSOLVER, CUDA.cuRAND, LinearAlgebra using CUDA: @allowscalar using cuTENSOR: cuTENSOR -import CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! +import CUDA.cuRAND: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! using TensorKit using TensorKit.Factorizations From 66c1bc6e9dd40c993c7afb5e2277ee7694c42b91 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 21 Apr 2026 09:27:41 -0400 Subject: [PATCH 3/8] Sources for Projects --- Project.toml | 7 +++++++ test/Project.toml | 2 ++ 2 files changed, 9 insertions(+) diff --git a/Project.toml b/Project.toml index 83ee37582..5230c5755 100644 --- a/Project.toml +++ b/Project.toml @@ -60,3 +60,10 @@ TupleTools = "1.5" VectorInterface = "0.4.8, 0.5" cuTENSOR = "6" julia = "1.10" + +[extras] +Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" + +[sources] +MatrixAlgebraKit = {url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl", rev = "ksh/cuda6"} +Mooncake = {url = "https://github.com/chalk-lab/Mooncake.jl", rev = "ksh/cuda6"} diff --git a/test/Project.toml b/test/Project.toml index 18af8af80..9190343e9 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -29,6 +29,8 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [sources] TensorKit = {path = ".."} +MatrixAlgebraKit = {url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl", rev = "ksh/cuda6"} +Mooncake = {url = "https://github.com/chalk-lab/Mooncake.jl", rev = "ksh/cuda6"} [compat] Aqua = "0.6, 0.7, 0.8" From b7825aa167a4c35898d1a053b072e639c4625d80 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 22 Apr 2026 01:45:48 -0400 Subject: [PATCH 4/8] Fix rand/randn --- test/cuda/factorizations.jl | 100 ++++++++++++++++++------------------ test/cuda/tensors.jl | 88 +++++++++++++++---------------- 2 files changed, 94 insertions(+), 94 deletions(-) diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl index d18672459..d7c8e0520 100644 --- a/test/cuda/factorizations.jl +++ b/test/cuda/factorizations.jl @@ -1,4 +1,4 @@ -using Adapt, CUDA, cuTENSOR +using Adapt, CUDA, CUDA.cuRAND, cuTENSOR using Test, TestExtras using TensorKit using LinearAlgebra: LinearAlgebra @@ -23,10 +23,10 @@ for V in spacelist @testset "QR decomposition" begin for T in eltypes, t in ( - CUDA.rand(T, W, W), CUDA.rand(T, W, W)', - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)')', - CUDA.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5)), CUDA.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), cuRAND.rand(T, W, W)', + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)')', + cuRAND.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5)), cuRAND.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) Q, R = @constinferred qr_full(t) @@ -52,7 +52,7 @@ for V in spacelist # empty tensor for T in eltypes - t = CUDA.rand(T, V1 ⊗ V2, zerospace(V1)) + t = cuRAND.rand(T, V1 ⊗ V2, zerospace(V1)) Q, R = @constinferred qr_full(t) @test Q * R ≈ t @@ -78,10 +78,10 @@ for V in spacelist @testset "LQ decomposition" begin for T in eltypes, t in ( - CUDA.rand(T, W, W), CUDA.rand(T, W, W)', - CUDA.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)')', - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5)), CUDA.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), cuRAND.rand(T, W, W)', + cuRAND.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)')', + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5)), cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) L, Q = @constinferred lq_full(t) @@ -103,7 +103,7 @@ for V in spacelist for T in eltypes # empty tensor - t = CUDA.rand(T, zerospace(V1), V1 ⊗ V2) + t = cuRAND.rand(T, zerospace(V1), V1 ⊗ V2) L, Q = @constinferred lq_full(t) @test L * Q ≈ t @@ -129,10 +129,10 @@ for V in spacelist @testset "Polar decomposition" begin @testset for T in eltypes, t in ( - CUDA.rand(T, W, W), - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), - CUDA.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), + cuRAND.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) @assert domain(t) ≾ codomain(t) @@ -148,10 +148,10 @@ for V in spacelist @testset for T in eltypes, t in ( - CUDA.rand(T, W, W), - CUDA.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), + cuRAND.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) @assert codomain(t) ≾ domain(t) @@ -169,10 +169,10 @@ for V in spacelist @testset "SVD" begin for T in eltypes, t in ( - CUDA.rand(T, W, W), CUDA.rand(T, W, W)', - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', - CUDA.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), cuRAND.rand(T, W, W)', + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + cuRAND.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) u, s, vᴴ = @constinferred svd_full(t) @@ -220,8 +220,8 @@ for V in spacelist # empty tensor for T in eltypes, t in ( - CUDA.rand(T, W, zerospace(V1)), - CUDA.rand(T, zerospace(V1), W), + cuRAND.rand(T, W, zerospace(V1)), + cuRAND.rand(T, zerospace(V1), W), ) U, S, Vᴴ = @constinferred svd_full(t) @test U * S * Vᴴ ≈ t @@ -237,10 +237,10 @@ for V in spacelist @testset "truncated SVD" begin for T in eltypes, t in ( - CUDA.randn(T, W, W), CUDA.randn(T, W, W)', - CUDA.randn(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), CUDA.randn(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', - CUDA.randn(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), CUDA.randn(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', - DiagonalTensorMap(CUDA.randn(T, reduceddim(V1)), V1), + cuRAND.randn(T, W, W), cuRAND.randn(T, W, W)', + cuRAND.randn(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), cuRAND.randn(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + cuRAND.randn(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), cuRAND.randn(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.randn(T, reduceddim(V1)), V1), ) @constinferred normalize!(t) @@ -305,10 +305,10 @@ for V in spacelist @testset "Eigenvalue decomposition" begin for T in eltypes, t in ( - CUDA.rand(T, V1, V1), - CUDA.rand(T, W, W), - CUDA.rand(T, W, W)', - # DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, V1, V1), + cuRAND.rand(T, W, W), + cuRAND.rand(T, W, W)', + # DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) d, v = @constinferred eig_full(t) @@ -365,10 +365,10 @@ for V in spacelist @testset "Condition number and rank" begin for T in eltypes, t in ( - CUDA.rand(T, W, W), CUDA.rand(T, W, W)', - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', - CUDA.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), cuRAND.rand(T, W, W)', + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + cuRAND.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) d1, d2 = dim(codomain(t)), dim(domain(t)) @@ -385,15 +385,15 @@ for V in spacelist @test @constinferred(cond(u)) ≈ one(real(T)) @test @constinferred(rank(u)) == dim(V1 ⊗ V2) - t = CUDA.rand(T, zerospace(V1), W) + t = cuRAND.rand(T, zerospace(V1), W) @test rank(t) == 0 - t2 = CUDA.rand(T, zerospace(V1) * zerospace(V2), zerospace(V1) * zerospace(V2)) + t2 = cuRAND.rand(T, zerospace(V1) * zerospace(V2), zerospace(V1) * zerospace(V2)) @test rank(t2) == 0 @test cond(t2) == 0.0 end for T in eltypes, t in ( - CUDA.rand(T, W, W), - CUDA.rand(T, W, W)', + cuRAND.rand(T, W, W), + cuRAND.rand(T, W, W)', ) project_hermitian!(t) vals = @constinferred LinearAlgebra.eigvals(t) @@ -406,10 +406,10 @@ for V in spacelist @testset "Hermitian projections" begin for T in eltypes, t in ( - CUDA.rand(T, V1, V1), - CUDA.rand(T, W, W), - CUDA.rand(T, W, W)', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, V1, V1), + cuRAND.rand(T, W, W), + cuRAND.rand(T, W, W)', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) normalize!(t) noisefactor = eps(real(T))^(3 / 4) @@ -439,10 +439,10 @@ for V in spacelist @testset "Isometric projections" begin for T in eltypes, t in ( - CUDA.randn(T, W, W), - CUDA.randn(T, W, W)', - CUDA.randn(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), - CUDA.randn(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + cuRAND.randn(T, W, W), + cuRAND.randn(T, W, W)', + cuRAND.randn(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), + cuRAND.randn(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', ) t2 = project_isometric(t) @test isisometric(t2) @@ -457,7 +457,7 @@ for V in spacelist # test that t2 is closer to A then any other isometry for k in 1:10 - δt = CUDA.randn!(similar(t)) + δt = cuRAND.randn!(similar(t)) t3 = project_isometric(t + δt / 100) @test norm(t - t3) > norm(t - t2) end diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index c88e98b45..f33c40c44 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -1,4 +1,4 @@ -using Adapt, CUDA, cuTENSOR +using Adapt, CUDA, CUDA.cuRAND, cuTENSOR using Test, TestExtras using TensorKit, Combinatorics ad = adapt(Array) @@ -20,7 +20,7 @@ for V in spacelist @timedtestset "Basic tensor properties" begin W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 # test default pass-throughs - for f in (CUDA.zeros, CUDA.ones, CUDA.rand, CUDA.randn) + for f in (CUDA.zeros, CUDA.ones, cuRAND.rand, cuRAND.randn) t = @constinferred f(W) @test scalartype(t) == Float64 @test codomain(t) == W @@ -44,7 +44,7 @@ for V in spacelist @test domain(t) == one(W) @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, CuVector{Float64, CUDA.DeviceMemory}} end - for f! in (CUDA.rand!, CUDA.randn!) + for f! in (cuRAND.rand!, cuRAND.randn!) t = @constinferred CUDA.zeros(W) f!(t) @test scalartype(t) == Float64 @@ -113,7 +113,7 @@ for V in spacelist @timedtestset "Tensor Dict conversion" begin W = V1 ⊗ V2 ← (V3 ⊗ V4 ⊗ V5)' for T in (Int, Float32, ComplexF64) - t = @constinferred CUDA.rand(T, W) + t = @constinferred cuRAND.rand(T, W) d = convert(Dict, t) @test convert(Dict, TensorKit.to_cpu(t)) == d end @@ -121,7 +121,7 @@ for V in spacelist symmetricbraiding && @timedtestset "Basic linear algebra" begin W = V1 ⊗ V2 ← (V3 ⊗ V4 ⊗ V5)' for T in (Float32, ComplexF64) - t = @constinferred CUDA.rand(T, W) + t = @constinferred cuRAND.rand(T, W) @test scalartype(t) == T @test space(t) == W @test space(t') == W' @@ -171,7 +171,7 @@ for V in spacelist @timedtestset "Trivial space insertion and removal" begin W = V1 ⊗ V2 ← (V3 ⊗ V4 ⊗ V5)' for T in (Float32, ComplexF64) - t = @constinferred CUDA.rand(T, W) + t = @constinferred cuRAND.rand(T, W) t2 = @constinferred insertleftunit(t) @test t2 == @constinferred insertrightunit(t) @test numind(t2) == numind(t) + 1 @@ -204,8 +204,8 @@ for V in spacelist @timedtestset "Basic linear algebra: test via CPU" begin W = V1 ⊗ V2 ⊗ V3 ← (V4 ⊗ V5)' for T in (Float32, ComplexF64) - t = CUDA.rand(T, W) - t2 = @constinferred CUDA.rand!(similar(t)) + t = cuRAND.rand(T, W) + t2 = @constinferred cuRAND.rand!(similar(t)) α = rand(T) @test norm(t, 2) ≈ norm(TensorKit.to_cpu(t), 2) @test dot(t2, t) ≈ dot(TensorKit.to_cpu(t2), TensorKit.to_cpu(t)) @@ -216,7 +216,7 @@ for V in spacelist @timedtestset "Real and imaginary parts" begin W = V1 ⊗ V2 for T in (Float64, ComplexF64, ComplexF32) - t = @constinferred CUDA.randn(T, W, W) + t = @constinferred cuRAND.randn(T, W, W) tr = @constinferred real(t) @test scalartype(tr) <: Real @@ -241,7 +241,7 @@ for V in spacelist end @timedtestset "Tensor conversion" begin W = V1 ⊗ V2 - t = @constinferred CUDA.randn(W ← W) + t = @constinferred cuRAND.randn(W ← W) @test typeof(convert(typeof(t), t')) == typeof(t) @test typeof(TensorKit.to_cpu(t')) == typeof(TensorKit.to_cpu(t)') tc = complex(t) @@ -253,7 +253,7 @@ for V in spacelist end #=@timedtestset "diag/diagm" begin W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 - t = CUDA.randn(ComplexF64, W) + t = cuRAND.randn(ComplexF64, W) d = LinearAlgebra.diag(t) # TODO find a way to use CUDA here D = LinearAlgebra.diagm(codomain(t), domain(t), d) @@ -262,8 +262,8 @@ for V in spacelist end=# symmetricbraiding && @timedtestset "Permutations: test via inner product invariance" begin W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 - t = CUDA.rand(ComplexF64, W) - t′ = CUDA.randn!(similar(t)) + t = cuRAND.rand(ComplexF64, W) + t′ = cuRAND.randn!(similar(t)) for k in 0:5 for p in permutations(1:5) p1 = ntuple(n -> p[n], k) @@ -287,7 +287,7 @@ for V in spacelist end symmetricbraiding && @timedtestset "Permutations: test via CPU" begin W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 - t = CUDA.rand(ComplexF64, W) + t = cuRAND.rand(ComplexF64, W) for k in 0:5 for p in permutations(1:5) p1 = ntuple(n -> p[n], k) @@ -303,7 +303,7 @@ for V in spacelist end end symmetricbraiding && @timedtestset "Full trace: test self-consistency" begin - t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + t = cuRAND.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') CUDA.@allowscalar begin t2 = permute(t, ((1, 2), (4, 3))) s = @constinferred tr(t2) @@ -323,14 +323,14 @@ for V in spacelist @test ss ≈ s3 end symmetricbraiding && @timedtestset "Partial trace: test self-consistency" begin - t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + t = cuRAND.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') @tensor t2[a, b] := t[c, d, b, d, c, a] @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] @tensor t5[a, b] := t4[a, b, c, c] @test t2 ≈ t5 end symmetricbraiding && @timedtestset "Trace: test via conversion" begin - t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + t = cuRAND.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') CUDA.@allowscalar begin @tensor t2[a, b] := t[c, d, b, d, c, a] @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] @@ -338,8 +338,8 @@ for V in spacelist @test t3 ≈ ad(t2) end symmetricbraiding && @timedtestset "Trace and contraction" begin - t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) - t2 = CUDA.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + t1 = cuRAND.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = cuRAND.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') CUDA.@allowscalar begin t3 = t1 ⊗ t2 @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] @@ -349,11 +349,11 @@ for V in spacelist end #=if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) @timedtestset "Tensor contraction: test via CPU" begin - dA1 = CUDA.randn(ComplexF64, V1' * V2', V3') - dA2 = CUDA.randn(ComplexF64, V3 * V4, V5) - drhoL = CUDA.randn(ComplexF64, V1, V1) - drhoR = CUDA.randn(ComplexF64, V5, V5)' # test adjoint tensor - dH = CUDA.randn(ComplexF64, V2 * V4, V2 * V4) + dA1 = cuRAND.randn(ComplexF64, V1' * V2', V3') + dA2 = cuRAND.randn(ComplexF64, V3 * V4, V5) + drhoL = cuRAND.randn(ComplexF64, V1, V1) + drhoR = cuRAND.randn(ComplexF64, V5, V5)' # test adjoint tensor + dH = cuRAND.randn(ComplexF64, V2 * V4, V2 * V4) @tensor dHrA12[a, s1, s2, c] := drhoL[a, a'] * conj(dA1[a', t1, b]) * dA2[b, t2, c'] * drhoR[c', c] * dH[s1, s2, t1, t2] @@ -364,7 +364,7 @@ for V in spacelist end end=# # doesn't yet work because of AdjointTensor BraidingStyle(I) isa HasBraiding && @timedtestset "Index flipping: test flipping inverse" begin - t = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← (V4 ⊗ V5)') + t = cuRAND.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← (V4 ⊗ V5)') for i in 1:5 CUDA.@allowscalar begin @test t ≈ flip(flip(t, i), i; inv = true) @@ -373,7 +373,7 @@ for V in spacelist end end #=@timedtestset "Index flipping: test via explicit flip" begin - t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + t = cuRAND.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) F1 = unitary(flip(V1), V1) CUDA.@allowscalar begin @@ -388,8 +388,8 @@ for V in spacelist end end @timedtestset "Index flipping: test via contraction" begin - t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) - t2 = CUDA.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) + t1 = cuRAND.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) + t2 = cuRAND.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) CUDA.@allowscalar begin @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] @@ -417,9 +417,9 @@ for V in spacelist W1 = V1 ⊗ V2 ⊗ V3 W2 = (V4 ⊗ V5)' for T in (Float64, ComplexF64) - t1 = CUDA.rand(T, W1, W1) - t2 = CUDA.rand(T, W2, W2) - t = CUDA.rand(T, W1, W2) + t1 = cuRAND.rand(T, W1, W1) + t2 = cuRAND.rand(T, W2, W2) + t = cuRAND.rand(T, W1, W2) @test t1 * (t1 \ t) ≈ t @test (t / t2) * t2 ≈ t @test t1 \ one(t1) ≈ inv(t1) @@ -435,9 +435,9 @@ for V in spacelist W1 = V1 ⊗ V2 ⊗ V3 W2 = (V4 ⊗ V5)' for T in (Float32, Float64, ComplexF32, ComplexF64) - t1 = CUDA.rand(T, W1, W1) - t2 = CUDA.rand(T, W2, W2) - t = CUDA.rand(T, W1, W2) + t1 = cuRAND.rand(T, W1, W1) + t2 = cuRAND.rand(T, W2, W2) + t = cuRAND.rand(T, W1, W2) ht1 = TensorKit.to_cpu(t1) ht2 = TensorKit.to_cpu(t2) ht = TensorKit.to_cpu(t) @@ -467,7 +467,7 @@ for V in spacelist symmetricbraiding && @timedtestset "Tensor functions" begin W = V1 ⊗ V2 for T in (Float64, ComplexF64) - t = project_hermitian!(CUDA.randn(T, W, W)) + t = project_hermitian!(cuRAND.randn(T, W, W)) s = dim(W) #@test (@constinferred sqrt(t))^2 ≈ t #@test TensorKit.to_cpu(sqrt(t)) ≈ sqrt(TensorKit.to_cpu(t)) @@ -510,11 +510,11 @@ for V in spacelist # Sylvester not defined for CUDA # @timedtestset "Sylvester equation" begin # for T in (Float32, ComplexF64) - # tA = CUDA.rand(T, V1 ⊗ V3, V1 ⊗ V3) - # tB = CUDA.rand(T, V2 ⊗ V4, V2 ⊗ V4) + # tA = cuRAND.rand(T, V1 ⊗ V3, V1 ⊗ V3) + # tB = cuRAND.rand(T, V2 ⊗ V4, V2 ⊗ V4) # tA = 3 // 2 * leftorth(tA; alg=Polar())[1] # tB = 1 // 5 * leftorth(tB; alg=Polar())[1] - # tC = CUDA.rand(T, V1 ⊗ V3, V2 ⊗ V4) + # tC = cuRAND.rand(T, V1 ⊗ V3, V2 ⊗ V4) # t = @constinferred sylvester(tA, tB, tC) # @test codomain(t) == V1 ⊗ V3 # @test domain(t) == V2 ⊗ V4 @@ -530,16 +530,16 @@ for V in spacelist # TODO @timedtestset "Tensor product: test via norm preservation" begin for T in (ComplexF64,) # Float32 case broken because of cuTENSOR - t1 = CUDA.rand(T, V1, V5') - t2 = CUDA.rand(T, V2 ⊗ V3, V4') + t1 = cuRAND.rand(T, V1, V5') + t2 = cuRAND.rand(T, V2 ⊗ V3, V4') t = @constinferred (t1 ⊗ t2) @test norm(t) ≈ norm(t1) * norm(t2) end end symmetricbraiding && @timedtestset "Tensor product: test via conversion" begin for T in (Float32, ComplexF64) - t1 = CUDA.rand(T, V1, V5') - t2 = CUDA.rand(T, V2 ⊗ V3, V4') + t1 = cuRAND.rand(T, V1, V5') + t2 = cuRAND.rand(T, V2 ⊗ V3, V4') d1 = dim(codomain(t1)) d2 = dim(codomain(t2)) d3 = dim(domain(t1)) @@ -551,8 +551,8 @@ for V in spacelist end symmetricbraiding && @timedtestset "Tensor product: test via tensor contraction" begin for T in (Float32, ComplexF64) - t1 = CUDA.rand(T, V1, V5') - t2 = CUDA.rand(T, V2 ⊗ V3, V4') + t1 = cuRAND.rand(T, V1, V5') + t2 = cuRAND.rand(T, V2 ⊗ V3, V4') t = @constinferred (t1 ⊗ t2) CUDA.@allowscalar begin @tensor t′[1 2 3; 4 5] := t1[1; 4] * t2[2 3; 5] From 2a34113cd38d7ae828952c923bd01c69ef11b950 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 24 Apr 2026 08:15:47 -0400 Subject: [PATCH 5/8] Cleanup --- .buildkite/pipeline.yml | 2 -- Project.toml | 9 +-------- test/Project.toml | 2 -- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 347502ebb..f02740b00 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -14,8 +14,6 @@ steps: agents: queue: "juliagpu" cuda: "*" - commands: | - unset LD_LIBRARY_PATH if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 90 matrix: diff --git a/Project.toml b/Project.toml index 5230c5755..5555e7b96 100644 --- a/Project.toml +++ b/Project.toml @@ -48,7 +48,7 @@ FiniteDifferences = "0.12" LRUCache = "1.0.2" LinearAlgebra = "1" MatrixAlgebraKit = "0.6.5" -Mooncake = "0.5" +Mooncake = "0.5.27" OhMyThreads = "0.8.0" Printf = "1" Random = "1" @@ -60,10 +60,3 @@ TupleTools = "1.5" VectorInterface = "0.4.8, 0.5" cuTENSOR = "6" julia = "1.10" - -[extras] -Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" - -[sources] -MatrixAlgebraKit = {url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl", rev = "ksh/cuda6"} -Mooncake = {url = "https://github.com/chalk-lab/Mooncake.jl", rev = "ksh/cuda6"} diff --git a/test/Project.toml b/test/Project.toml index 9190343e9..18af8af80 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -29,8 +29,6 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [sources] TensorKit = {path = ".."} -MatrixAlgebraKit = {url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl", rev = "ksh/cuda6"} -Mooncake = {url = "https://github.com/chalk-lab/Mooncake.jl", rev = "ksh/cuda6"} [compat] Aqua = "0.6, 0.7, 0.8" From b5142201eafd6077cd02b791bdb43cd3030324e4 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 24 Apr 2026 16:33:31 +0200 Subject: [PATCH 6/8] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 5555e7b96..07f19efca 100644 --- a/Project.toml +++ b/Project.toml @@ -47,7 +47,7 @@ Dictionaries = "0.4" FiniteDifferences = "0.12" LRUCache = "1.0.2" LinearAlgebra = "1" -MatrixAlgebraKit = "0.6.5" +MatrixAlgebraKit = "0.6.6" Mooncake = "0.5.27" OhMyThreads = "0.8.0" Printf = "1" From 62632fb39cc8c789078e88ebe4e1a0fb3a5a2a7e Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Sat, 25 Apr 2026 08:17:41 +0200 Subject: [PATCH 7/8] Fix import --- ext/TensorKitCUDAExt/TensorKitCUDAExt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl index 9303bb305..1a5c28f7c 100644 --- a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl +++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl @@ -5,7 +5,7 @@ using CUDA: @allowscalar using cuTENSOR: cuTENSOR import CUDA.cuRAND: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! using Strided: StridedViews -using CUDA.KernelAbstractions: @kernel, @index, get_backend +using CUDA.CUDACore.KernelAbstractions: @kernel, @index, get_backend using TensorKit using TensorKit.Factorizations From 566adbd69fd1cc9afcaceb464404e32724ec6fb8 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Sat, 25 Apr 2026 08:42:07 +0200 Subject: [PATCH 8/8] Another module path --- ext/TensorKitCUDAExt/cutensormap.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl index 8894164a9..2fefb3a24 100644 --- a/ext/TensorKitCUDAExt/cutensormap.jl +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -170,5 +170,5 @@ for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth) end function TensorKit._add_transform_multi!(tdst::CuTensorMap, tsrc, p, (U, structs_dst, structs_src)::Tuple{<:Array, TD, TS}, buffers, alpha, beta, backend...) where {TD, TS} - return TensorKit._add_transform_multi!(tdst, tsrc, p, (CUDA.Adapt.adapt(CuArray, U), structs_dst, structs_src), buffers, alpha, beta, backend...) + return TensorKit._add_transform_multi!(tdst, tsrc, p, (CUDA.CUDACore.Adapt.adapt(CuArray, U), structs_dst, structs_src), buffers, alpha, beta, backend...) end