diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 347502ebb..f02740b00 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -14,8 +14,6 @@ steps: agents: queue: "juliagpu" cuda: "*" - commands: | - unset LD_LIBRARY_PATH if: build.message !~ /\[skip tests\]/ timeout_in_minutes: 90 matrix: diff --git a/Project.toml b/Project.toml index 0308e3933..07f19efca 100644 --- a/Project.toml +++ b/Project.toml @@ -41,14 +41,14 @@ projects = ["test", "docs"] [compat] Adapt = "4" AMDGPU = "2" -CUDA = "5.9" +CUDA = "6" ChainRulesCore = "1" Dictionaries = "0.4" FiniteDifferences = "0.12" LRUCache = "1.0.2" LinearAlgebra = "1" -MatrixAlgebraKit = "0.6.5" -Mooncake = "0.5" +MatrixAlgebraKit = "0.6.6" +Mooncake = "0.5.27" OhMyThreads = "0.8.0" Printf = "1" Random = "1" @@ -58,5 +58,5 @@ TensorKitSectors = "0.3.7" TensorOperations = "5.1" TupleTools = "1.5" VectorInterface = "0.4.8, 0.5" -cuTENSOR = "2" +cuTENSOR = "6" julia = "1.10" diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl index 530c8cc85..1a5c28f7c 100644 --- a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl +++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl @@ -1,11 +1,11 @@ module TensorKitCUDAExt -using CUDA, CUDA.CUBLAS, CUDA.CUSOLVER, LinearAlgebra +using CUDA, CUDA.cuBLAS, CUDA.cuSOLVER, CUDA.cuRAND, LinearAlgebra using CUDA: @allowscalar using cuTENSOR: cuTENSOR +import CUDA.cuRAND: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! using Strided: StridedViews -import CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! -using CUDA.KernelAbstractions: @kernel, @index, get_backend +using CUDA.CUDACore.KernelAbstractions: @kernel, @index, get_backend using TensorKit using TensorKit.Factorizations diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl index 8894164a9..2fefb3a24 100644 --- a/ext/TensorKitCUDAExt/cutensormap.jl +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -170,5 +170,5 @@ for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth) end function TensorKit._add_transform_multi!(tdst::CuTensorMap, tsrc, p, (U, structs_dst, structs_src)::Tuple{<:Array, TD, TS}, buffers, alpha, beta, backend...) where {TD, TS} - return TensorKit._add_transform_multi!(tdst, tsrc, p, (CUDA.Adapt.adapt(CuArray, U), structs_dst, structs_src), buffers, alpha, beta, backend...) + return TensorKit._add_transform_multi!(tdst, tsrc, p, (CUDA.CUDACore.Adapt.adapt(CuArray, U), structs_dst, structs_src), buffers, alpha, beta, backend...) end diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl index 63848767f..fdeca843d 100644 --- a/test/cuda/factorizations.jl +++ b/test/cuda/factorizations.jl @@ -1,4 +1,4 @@ -using Adapt, CUDA, cuTENSOR +using Adapt, CUDA, CUDA.cuRAND, cuTENSOR using Test, TestExtras using TensorKit using LinearAlgebra: LinearAlgebra @@ -25,10 +25,10 @@ for V in spacelist @testset "QR decomposition" begin for T in eltypes, t in ( - CUDA.rand(T, W, W), CUDA.rand(T, W, W)', - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)')', - CUDA.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5)), CUDA.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), cuRAND.rand(T, W, W)', + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)')', + cuRAND.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5)), cuRAND.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) Q, R = @constinferred qr_full(t) @@ -54,7 +54,7 @@ for V in spacelist # empty tensor for T in eltypes - t = CUDA.rand(T, V1 ⊗ V2, zerospace(V1)) + t = cuRAND.rand(T, V1 ⊗ V2, zerospace(V1)) Q, R = @constinferred qr_full(t) @test Q * R ≈ t @@ -80,10 +80,10 @@ for V in spacelist @testset "LQ decomposition" begin for T in eltypes, t in ( - CUDA.rand(T, W, W), CUDA.rand(T, W, W)', - CUDA.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)')', - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5)), CUDA.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), cuRAND.rand(T, W, W)', + cuRAND.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)')', + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5)), cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) L, Q = @constinferred lq_full(t) @@ -105,7 +105,7 @@ for V in spacelist for T in eltypes # empty tensor - t = CUDA.rand(T, zerospace(V1), V1 ⊗ V2) + t = cuRAND.rand(T, zerospace(V1), V1 ⊗ V2) L, Q = @constinferred lq_full(t) @test L * Q ≈ t @@ -131,10 +131,10 @@ for V in spacelist @testset "Polar decomposition" begin @testset for T in eltypes, t in ( - CUDA.rand(T, W, W), - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), - CUDA.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), + cuRAND.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) @assert domain(t) ≾ codomain(t) @@ -150,10 +150,10 @@ for V in spacelist @testset for T in eltypes, t in ( - CUDA.rand(T, W, W), - CUDA.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), + cuRAND.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) @assert codomain(t) ≾ domain(t) @@ -171,10 +171,10 @@ for V in spacelist @testset "SVD" begin for T in eltypes, t in ( - CUDA.rand(T, W, W), CUDA.rand(T, W, W)', - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', - CUDA.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), cuRAND.rand(T, W, W)', + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + cuRAND.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) u, s, vᴴ = @constinferred svd_full(t) @@ -222,8 +222,8 @@ for V in spacelist # empty tensor for T in eltypes, t in ( - CUDA.rand(T, W, zerospace(V1)), - CUDA.rand(T, zerospace(V1), W), + cuRAND.rand(T, W, zerospace(V1)), + cuRAND.rand(T, zerospace(V1), W), ) U, S, Vᴴ = @constinferred svd_full(t) @test U * S * Vᴴ ≈ t @@ -239,10 +239,10 @@ for V in spacelist @testset "truncated SVD" begin for T in eltypes, t in ( - CUDA.randn(T, W, W), CUDA.randn(T, W, W)', - CUDA.randn(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), CUDA.randn(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', - CUDA.randn(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), CUDA.randn(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', - DiagonalTensorMap(CUDA.randn(T, reduceddim(V1)), V1), + cuRAND.randn(T, W, W), cuRAND.randn(T, W, W)', + cuRAND.randn(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), cuRAND.randn(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + cuRAND.randn(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), cuRAND.randn(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.randn(T, reduceddim(V1)), V1), ) @constinferred normalize!(t) @@ -307,10 +307,10 @@ for V in spacelist @testset "Eigenvalue decomposition" begin for T in eltypes, t in ( - CUDA.rand(T, V1, V1), - CUDA.rand(T, W, W), - CUDA.rand(T, W, W)', - # DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, V1, V1), + cuRAND.rand(T, W, W), + cuRAND.rand(T, W, W)', + # DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) d, v = @constinferred eig_full(t) @@ -367,15 +367,15 @@ for V in spacelist @testset "Condition number and rank" begin for T in eltypes, t in ( - CUDA.rand(T, W, W), CUDA.rand(T, W, W)', - CUDA.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', - CUDA.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), CUDA.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, W, W), cuRAND.rand(T, W, W)', + cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + cuRAND.rand(T, (V1 ⊗ V2), (V3 ⊗ V4 ⊗ V5)'), cuRAND.rand(T, (V1 ⊗ V2 ⊗ V3)', (V4 ⊗ V5))', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) d1, d2 = dim(codomain(t)), dim(domain(t)) r = rank(t) - @test r == min(d1, d2) + @test r ≈ min(d1, d2) @test typeof(r) == typeof(d1) M = left_null(t) @test @constinferred(rank(M)) + r ≈ d1 @@ -387,15 +387,15 @@ for V in spacelist @test @constinferred(cond(u)) ≈ one(real(T)) @test @constinferred(rank(u)) == dim(V1 ⊗ V2) - t = CUDA.rand(T, zerospace(V1), W) + t = cuRAND.rand(T, zerospace(V1), W) @test rank(t) == 0 - t2 = CUDA.rand(T, zerospace(V1) * zerospace(V2), zerospace(V1) * zerospace(V2)) + t2 = cuRAND.rand(T, zerospace(V1) * zerospace(V2), zerospace(V1) * zerospace(V2)) @test rank(t2) == 0 @test cond(t2) == 0.0 end for T in eltypes, t in ( - CUDA.rand(T, W, W), - CUDA.rand(T, W, W)', + cuRAND.rand(T, W, W), + cuRAND.rand(T, W, W)', ) project_hermitian!(t) vals = @constinferred LinearAlgebra.eigvals(t) @@ -408,10 +408,10 @@ for V in spacelist @testset "Hermitian projections" begin for T in eltypes, t in ( - CUDA.rand(T, V1, V1), - CUDA.rand(T, W, W), - CUDA.rand(T, W, W)', - DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + cuRAND.rand(T, V1, V1), + cuRAND.rand(T, W, W), + cuRAND.rand(T, W, W)', + DiagonalTensorMap(cuRAND.rand(T, reduceddim(V1)), V1), ) normalize!(t) noisefactor = eps(real(T))^(3 / 4) @@ -441,10 +441,10 @@ for V in spacelist @testset "Isometric projections" begin for T in eltypes, t in ( - CUDA.randn(T, W, W), - CUDA.randn(T, W, W)', - CUDA.randn(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), - CUDA.randn(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', + cuRAND.randn(T, W, W), + cuRAND.randn(T, W, W)', + cuRAND.randn(T, (V1 ⊗ V2 ⊗ V3), (V4 ⊗ V5)'), + cuRAND.randn(T, (V1 ⊗ V2)', (V3 ⊗ V4 ⊗ V5))', ) t2 = project_isometric(t) @test isisometric(t2) @@ -459,7 +459,7 @@ for V in spacelist # test that t2 is closer to A then any other isometry for k in 1:10 - δt = CUDA.randn!(similar(t)) + δt = cuRAND.randn!(similar(t)) t3 = project_isometric(t + δt / 100) @test norm(t - t3) > norm(t - t2) end diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index 8314d8466..738440bef 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -1,4 +1,4 @@ -using Adapt, CUDA, cuTENSOR +using Adapt, CUDA, CUDA.cuRAND, cuTENSOR using Test, TestExtras using TensorKit, Combinatorics ad = adapt(Array) @@ -20,7 +20,7 @@ for V in spacelist @timedtestset "Basic tensor properties" begin W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 # test default pass-throughs - for f in (CUDA.zeros, CUDA.ones, CUDA.rand, CUDA.randn) + for f in (CUDA.zeros, CUDA.ones, cuRAND.rand, cuRAND.randn) t = @constinferred f(W) @test scalartype(t) == Float64 @test codomain(t) == W @@ -44,7 +44,7 @@ for V in spacelist @test domain(t) == one(W) @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, CuVector{Float64, CUDA.DeviceMemory}} end - for f! in (CUDA.rand!, CUDA.randn!) + for f! in (cuRAND.rand!, cuRAND.randn!) t = @constinferred CUDA.zeros(W) f!(t) @test scalartype(t) == Float64 @@ -113,7 +113,7 @@ for V in spacelist @timedtestset "Tensor Dict conversion" begin W = V1 ⊗ V2 ← (V3 ⊗ V4 ⊗ V5)' for T in (Int, Float32, ComplexF64) - t = @constinferred CUDA.rand(T, W) + t = @constinferred cuRAND.rand(T, W) d = convert(Dict, t) @test convert(Dict, TensorKit.to_cpu(t)) == d end @@ -121,7 +121,7 @@ for V in spacelist symmetricbraiding && @timedtestset "Basic linear algebra" begin W = V1 ⊗ V2 ← (V3 ⊗ V4 ⊗ V5)' for T in (Float32, ComplexF64) - t = @constinferred CUDA.rand(T, W) + t = @constinferred cuRAND.rand(T, W) @test scalartype(t) == T @test space(t) == W @test space(t') == W' @@ -171,7 +171,7 @@ for V in spacelist @timedtestset "Trivial space insertion and removal" begin W = V1 ⊗ V2 ← (V3 ⊗ V4 ⊗ V5)' for T in (Float32, ComplexF64) - t = @constinferred CUDA.rand(T, W) + t = @constinferred cuRAND.rand(T, W) t2 = @constinferred insertleftunit(t) @test t2 == @constinferred insertrightunit(t) @test numind(t2) == numind(t) + 1 @@ -204,8 +204,8 @@ for V in spacelist @timedtestset "Basic linear algebra: test via CPU" begin W = V1 ⊗ V2 ⊗ V3 ← (V4 ⊗ V5)' for T in (Float32, ComplexF64) - t = CUDA.rand(T, W) - t2 = @constinferred CUDA.rand!(similar(t)) + t = cuRAND.rand(T, W) + t2 = @constinferred cuRAND.rand!(similar(t)) α = rand(T) @test norm(t, 2) ≈ norm(TensorKit.to_cpu(t), 2) @test dot(t2, t) ≈ dot(TensorKit.to_cpu(t2), TensorKit.to_cpu(t)) @@ -216,7 +216,7 @@ for V in spacelist @timedtestset "Real and imaginary parts" begin W = V1 ⊗ V2 for T in (Float64, ComplexF64, ComplexF32) - t = @constinferred CUDA.randn(T, W, W) + t = @constinferred cuRAND.randn(T, W, W) tr = @constinferred real(t) @test scalartype(tr) <: Real @@ -241,7 +241,7 @@ for V in spacelist end @timedtestset "Tensor conversion" begin W = V1 ⊗ V2 - t = @constinferred CUDA.randn(W ← W) + t = @constinferred cuRAND.randn(W ← W) @test typeof(convert(typeof(t), t')) == typeof(t) @test typeof(TensorKit.to_cpu(t')) == typeof(TensorKit.to_cpu(t)') tc = complex(t) @@ -253,7 +253,7 @@ for V in spacelist end #=@timedtestset "diag/diagm" begin W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 - t = CUDA.randn(ComplexF64, W) + t = cuRAND.randn(ComplexF64, W) d = LinearAlgebra.diag(t) # TODO find a way to use CUDA here D = LinearAlgebra.diagm(codomain(t), domain(t), d) @@ -262,8 +262,8 @@ for V in spacelist end=# symmetricbraiding && @timedtestset "Permutations: test via inner product invariance" begin W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 - t = CUDA.rand(ComplexF64, W) - t′ = CUDA.randn!(similar(t)) + t = cuRAND.rand(ComplexF64, W) + t′ = cuRAND.randn!(similar(t)) for k in 0:5 for p in permutations(1:5) p1 = ntuple(n -> p[n], k) @@ -284,7 +284,7 @@ for V in spacelist end symmetricbraiding && @timedtestset "Permutations: test via CPU" begin W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 - t = CUDA.rand(ComplexF64, W) + t = cuRAND.rand(ComplexF64, W) for k in 0:5 for p in permutations(1:5) p1 = ntuple(n -> p[n], k) @@ -300,7 +300,7 @@ for V in spacelist end end symmetricbraiding && @timedtestset "Full trace: test self-consistency" begin - t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + t = cuRAND.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') CUDA.@allowscalar begin t2 = permute(t, ((1, 2), (4, 3))) s = @constinferred tr(t2) @@ -320,14 +320,14 @@ for V in spacelist @test ss ≈ s3 end symmetricbraiding && @timedtestset "Partial trace: test self-consistency" begin - t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + t = cuRAND.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') @tensor t2[a, b] := t[c, d, b, d, c, a] @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] @tensor t5[a, b] := t4[a, b, c, c] @test t2 ≈ t5 end symmetricbraiding && @timedtestset "Trace: test via conversion" begin - t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + t = cuRAND.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') CUDA.@allowscalar begin @tensor t2[a, b] := t[c, d, b, d, c, a] @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] @@ -335,8 +335,8 @@ for V in spacelist @test t3 ≈ ad(t2) end symmetricbraiding && @timedtestset "Trace and contraction" begin - t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) - t2 = CUDA.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + t1 = cuRAND.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = cuRAND.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') CUDA.@allowscalar begin t3 = t1 ⊗ t2 @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] @@ -346,11 +346,11 @@ for V in spacelist end if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) @timedtestset "Tensor contraction: test via CPU" begin - dA1 = CUDA.randn(ComplexF64, V1' * V2', V3') - dA2 = CUDA.randn(ComplexF64, V3 * V4, V5) - drhoL = CUDA.randn(ComplexF64, V1, V1) - drhoR = CUDA.randn(ComplexF64, V5, V5)' # test adjoint tensor - dH = CUDA.randn(ComplexF64, V2 * V4, V2 * V4) + dA1 = cuRAND.randn(ComplexF64, V1' * V2', V3') + dA2 = cuRAND.randn(ComplexF64, V3 * V4, V5) + drhoL = cuRAND.randn(ComplexF64, V1, V1) + drhoR = cuRAND.randn(ComplexF64, V5, V5)' # test adjoint tensor + dH = cuRAND.randn(ComplexF64, V2 * V4, V2 * V4) @tensor dHrA12[a, s1, s2, c] := drhoL[a, a'] * conj(dA1[a', t1, b]) * dA2[b, t2, c'] * drhoR[c', c] * dH[s1, s2, t1, t2] @@ -361,7 +361,7 @@ for V in spacelist end end BraidingStyle(I) isa HasBraiding && @timedtestset "Index flipping: test flipping inverse" begin - t = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← (V4 ⊗ V5)') + t = cuRAND.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← (V4 ⊗ V5)') for i in 1:5 CUDA.@allowscalar begin @test t ≈ flip(flip(t, i), i; inv = true) @@ -370,7 +370,7 @@ for V in spacelist end end #=@timedtestset "Index flipping: test via explicit flip" begin - t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + t = cuRAND.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) F1 = unitary(flip(V1), V1) CUDA.@allowscalar begin @@ -385,8 +385,8 @@ for V in spacelist end end @timedtestset "Index flipping: test via contraction" begin - t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) - t2 = CUDA.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) + t1 = cuRAND.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) + t2 = cuRAND.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) CUDA.@allowscalar begin @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] @@ -414,9 +414,9 @@ for V in spacelist W1 = V1 ⊗ V2 ⊗ V3 W2 = (V4 ⊗ V5)' for T in (Float64, ComplexF64) - t1 = CUDA.rand(T, W1, W1) - t2 = CUDA.rand(T, W2, W2) - t = CUDA.rand(T, W1, W2) + t1 = cuRAND.rand(T, W1, W1) + t2 = cuRAND.rand(T, W2, W2) + t = cuRAND.rand(T, W1, W2) @test t1 * (t1 \ t) ≈ t @test (t / t2) * t2 ≈ t @test t1 \ one(t1) ≈ inv(t1) @@ -432,9 +432,9 @@ for V in spacelist W1 = V1 ⊗ V2 ⊗ V3 W2 = (V4 ⊗ V5)' for T in (Float32, Float64, ComplexF32, ComplexF64) - t1 = CUDA.rand(T, W1, W1) - t2 = CUDA.rand(T, W2, W2) - t = CUDA.rand(T, W1, W2) + t1 = cuRAND.rand(T, W1, W1) + t2 = cuRAND.rand(T, W2, W2) + t = cuRAND.rand(T, W1, W2) ht1 = TensorKit.to_cpu(t1) ht2 = TensorKit.to_cpu(t2) ht = TensorKit.to_cpu(t) @@ -464,7 +464,7 @@ for V in spacelist symmetricbraiding && @timedtestset "Tensor functions" begin W = V1 ⊗ V2 for T in (Float64, ComplexF64) - t = project_hermitian!(CUDA.randn(T, W, W)) + t = project_hermitian!(cuRAND.randn(T, W, W)) s = dim(W) #@test (@constinferred sqrt(t))^2 ≈ t #@test TensorKit.to_cpu(sqrt(t)) ≈ sqrt(TensorKit.to_cpu(t)) @@ -507,11 +507,11 @@ for V in spacelist # Sylvester not defined for CUDA # @timedtestset "Sylvester equation" begin # for T in (Float32, ComplexF64) - # tA = CUDA.rand(T, V1 ⊗ V3, V1 ⊗ V3) - # tB = CUDA.rand(T, V2 ⊗ V4, V2 ⊗ V4) + # tA = cuRAND.rand(T, V1 ⊗ V3, V1 ⊗ V3) + # tB = cuRAND.rand(T, V2 ⊗ V4, V2 ⊗ V4) # tA = 3 // 2 * leftorth(tA; alg=Polar())[1] # tB = 1 // 5 * leftorth(tB; alg=Polar())[1] - # tC = CUDA.rand(T, V1 ⊗ V3, V2 ⊗ V4) + # tC = cuRAND.rand(T, V1 ⊗ V3, V2 ⊗ V4) # t = @constinferred sylvester(tA, tB, tC) # @test codomain(t) == V1 ⊗ V3 # @test domain(t) == V2 ⊗ V4 @@ -527,16 +527,16 @@ for V in spacelist # TODO @timedtestset "Tensor product: test via norm preservation" begin for T in (ComplexF64,) # Float32 case broken because of cuTENSOR - t1 = CUDA.rand(T, V1, V5') - t2 = CUDA.rand(T, V2 ⊗ V3, V4') + t1 = cuRAND.rand(T, V1, V5') + t2 = cuRAND.rand(T, V2 ⊗ V3, V4') t = @constinferred (t1 ⊗ t2) @test norm(t) ≈ norm(t1) * norm(t2) end end symmetricbraiding && @timedtestset "Tensor product: test via conversion" begin for T in (Float32, ComplexF64) - t1 = CUDA.rand(T, V1, V5') - t2 = CUDA.rand(T, V2 ⊗ V3, V4') + t1 = cuRAND.rand(T, V1, V5') + t2 = cuRAND.rand(T, V2 ⊗ V3, V4') d1 = dim(codomain(t1)) d2 = dim(codomain(t2)) d3 = dim(domain(t1)) @@ -548,8 +548,8 @@ for V in spacelist end symmetricbraiding && @timedtestset "Tensor product: test via tensor contraction" begin for T in (Float32, ComplexF64) - t1 = CUDA.rand(T, V1, V5') - t2 = CUDA.rand(T, V2 ⊗ V3, V4') + t1 = cuRAND.rand(T, V1, V5') + t2 = cuRAND.rand(T, V2 ⊗ V3, V4') t = @constinferred (t1 ⊗ t2) CUDA.@allowscalar begin @tensor t′[1 2 3; 4 5] := t1[1; 4] * t2[2 3; 5]