From 18a692ace47726de423b50a0d0f7e241e0402059 Mon Sep 17 00:00:00 2001
From: ChrisRackauckas-Claude
Date: Sat, 13 Jun 2026 04:23:17 -0400
Subject: [PATCH] Add GPUArraysCore extension for broadcast-based seeding on GPU arrays

The four `seed!` methods in src/apiutils.jl write each dual with a scalar
`setindex!` loop over `structural_eachindex`, which errors with "Scalar
indexing is disallowed" on GPU arrays. ForwardDiff 0.10 seeded with
broadcast and worked on GPU arrays, so jacobians on GPU arrays regressed in
the 1.0 rewrite.

Add a GPUArraysCore package extension that overrides `seed!` for
`AbstractGPUArray{<:Dual}` with broadcast-based seeding, restoring the
pre-1.0 behavior. GPU arrays are dense, one-based, and isbits-valued, so
the structural-index / unset-element handling of the generic methods is
not needed on this path.

Tested via JLArrays (which emulates the GPU scalar-indexing ban on the CPU)
so the extension is covered in CI without a physical GPU.

Co-Authored-By: Chris Rackauckas
---
Review note: the chunked `seed!` method looks its seeds up with a fused
`getindex.(Ref(seeds), seed_inds)` rather than slicing the tuple; the
blob hash on the extension file's `index` line predates that edit.

 Project.toml                       | 10 ++++-
 ext/ForwardDiffGPUArraysCoreExt.jl | 60 ++++++++++++++++++++++++++++++
 test/GPUArraysCoreTest.jl          | 34 +++++++++++++++++
 test/runtests.jl                   |  5 +++
 4 files changed, 107 insertions(+), 2 deletions(-)
 create mode 100644 ext/ForwardDiffGPUArraysCoreExt.jl
 create mode 100644 test/GPUArraysCoreTest.jl

diff --git a/Project.toml b/Project.toml
index 77f40590..2c8cfac7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "ForwardDiff"
 uuid = "f6369f11-7733-5829-9624-2563aa707210"
-version = "1.4.1"
+version = "1.5.0"
 
 [deps]
 CommonSubexpressions = "bbf7d656-a473-5ed7-a52c-81e309532950"
@@ -15,9 +15,11 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 
 [weakdeps]
+GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 
 [extensions]
+ForwardDiffGPUArraysCoreExt = "GPUArraysCore"
 ForwardDiffStaticArraysExt = "StaticArrays"
 
 [compat]
@@ -26,8 +28,10 @@ CommonSubexpressions = "0.3"
 DiffResults = "1.1"
 DiffRules = "1.4"
 DiffTests = "0.1"
+GPUArraysCore = "0.1, 0.2"
 IrrationalConstants = "0.1, 0.2"
 JET = "0.9, 0.10, 0.11"
+JLArrays = "0.1, 0.2"
 LogExpFunctions = "0.3, 1"
 NaNMath = "1"
 Preferences = "1"
@@ -38,12 +42,14 @@ julia = "1.10"
 [extras]
 Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9"
 DiffTests = "de460e47-3fe3-5279-bb4a-814414816d5d"
+GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
+JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Calculus", "DiffTests", "IrrationalConstants", "JET", "SparseArrays", "StaticArrays", "Test", "InteractiveUtils"]
+test = ["Calculus", "DiffTests", "GPUArraysCore", "IrrationalConstants", "JET", "JLArrays", "SparseArrays", "StaticArrays", "Test", "InteractiveUtils"]
diff --git a/ext/ForwardDiffGPUArraysCoreExt.jl b/ext/ForwardDiffGPUArraysCoreExt.jl
new file mode 100644
index 00000000..316ada52
--- /dev/null
+++ b/ext/ForwardDiffGPUArraysCoreExt.jl
@@ -0,0 +1,60 @@
+module ForwardDiffGPUArraysCoreExt
+
+using ForwardDiff: ForwardDiff, Dual, Partials
+using GPUArraysCore: AbstractGPUArray
+
+# ForwardDiff's default `seed!` methods (src/apiutils.jl) write each dual with a
+# scalar `setindex!` loop over `structural_eachindex`. On GPU arrays that
+# triggers a scalar-indexing error. GPU arrays are always dense, one-based, and
+# carry isbits element types, so the structural-index / unset-element handling of
+# the generic methods is unnecessary here and broadcast restores the
+# pre-1.0 GPU-compatible behavior.
+#
+# Every method below writes into `duals` in place and returns it; `x` supplies the
+# primal values and is only read.
+
+# Seed every element of `duals` from `x`, giving each dual the same `seed`
+# (all-zero partials by default).
+function ForwardDiff.seed!(
+        duals::AbstractGPUArray{Dual{T,V,N}}, x,
+        seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N}
+    # `Ref` makes broadcast treat the single `seed` as a scalar.
+    duals .= Dual{T,V,N}.(x, Ref(seed))
+    return duals
+end
+
+# Seed linear indices `1:N` of `duals`, pairing element `i` with `seeds[i]`.
+# Elements outside `1:N` are not written.
+function ForwardDiff.seed!(
+        duals::AbstractGPUArray{Dual{T,V,N}}, x,
+        seeds::NTuple{N,Partials{N,V}}) where {T,V,N}
+    dual_inds = 1:N
+    duals[dual_inds] .= Dual{T,V,N}.(view(x, dual_inds), seeds)
+    return duals
+end
+
+# Seed linear indices `index:length(duals)` of `duals`, giving each dual the
+# same `seed` (all-zero partials by default).
+function ForwardDiff.seed!(
+        duals::AbstractGPUArray{Dual{T,V,N}}, x, index,
+        seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N}
+    dual_inds = index:length(duals)
+    duals[dual_inds] .= Dual{T,V,N}.(view(x, dual_inds), Ref(seed))
+    return duals
+end
+
+# Seed `chunksize` consecutive linear indices of `duals` starting at `index`,
+# pairing the `i`-th element of the chunk with `seeds[i]`.
+function ForwardDiff.seed!(
+        duals::AbstractGPUArray{Dual{T,V,N}}, x, index,
+        seeds::NTuple{N,Partials{N,V}}, chunksize = N) where {T,V,N}
+    seed_inds = 1:chunksize
+    dual_inds = index:(index + chunksize - 1)
+    # Look each seed up inside the fused broadcast. Slicing the tuple with
+    # `seeds[1:chunksize]` would yield a tuple whose length depends on the
+    # run-time value of `chunksize`, so its type could not be inferred.
+    duals[dual_inds] .= Dual{T,V,N}.(view(x, dual_inds), getindex.(Ref(seeds), seed_inds))
+    return duals
+end
+
+end
diff --git a/test/GPUArraysCoreTest.jl b/test/GPUArraysCoreTest.jl
new file mode 100644
index 00000000..89ab1c71
--- /dev/null
+++ b/test/GPUArraysCoreTest.jl
@@ -0,0 +1,34 @@
+module GPUArraysCoreTest
+
+using ForwardDiff, Test
+using JLArrays
+
+# JLArrays emulates GPU array semantics (including the scalar-indexing ban) on
+# the CPU, so the GPUArraysCore extension's broadcast-based `seed!` methods can
+# be exercised without a physical GPU.
+JLArrays.allowscalar(false)
+
+@testset "ForwardDiff seeding on GPU arrays" begin
+    f(x) = x .^ 2 .+ 2 .* x
+    # every GPU result is compared against the jacobian of the same input on the CPU
+    expected(xs) = ForwardDiff.jacobian(f, xs)
+
+    @testset "jacobian, vector mode (length $n)" for n in (1, 4, 8)
+        xs = Float64.(1:n)
+        @test Array(ForwardDiff.jacobian(f, JLArray(xs))) == expected(xs)
+    end
+    # lengths above the chunk size exercise the chunked `seed!` methods
+    @testset "jacobian, chunk mode (length $n, chunk $c)" for n in (16, 20, 27), c in (4, 8)
+        xs = Float64.(1:n)
+        chunked = ForwardDiff.JacobianConfig(f, JLArray(xs), ForwardDiff.Chunk{c}())
+        @test Array(ForwardDiff.jacobian(f, JLArray(xs), chunked)) == expected(xs)
+    end
+    @testset "jacobian! into a GPU array (length $n)" for n in (4, 16)
+        xs = Float64.(1:n)
+        jac = JLArray(zeros(n, n))
+        ForwardDiff.jacobian!(jac, f, JLArray(xs))
+        @test Array(jac) == expected(xs)
+    end
+end
+
+end # module
diff --git a/test/runtests.jl b/test/runtests.jl
index 2193242d..0805678d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -43,6 +43,11 @@ Random.seed!(SEED)
         t = @elapsed include("ConfusionTest.jl")
         println("##### done (took $t seconds).")
     end
+    @testset "GPUArraysCore" begin
+        println("##### Testing GPUArraysCore extension...")
+        t = @elapsed include("GPUArraysCoreTest.jl")
+        println("##### done (took $t seconds).")
+    end
     @testset "Miscellaneous" begin
         println("##### Testing miscellaneous functionality...")
         t = @elapsed include("MiscTest.jl")