From ae0d442d2c822c2054feba3eca44cf45ff0123e8 Mon Sep 17 00:00:00 2001 From: Matthew Fishman Date: Wed, 1 Jul 2026 18:59:28 -0400 Subject: [PATCH 1/2] Build permuted named-tensor add on the field-based PermutedDims ## Summary Aligns a misaligned operand in named-tensor addition with `TensorAlgebra.PermutedDims` instead of `Base.PermutedDimsArray`. `PermutedDimsArray` encodes the permutation in a type parameter, so a runtime permutation forces a type-unstable, allocating construction on every permuted add. `PermutedDims` stores the permutation in a field, builds cheaply and type-stably, and is a broadcast leaf the linear-combination fold absorbs, so a permuted add no longer pays that type-construction floor and closes most of the gap to an aligned add. Builds on the broadcast-friendly `PermutedDims` from https://github.com/ITensor/TensorAlgebra.jl/pull/198. --- Project.toml | 8 ++++++-- src/abstractnamedtensor.jl | 14 +++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/Project.toml b/Project.toml index 397af2d..cfe0a44 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "ITensorBase" uuid = "4795dd04-0d67-49bb-8f44-b89c448a1dc7" -version = "0.10.2" +version = "0.10.3" authors = ["ITensor developers and contributors"] [workspace] @@ -30,6 +30,10 @@ Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" +[sources.TensorAlgebra] +rev = "mf/permuteddims-broadcast" +url = "https://github.com/ITensor/TensorAlgebra.jl" + [extensions] ITensorBaseAdaptExt = "Adapt" ITensorBaseMooncakeExt = "Mooncake" @@ -49,7 +53,7 @@ Mooncake = "0.4.202, 0.5" OrderedCollections = "1.6" Random = "1.10" SimpleTraits = "0.9.4" -TensorAlgebra = "0.15" +TensorAlgebra = "0.15.1" TensorOperations = "5.3.1" TermInterface = "2" TupleTools = "1.6" diff --git a/src/abstractnamedtensor.jl b/src/abstractnamedtensor.jl index 1a4a69f..f500a71 100644 --- a/src/abstractnamedtensor.jl +++ b/src/abstractnamedtensor.jl @@ -87,12 +87,16 @@ unnamed(a::AbstractNamedTensor) = throw(MethodError(unnamed, a)) function unnamed(a::AbstractNamedTensor, names) return _permuteddims_to(unnamed(a), getperm(dimnames(a), names)) end -# Function barrier: `unnamed(a)` is abstractly typed, so dispatching on the concrete array here -# makes `ndims` a compile-time constant. Building the permutation as an `ntuple(…, Val(ndims))` -# (an `NTuple{N,Int}`) rather than `Tuple(perm)` (a length-non-inferrable `Tuple{Vararg{Int}}`) -# lets `permuteddims` build a concretely-typed wrapper, roughly halving the permute cost. +# Align a misaligned operand for the elementwise broadcast by wrapping it in a lazy permuted +# view. `TensorAlgebra.PermutedDims` stores the permutation in a field (unlike +# `Base.PermutedDimsArray`, which encodes it in a type parameter, so a runtime permutation forces +# runtime type construction), so it builds cheaply and type-stably; it is a broadcast leaf the +# linear-combination path absorbs via `bipermutedimsopadd!`. Function barrier: `unnamed(a)` is +# abstractly typed, so dispatching on the concrete array here makes `ndims` a compile-time +# constant, and the `ntuple(…, Val(ndims))` builds an inferrable `NTuple{N,Int}` permutation +# rather than a length-non-inferrable `Tuple(::Vector)`. @noinline function _permuteddims_to(array::AbstractArray, perm) - return permuteddims(array, ntuple(i -> perm[i], Val(ndims(array)))) + return TensorAlgebra.PermutedDims(array, ntuple(i -> perm[i], Val(ndims(array)))) end unname(a::AbstractNamedTensor, inds) = unnamed(aligndims(a, inds)) From 4ed4c8bbebcd362ff20f6a298418baef96ddc7fd Mon Sep 17 00:00:00 2001 From: Matthew Fishman Date: Wed, 1 Jul 2026 19:43:50 -0400 Subject: [PATCH 2/2] Confine the field-based PermutedDims to the broadcast alignment path The public `unnamed(a, names)` keeps returning a `Base.PermutedDimsArray` (a full array), so callers outside broadcasting are unaffected. Only `broadcasted_unnamed` aligns a misaligned operand with `TensorAlgebra.PermutedDims`, whose minimal array interface stays confined to the broadcast hot path and is never handed to users. Also removes the `[sources]` pin now that TensorAlgebra 0.15.1 is registered, and switches a GPU-unsafe `dot` test to eager `unname`. --- Project.toml | 4 ---- src/abstractnamedtensor.jl | 14 +++++--------- src/broadcast.jl | 16 ++++++++++++++-- test/test_linearalgebra.jl | 4 ++-- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/Project.toml b/Project.toml index cfe0a44..dac0c96 100644 --- a/Project.toml +++ b/Project.toml @@ -30,10 +30,6 @@ Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" -[sources.TensorAlgebra] -rev = "mf/permuteddims-broadcast" -url = "https://github.com/ITensor/TensorAlgebra.jl" - [extensions] ITensorBaseAdaptExt = "Adapt" ITensorBaseMooncakeExt = "Mooncake" diff --git a/src/abstractnamedtensor.jl b/src/abstractnamedtensor.jl index f500a71..1a4a69f 100644 --- a/src/abstractnamedtensor.jl +++ b/src/abstractnamedtensor.jl @@ -87,16 +87,12 @@ unnamed(a::AbstractNamedTensor) = throw(MethodError(unnamed, a)) function unnamed(a::AbstractNamedTensor, names) return _permuteddims_to(unnamed(a), getperm(dimnames(a), names)) end -# Align a misaligned operand for the elementwise broadcast by wrapping it in a lazy permuted -# view. `TensorAlgebra.PermutedDims` stores the permutation in a field (unlike -# `Base.PermutedDimsArray`, which encodes it in a type parameter, so a runtime permutation forces -# runtime type construction), so it builds cheaply and type-stably; it is a broadcast leaf the -# linear-combination path absorbs via `bipermutedimsopadd!`. Function barrier: `unnamed(a)` is -# abstractly typed, so dispatching on the concrete array here makes `ndims` a compile-time -# constant, and the `ntuple(…, Val(ndims))` builds an inferrable `NTuple{N,Int}` permutation -# rather than a length-non-inferrable `Tuple(::Vector)`. +# Function barrier: `unnamed(a)` is abstractly typed, so dispatching on the concrete array here +# makes `ndims` a compile-time constant. Building the permutation as an `ntuple(…, Val(ndims))` +# (an `NTuple{N,Int}`) rather than `Tuple(perm)` (a length-non-inferrable `Tuple{Vararg{Int}}`) +# lets `permuteddims` build a concretely-typed wrapper, roughly halving the permute cost. @noinline function _permuteddims_to(array::AbstractArray, perm) - return TensorAlgebra.PermutedDims(array, ntuple(i -> perm[i], Val(ndims(array)))) + return permuteddims(array, ntuple(i -> perm[i], Val(ndims(array)))) end unname(a::AbstractNamedTensor, inds) = unnamed(aligndims(a, inds)) diff --git a/src/broadcast.jl b/src/broadcast.jl index c5c4527..14b8b0e 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -1,4 +1,5 @@ -using ..ITensorBase: AbstractNamedTensor, ITensorBase, dimnames, named, nameddims, unnamed +using ..ITensorBase: + AbstractNamedTensor, ITensorBase, dimnames, getperm, named, nameddims, unnamed using Base.Broadcast: Broadcast as BC, Broadcasted, broadcasted using TensorAlgebra: TensorAlgebra as TA @@ -26,7 +27,18 @@ function broadcasted_unnamed(a::AbstractNamedTensor, names) # common case for the rest) needs no permutation, avoiding a `getperm` allocation and the # identity `permuteddims` wrapper. Skipping it makes a small add several times slower. dimnames(a) == names && return unnamed(a) - return unnamed(a, names) + return _broadcast_permuteddims_to(unnamed(a), getperm(dimnames(a), names)) +end +# Broadcasting-only alignment: unlike the public `unnamed(a, names)` (which returns a +# `Base.PermutedDimsArray`, a full array), this wraps in `TensorAlgebra.PermutedDims`, which stores +# the permutation in a field rather than a type parameter, so it builds cheaply and type-stably +# from the runtime permutation and is a broadcast leaf the linear-combination fold absorbs via +# `bipermutedimsopadd!`. `PermutedDims` has almost no array interface, so it stays confined to this +# hot path and is never handed back to users. Function barrier: `unnamed(a)` is abstractly typed, +# so dispatching on the concrete array makes `ndims` a compile-time constant for the inferrable +# `ntuple(…, Val(ndims))` permutation. +@noinline function _broadcast_permuteddims_to(array::AbstractArray, perm) + return TA.PermutedDims(array, ntuple(i -> perm[i], Val(ndims(array)))) end function broadcasted_unnamed(bc::Broadcasted, names) return broadcasted(bc.f, Base.Fix2(broadcasted_unnamed, names).(bc.args)...) diff --git a/test/test_linearalgebra.jl b/test/test_linearalgebra.jl index c292760..a360345 100644 --- a/test/test_linearalgebra.jl +++ b/test/test_linearalgebra.jl @@ -1,5 +1,5 @@ import LinearAlgebra as LA -using ITensorBase: dimnames, named, unnamed +using ITensorBase: dimnames, named, unname, unnamed using Test: @test, @testset @testset "LinearAlgebra (eltype=$(elt))" for elt in @@ -14,5 +14,5 @@ using Test: @test, @testset @test unnamed(LA.lmul!(2, copy(a))) ≈ 2 * unnamed(a) @test unnamed(LA.rdiv!(copy(a), 2)) ≈ unnamed(a) / 2 @test unnamed(LA.ldiv!(2, copy(a))) ≈ 2 \ unnamed(a) - @test LA.dot(a, b) ≈ LA.dot(unnamed(a), unnamed(b, dimnames(a))) + @test LA.dot(a, b) ≈ LA.dot(unnamed(a), unname(b, dimnames(a))) end