diff --git a/Project.toml b/Project.toml index e74ffa0..3cc75bc 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "TensorAlgebra" uuid = "68bd88dc-f39d-4e12-b2ca-f046b68fcc6a" -version = "0.16.2" +version = "0.16.3" authors = ["ITensor developers and contributors"] [workspace] diff --git a/src/contract/contract_matricize.jl b/src/contract/contract_matricize.jl index b83277d..23d173d 100644 --- a/src/contract/contract_matricize.jl +++ b/src/contract/contract_matricize.jl @@ -23,15 +23,32 @@ function contractopadd!( algorithm.right_fusion_style, op2, a2, biperm2_codomain, biperm2_domain ) output_style = algorithm.output_fusion_style - # Matricize the destination and multiply straight into it: a no-op for an aligned or - # transposed dense output (a view aliasing `a_dest`, so `mul!` writes through and we - # are done), a fresh permuted copy otherwise. Either way `matricize` seeds `a_dest_mat` - # with `a_dest`'s current contents, so `β` rides on the `mul!` and a detached copy is - # written back with a plain overwrite. - a_dest_mat = matricizeperm(output_style, a_dest, invperm_codomain, invperm_domain) - mul!(a_dest_mat, a1_mat, a2_mat, α, β) - if !Base.mightalias(a_dest_mat, a_dest) + if iszero(β) && !matricizepermaliases(output_style, invperm_codomain, invperm_domain) + # `β` is a strong zero and matricizing `a_dest` would only build a detached copy that + # `mul!` immediately overwrites, so skip that gather: let the matmul allocate its matrix + # result directly and scatter it into `a_dest`. Every coupled-sector block is + # materialized (the matmul zeros the ones it does not reach), so the scatter overwrites + # `a_dest` in full. + a_dest_mat = a1_mat * a2_mat + isone(α) || scale!(a_dest_mat, α) unmatricizeperm!(output_style, a_dest, a_dest_mat, invperm_codomain, invperm_domain) + else + # Matricize the destination and multiply straight into it: a no-op for an aligned or + # transposed dense output (a view aliasing `a_dest`, so `mul!` writes through and we + # are done), a fresh permuted copy otherwise. Either way `matricize` seeds `a_dest_mat` + # with `a_dest`'s current contents, so `β` rides on the `mul!` and a detached copy is + # written back with a plain overwrite. + a_dest_mat = matricizeperm(output_style, a_dest, invperm_codomain, invperm_domain) + mul!(a_dest_mat, a1_mat, a2_mat, α, β) + if !Base.mightalias(a_dest_mat, a_dest) + unmatricizeperm!( + output_style, + a_dest, + a_dest_mat, + invperm_codomain, + invperm_domain + ) + end end return a_dest end diff --git a/src/matricize.jl b/src/matricize.jl index ea71bb8..d6c6a82 100644 --- a/src/matricize.jl +++ b/src/matricize.jl @@ -174,6 +174,13 @@ function matricizekind( return PermuteMatricizeKind end +# Whether `matricizeperm(style, a, perm_codomain, perm_domain)` aliases `a` — returns a view (so a +# `mul!` into it writes through to `a`) rather than freshly allocated storage. Only a style whose +# `matricize` is itself a view (a dense reshape) can alias, and only when the bipermutation needs +# no permuted copy. Defaults to `false`: a style that gathers into new storage, such as a graded +# array, never aliases its input. +matricizepermaliases(::FusionStyle, perm_codomain, perm_domain) = false + # Skip the permuted copy when the classifier says it is unnecessary. `ReshapeMatricizeKind` # calls `matricize` directly on `a` (a view for dense, a gather without the extra permute # for graded); `TransposeMatricizeKind` returns a lazy `transpose` of the reshape. Both @@ -279,6 +286,11 @@ function matricizekind( isidentityperm((perm_domain..., perm_codomain...)) && return TransposeMatricizeKind return PermuteMatricizeKind end +# A dense reshape/transpose is a view of `a`; only a permuted copy detaches. So the matricized +# output aliases `a` for every kind except `PermuteMatricizeKind`. +function matricizepermaliases(style::ReshapeFusion, perm_codomain, perm_domain) + return matricizekind(style, perm_codomain, perm_domain) != PermuteMatricizeKind +end # A dense reshape ignores the codomain/domain split: it just reshapes to the concatenated axes. # `conj` re-dualizes the codomain-facing `domain_axes` into stored form, a no-op on a dense axis. function unmatricize(style::ReshapeFusion, m, codomain_axes, domain_axes)