Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,8 @@ build
installed
.clang-format
TAGS
src/.libs/*
src/apps/.deps/*
src/utilities/src/.libs/*
src/utilities/src/.deps/*
test/.deps/*
16 changes: 14 additions & 2 deletions src/numerics/include/metaphysicl/dualnumber.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,20 @@ derivative_multiply_helper(DualNumber<T, D, asd> & out, const DualNumber<T2, D2,
if (asd && !(out.take_derivatives() || in.take_derivatives()))
return;

out.derivatives() *= in.value();
out.derivatives() += out.value() * in.derivatives();
// Fused multiply-add: out.deriv = in.value * out.deriv + out.value * in.deriv
//
// Replaces:
// out.derivatives() *= in.value();
// out.derivatives() += out.value() * in.derivatives();
//
// The naive version above costs:
// 1) scalar *=: cheap, no sparsity_union, no alloc
// 2) out.value() * in.derivatives(): allocates a temporary sparse number (no sparsity_union)
// 3) operator+=: sparsity_union(temp.indices()) + merge loop + temp dealloc
//
// The fused version below calls sparsity_union once and eliminates the temporary entirely,
// reducing the cost from 1 alloc + 1 dealloc + 1 sparsity_union to 0 allocs + 1 sparsity_union.
out.derivatives().multiply_union_add(in.value(), out.value(), in.derivatives());
}

template <typename T,
Expand Down
42 changes: 41 additions & 1 deletion src/numerics/include/metaphysicl/dynamicsparsenumberbase.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ template <typename Data2, typename Indices2, class... SubTypeArgs2>
METAPHYSICL_INLINE
DynamicSparseNumberBase<Data, Indices, SubType, SubTypeArgs...>::
DynamicSparseNumberBase(const DynamicSparseNumberBase<Data2, Indices2, SubType, SubTypeArgs2...> & src)
{
{
this->resize(src.size());
const auto & src_indices = src.nude_indices();
const auto & src_data = src.nude_data();
Expand Down Expand Up @@ -612,6 +612,46 @@ DynamicSparseNumberBase<Data, Indices, SubType, SubTypeArgs...>::operator+= (con
}


template <typename Data, typename Indices, template <class...> class SubType, class... SubTypeArgs>
template <class... SubTypeArgs2>
METAPHYSICL_INLINE
SubType<SubTypeArgs...>&
DynamicSparseNumberBase<Data, Indices, SubType, SubTypeArgs...>::multiply_union_add(
const typename Data::value_type scalar_a,
const typename Data::value_type scalar_b,
const SubType<SubTypeArgs2...>& other)
{
// Single sparsity union — may resize _data and _indices
this->sparsity_union(other.nude_indices());

// Scale all existing entries by scalar_a (indices unchanged by scalar multiply)
for (auto & d : _data)
d *= scalar_a;

// Walk the sorted index lists and add scalar_b * other[i] into matching entries.
// After sparsity_union above, every index in other is guaranteed to be in _indices,
// so the inner while loop will always find a match.
auto data_it = _data.begin();
auto index_it = _indices.begin();
auto data2_it = other.nude_data().begin();
auto index2_it = other.nude_indices().begin();

for (; data2_it != other.nude_data().end(); ++data2_it, ++index2_it)
{
while (*index_it < *index2_it) {
++index_it;
++data_it;
metaphysicl_assert(index_it != _indices.end());
}
metaphysicl_assert_equal_to(*index_it, *index2_it);

*data_it += scalar_b * (*data2_it);
}

return static_cast<SubType<SubTypeArgs...>&>(*this);
}


template <typename Data, typename Indices, template <class...> class SubType, class... SubTypeArgs>
template <class... SubTypeArgs2>
METAPHYSICL_INLINE
Expand Down