Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ changelog does not include internal changes that do not affect the user.
Learning](https://arxiv.org/pdf/2103.13392) (ICDM 2021), a `Scalarizer` that combines a linear
scalarization with a cosine-similarity penalty pulling the vector of values toward a preference
direction.
- Added `PBI` (Penalty-based Boundary Intersection) from [MOEA/D: A Multiobjective Evolutionary
Algorithm Based on Decomposition](https://ieeexplore.ieee.org/document/4358754) (IEEE TEVC 2007), a
`Scalarizer` that decomposes the values into a component along a preference direction and a
penalized perpendicular component.

## [0.15.0] - 2026-06-15

Expand Down
1 change: 1 addition & 0 deletions docs/source/docs/scalarization/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Abstract base class
geometric_mean.rst
imtl_l.rst
mean.rst
pbi.rst
random.rst
stch.rst
sum.rst
Expand Down
7 changes: 7 additions & 0 deletions docs/source/docs/scalarization/pbi.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
:hide-toc:

PBI
===

.. autoclass:: torchjd.scalarization.PBI
    :members: __call__
2 changes: 2 additions & 0 deletions src/torchjd/scalarization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from ._geometric_mean import GeometricMean
from ._imtl_l import IMTLL
from ._mean import Mean
from ._pbi import PBI
from ._random import Random
from ._scalarizer_base import Scalarizer
from ._stch import STCH
Expand All @@ -40,6 +41,7 @@
"GeometricMean",
"IMTLL",
"Mean",
"PBI",
"Random",
"Scalarizer",
"STCH",
Expand Down
90 changes: 90 additions & 0 deletions src/torchjd/scalarization/_pbi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import torch
from torch import Tensor

from ._scalarizer_base import Scalarizer

# Small constant added under the square root when computing the perpendicular distance d2, so that
# the gradient of that norm stays finite when the perpendicular component is exactly zero.
_EPSILON = 1e-12


class PBI(Scalarizer):
    r"""
    :class:`~torchjd.scalarization.Scalarizer` implementing the Penalty-based Boundary Intersection
    (PBI) scalarization, proposed in `MOEA/D: A Multiobjective Evolutionary Algorithm Based on
    Decomposition <https://ieeexplore.ieee.org/document/4358754>`_.

    The values are first shifted by a reference point, then split into a part lying along a
    preference direction and a part perpendicular to it. The result is the former plus a penalty
    on the latter:

    .. math::
        d_1 = (L - z^*)^\top \hat r, \qquad
        d_2 = \lVert (L - z^*) - d_1 \hat r \rVert, \qquad
        d_1 + \theta\, d_2,

    where:

    - :math:`L_i` is the :math:`i`-th input value (the :math:`i`-th objective);
    - :math:`z^*` is the reference (ideal) point (the ``reference`` parameter);
    - :math:`\hat r = r / \lVert r \rVert` is the normalized preference direction (the ``weights``
      parameter);
    - :math:`d_1` is the (signed) length of the projection of the shifted values onto the
      preference direction and :math:`d_2` is the distance from the shifted values to that
      direction;
    - :math:`\theta` is the penalty coefficient applied to :math:`d_2` (the ``theta`` parameter).

    Values, weights and reference may have any shape as long as the shapes match: they are
    flattened before the decomposition is computed.

    :param theta: The penalty coefficient :math:`\theta` applied to the perpendicular distance. Must
        be non-negative. A value of ``0`` reduces PBI to the projection onto the preference
        direction. The paper uses ``5`` in its experiments; there is no single best value, and the
        paper notes that a too large or too small value worsens the result.
    :param weights: The preference vector :math:`r`, giving the direction along which the values are
        decomposed. It must have the same shape as the values passed at call time. To approximate
        the whole Pareto front rather than a single trade-off, it should be re-sampled from a
        Dirichlet distribution and reassigned before every call, e.g. for ``m`` objectives
        ``pbi.weights = torch.distributions.Dirichlet(torch.ones(m)).sample()``.
    :param reference: The reference (ideal) point :math:`z^*` subtracted from the values. It should
        be a lower bound on the values. If ``None``, the origin is used, which assumes non-negative
        values. If provided, it must have the same shape as the values passed at call time.

    .. note::
        :math:`d_2` is a Euclidean norm, whose gradient is undefined when the values lie exactly on
        the preference direction (:math:`d_2 = 0`). To keep the gradient finite there, a small
        constant is added under the square root. At that point this shifts :math:`d_2` by about
        :math:`10^{-6}` (hence the result by about :math:`\theta \cdot 10^{-6}`); elsewhere the
        shift is smaller and negligible.
    """

    def __init__(self, theta: float, weights: Tensor, reference: Tensor | None = None) -> None:
        # Reject an invalid penalty coefficient before any state is set up.
        if theta < 0.0:
            raise ValueError(f"Parameter `theta` should be non-negative. Found `theta = {theta}`.")

        super().__init__()
        self.theta = theta
        self.weights = weights
        self.reference = reference

    def forward(self, values: Tensor, /) -> Tensor:
        # Shapes are validated at call time because `weights` (and `reference`) are plain
        # attributes that may be reassigned between calls.
        expected_shape = values.shape

        if self.weights.shape != expected_shape:
            found = tuple(self.weights.shape)
            raise ValueError(
                "Parameter `weights` should have the same shape as `values`. Found "
                f"`weights.shape = {found}` and `values.shape = "
                f"{tuple(expected_shape)}`."
            )

        reference = self.reference
        if reference is None:
            # No reference point: the origin is used, so the values are taken as they are.
            offset = values
        else:
            if reference.shape != expected_shape:
                found = tuple(reference.shape)
                raise ValueError(
                    "Parameter `reference` should have the same shape as `values`. Found "
                    f"`reference.shape = {found}` and `values.shape = "
                    f"{tuple(expected_shape)}`."
                )
            offset = values - reference

        # Work on flat vectors so that inputs of any shape are handled the same way.
        flat = offset.flatten()
        unit = self.weights.flatten()
        unit = unit / unit.norm()

        # d1: scalar projection of the shifted values onto the unit preference direction.
        along = (flat * unit).sum()

        # d2: norm of what remains once the projection is removed. The constant under the square
        # root keeps the gradient finite when the remainder is exactly zero.
        residual = flat - along * unit
        across = torch.sqrt((residual * residual).sum() + _EPSILON)

        return along + self.theta * across

    def __repr__(self) -> str:
        name = self.__class__.__name__
        arguments = (
            f"theta={self.theta}, weights={self.weights!r}, "
            f"reference={self.reference!r}"
        )
        return f"{name}({arguments})"
105 changes: 105 additions & 0 deletions tests/unit/scalarization/test_pbi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import torch
from pytest import mark, raises
from torch import Tensor
from utils.tensors import tensor_

from torchjd.scalarization import PBI

from ._asserts import (
assert_grad_flow,
assert_permutation_invariant,
assert_returns_scalar,
)
from ._inputs import all_inputs


def _uniform(values: Tensor) -> Tensor:
    """Return a tensor shaped like `values` whose entries all equal `1 / values.numel()`."""
    share = 1.0 / values.numel()
    return torch.full_like(values, share)


def test_value() -> None:
    # Hand-computed case with direction [1, 1] / sqrt(2) and values [2, 0]:
    # d1 = sqrt(2), the perpendicular part is [1, -1] so d2 = sqrt(2), hence
    # d1 + theta * d2 = 2 * sqrt(2) for theta = 1.
    scalarizer = PBI(theta=1.0, weights=tensor_([1.0, 1.0]))
    result = scalarizer(tensor_([2.0, 0.0]))
    expected = tensor_(2.0) * tensor_(2.0).sqrt()
    torch.testing.assert_close(result, expected)


def test_theta_zero_is_projection() -> None:
    # theta = 0 removes the perpendicular penalty, so only d1 is left. Projecting [2, 0] onto
    # [1, 1] / sqrt(2) gives d1 = sqrt(2).
    projection_only = PBI(theta=0.0, weights=tensor_([1.0, 1.0]))
    result = projection_only(tensor_([2.0, 0.0]))
    torch.testing.assert_close(result, tensor_(2.0).sqrt())


def test_reference_shifts_values() -> None:
    # [3, 1] minus the reference [1, 1] is [2, 0], so the result must equal the one obtained
    # from [2, 0] without any reference.
    shifted_scalarizer = PBI(
        theta=1.0,
        weights=tensor_([1.0, 1.0]),
        reference=tensor_([1.0, 1.0]),
    )
    result = shifted_scalarizer(tensor_([3.0, 1.0]))

    plain_scalarizer = PBI(theta=1.0, weights=tensor_([1.0, 1.0]))
    baseline = plain_scalarizer(tensor_([2.0, 0.0]))

    torch.testing.assert_close(result, baseline)


def test_full_formula() -> None:
    # Recompute d1 + theta * d2 step by step (without the stabilizing constant) and compare it
    # with the scalarizer output on a non-trivial input using every parameter.
    values = tensor_([1.0, 2.0, 4.0])
    weights = tensor_([0.5, 0.3, 0.2])
    reference = tensor_([0.5, 0.5, 0.5])
    theta = 5.0

    offset = values - reference
    unit = weights / weights.norm()
    along = (offset * unit).sum()
    across = (offset - along * unit).norm()
    expected = along + theta * across

    result = PBI(theta, weights=weights, reference=reference)(values)
    torch.testing.assert_close(result, expected)


def test_finite_when_values_on_preference_ray() -> None:
    # Values lying exactly on the preference direction give d2 = 0. The constant under the square
    # root exists precisely so that the output and its gradient remain finite (no nan) there.
    weights = tensor_([1.0, 2.0])
    # Same entries as the weights, hence on the preference ray.
    on_ray = weights.detach().clone().requires_grad_()

    scalarizer = PBI(theta=5.0, weights=weights)
    out = scalarizer(on_ray)
    out.backward()

    assert out.isfinite()
    gradient = on_ray.grad
    assert gradient is not None
    assert gradient.isfinite().all()


@mark.parametrize("values", all_inputs)
def test_expected_structure(values: Tensor) -> None:
    # The output must be a scalar for every shared test input.
    scalarizer = PBI(theta=5.0, weights=_uniform(values))
    assert_returns_scalar(scalarizer, values)


@mark.parametrize("values", all_inputs)
def test_grad_flow(values: Tensor) -> None:
    # Gradients must flow from the output back to the values for every shared test input.
    scalarizer = PBI(theta=5.0, weights=_uniform(values))
    assert_grad_flow(scalarizer, values)


@mark.parametrize("values", all_inputs)
def test_permutation_invariant(values: Tensor) -> None:
    # Uniform weights and no reference make both d1 and d2 symmetric in the inputs, so
    # reordering the values must not change the output.
    scalarizer = PBI(theta=5.0, weights=_uniform(values))
    assert_permutation_invariant(scalarizer, values)


@mark.parametrize("theta", [-1.0, -0.5])
def test_raises_on_negative_theta(theta: float) -> None:
    # A negative penalty coefficient must be rejected at construction time.
    weights = tensor_([0.5, 0.5])
    with raises(ValueError):
        PBI(theta=theta, weights=weights)


def test_raises_on_weights_shape_mismatch() -> None:
    # Three weights against two values: the mismatch must be reported when the scalarizer is
    # called.
    three_weights = tensor_([1.0, 1.0, 1.0])
    scalarizer = PBI(theta=5.0, weights=three_weights)
    with raises(ValueError):
        scalarizer(tensor_([1.0, 1.0]))


def test_raises_on_reference_shape_mismatch() -> None:
    # The weights match the values but the reference has three entries: the mismatch must be
    # reported when the scalarizer is called.
    scalarizer = PBI(
        theta=5.0,
        weights=tensor_([1.0, 1.0]),
        reference=tensor_([0.0, 0.0, 0.0]),
    )
    with raises(ValueError):
        scalarizer(tensor_([1.0, 1.0]))


def test_representations() -> None:
    # `repr` lists every constructor argument, while `str` is just the class name.
    scalarizer = PBI(theta=5.0, weights=torch.tensor([0.5, 0.5]))
    expected_repr = "PBI(theta=5.0, weights=tensor([0.5000, 0.5000]), reference=None)"
    assert repr(scalarizer) == expected_repr
    assert str(scalarizer) == "PBI"
Loading