diff --git a/CHANGELOG.md b/CHANGELOG.md index 870f7949..52107b54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,10 @@ changelog does not include internal changes that do not affect the user. Learning](https://arxiv.org/pdf/2103.13392) (ICDM 2021), a `Scalarizer` that combines a linear scalarization with a cosine-similarity penalty pulling the vector of values toward a preference direction. +- Added `PBI` (Penalty-based Boundary Intersection) from [MOEA/D: A Multiobjective Evolutionary + Algorithm Based on Decomposition](https://ieeexplore.ieee.org/document/4358754) (IEEE TEVC 2007), a + `Scalarizer` that decomposes the values into a component along a preference direction and a + penalized perpendicular component. ## [0.15.0] - 2026-06-15 diff --git a/docs/source/docs/scalarization/index.rst b/docs/source/docs/scalarization/index.rst index 76b98cd6..560469cb 100644 --- a/docs/source/docs/scalarization/index.rst +++ b/docs/source/docs/scalarization/index.rst @@ -21,6 +21,7 @@ Abstract base class geometric_mean.rst imtl_l.rst mean.rst + pbi.rst random.rst stch.rst sum.rst diff --git a/docs/source/docs/scalarization/pbi.rst b/docs/source/docs/scalarization/pbi.rst new file mode 100644 index 00000000..254d5ba5 --- /dev/null +++ b/docs/source/docs/scalarization/pbi.rst @@ -0,0 +1,7 @@ +:hide-toc: + +PBI +=== + +.. 
import torch
from torch import Tensor

from ._scalarizer_base import Scalarizer

# Added under the square root of the perpendicular distance so that its gradient stays finite when
# that distance is exactly zero.
_EPSILON = 1e-12


class PBI(Scalarizer):
    r"""
    :class:`~torchjd.scalarization.Scalarizer` that combines the input tensor of values using the
    Penalty-based Boundary Intersection (PBI) scalarization, proposed in `MOEA/D: A Multiobjective
    Evolutionary Algorithm Based on Decomposition
    <https://ieeexplore.ieee.org/document/4358754>`_.

    It decomposes the values, relative to a reference point, into a component along a preference
    direction and a component perpendicular to it, and penalizes the latter:

    .. math::
        d_1 = (L - z^*)^\top \hat r, \qquad
        d_2 = \lVert (L - z^*) - d_1 \hat r \rVert, \qquad
        d_1 + \theta\, d_2,

    where:

    - :math:`L_i` is the :math:`i`-th input value (the :math:`i`-th objective);
    - :math:`z^*` is the reference (ideal) point (the ``reference`` parameter);
    - :math:`\hat r = r / \lVert r \rVert` is the normalized preference direction (the ``weights``
      parameter);
    - :math:`d_1` is the distance along the preference direction and :math:`d_2` is the distance to
      it;
    - :math:`\theta` is the penalty coefficient applied to :math:`d_2` (the ``theta`` parameter).

    :param theta: The penalty coefficient :math:`\theta` applied to the perpendicular distance. Must
        be non-negative (``nan`` is rejected as well). A value of ``0`` reduces PBI to the
        projection onto the preference direction. The paper uses ``5`` in its experiments; there is
        no single best value, and the paper notes that a too large or too small value worsens the
        result.
    :param weights: The preference vector :math:`r`, giving the direction along which the values are
        decomposed. It must have the same shape as the values passed at call time, and it must not
        be all zeros: it is divided by its Euclidean norm, so a zero vector makes the result
        ``nan``. To approximate the whole Pareto front rather than a single trade-off, it should be
        re-sampled from a Dirichlet distribution and reassigned before every call, e.g. for ``m``
        objectives ``pbi.weights = torch.distributions.Dirichlet(torch.ones(m)).sample()``.
    :param reference: The reference (ideal) point :math:`z^*` subtracted from the values. It should
        be a lower bound on the values. If ``None``, the origin is used, which assumes non-negative
        values. If provided, it must have the same shape as the values passed at call time.

    .. note::
        :math:`d_2` is a Euclidean norm, whose gradient is undefined when the values lie exactly on
        the preference direction (:math:`d_2 = 0`). To keep the gradient finite there, a small
        constant (:math:`10^{-12}`) is added under the square root. This inflates :math:`d_2` by at
        most :math:`10^{-6}`, and therefore the result by at most :math:`\theta \cdot 10^{-6}`,
        the maximum being reached at that point; the effect is negligible elsewhere.
    """

    def __init__(self, theta: float, weights: Tensor, reference: Tensor | None = None) -> None:
        # Written as `not theta >= 0.0` rather than `theta < 0.0` on purpose: every comparison with
        # NaN is false, so the latter would silently accept `theta = nan`.
        if not theta >= 0.0:
            raise ValueError(f"Parameter `theta` should be non-negative. Found `theta = {theta}`.")

        super().__init__()
        self.theta = theta
        self.weights = weights
        self.reference = reference

    def forward(self, values: Tensor, /) -> Tensor:
        """
        Computes the PBI scalarization of ``values``.

        :param values: The tensor of values to combine. It can have any shape, as long as it matches
            the shape of ``weights`` (and of ``reference``, if there is one).
        :raises ValueError: If ``weights`` or ``reference`` does not have the same shape as
            ``values``.
        """

        # Shapes are checked at call time rather than in `__init__` because `weights` and
        # `reference` are plain attributes that may be reassigned between calls.
        if self.weights.shape != values.shape:
            raise ValueError(
                f"Parameter `weights` should have the same shape as `values`. Found "
                f"`weights.shape = {tuple(self.weights.shape)}` and `values.shape = "
                f"{tuple(values.shape)}`."
            )
        if self.reference is not None and self.reference.shape != values.shape:
            raise ValueError(
                f"Parameter `reference` should have the same shape as `values`. Found "
                f"`reference.shape = {tuple(self.reference.shape)}` and `values.shape = "
                f"{tuple(values.shape)}`."
            )

        shifted = values if self.reference is None else values - self.reference

        # Work on flat vectors so that inputs of any shape are handled identically.
        f = shifted.flatten()
        direction = self.weights.flatten()
        direction = direction / direction.norm()  # nan if `weights` is all zeros.

        # d1: signed length of the projection of f onto the (unit) preference direction.
        d1 = (f * direction).sum()

        # d2: length of what remains of f once that projection is removed. `_EPSILON` keeps the
        # gradient of the square root finite when this remainder is exactly zero.
        perpendicular = f - d1 * direction
        d2 = torch.sqrt((perpendicular * perpendicular).sum() + _EPSILON)

        return d1 + self.theta * d2

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}(theta={self.theta}, weights={self.weights!r}, "
            f"reference={self.reference!r})"
        )
For [2, 0] onto [1, 1] / sqrt(2): d1 = sqrt(2). + out = PBI(theta=0.0, weights=tensor_([1.0, 1.0]))(tensor_([2.0, 0.0])) + torch.testing.assert_close(out, tensor_(2.0).sqrt()) + + +def test_reference_shifts_values() -> None: + # Subtracting the reference [1, 1] from [3, 1] gives [2, 0], matching the no-reference case. + with_reference = PBI(theta=1.0, weights=tensor_([1.0, 1.0]), reference=tensor_([1.0, 1.0])) + out = with_reference(tensor_([3.0, 1.0])) + expected = PBI(theta=1.0, weights=tensor_([1.0, 1.0]))(tensor_([2.0, 0.0])) + torch.testing.assert_close(out, expected) + + +def test_full_formula() -> None: + values = tensor_([1.0, 2.0, 4.0]) + weights = tensor_([0.5, 0.3, 0.2]) + reference = tensor_([0.5, 0.5, 0.5]) + theta = 5.0 + shifted = values - reference + direction = weights / weights.norm() + d1 = (shifted * direction).sum() + d2 = (shifted - d1 * direction).norm() + expected = d1 + theta * d2 + torch.testing.assert_close(PBI(theta, weights=weights, reference=reference)(values), expected) + + +def test_finite_when_values_on_preference_ray() -> None: + # When the values lie exactly on the preference direction, d2 = 0. The constant under the square + # root keeps both the value and the gradient finite (no nan), which is the whole point of the + # stabilization. + weights = tensor_([1.0, 2.0]) + leaf = weights.detach().clone().requires_grad_() # values == weights, so they are on the ray. 
+ out = PBI(theta=5.0, weights=weights)(leaf) + out.backward() + assert out.isfinite() + assert leaf.grad is not None + assert leaf.grad.isfinite().all() + + +@mark.parametrize("values", all_inputs) +def test_expected_structure(values: Tensor) -> None: + assert_returns_scalar(PBI(theta=5.0, weights=_uniform(values)), values) + + +@mark.parametrize("values", all_inputs) +def test_grad_flow(values: Tensor) -> None: + assert_grad_flow(PBI(theta=5.0, weights=_uniform(values)), values) + + +@mark.parametrize("values", all_inputs) +def test_permutation_invariant(values: Tensor) -> None: + # With uniform weights and no reference, both d1 and d2 are symmetric in the inputs. + assert_permutation_invariant(PBI(theta=5.0, weights=_uniform(values)), values) + + +@mark.parametrize("theta", [-1.0, -0.5]) +def test_raises_on_negative_theta(theta: float) -> None: + with raises(ValueError): + PBI(theta=theta, weights=tensor_([0.5, 0.5])) + + +def test_raises_on_weights_shape_mismatch() -> None: + scalarizer = PBI(theta=5.0, weights=tensor_([1.0, 1.0, 1.0])) + with raises(ValueError): + scalarizer(tensor_([1.0, 1.0])) + + +def test_raises_on_reference_shape_mismatch() -> None: + scalarizer = PBI(theta=5.0, weights=tensor_([1.0, 1.0]), reference=tensor_([0.0, 0.0, 0.0])) + with raises(ValueError): + scalarizer(tensor_([1.0, 1.0])) + + +def test_representations() -> None: + s = PBI(theta=5.0, weights=torch.tensor([0.5, 0.5])) + assert repr(s) == "PBI(theta=5.0, weights=tensor([0.5000, 0.5000]), reference=None)" + assert str(s) == "PBI"