diff --git a/CHANGELOG.md b/CHANGELOG.md index 829e29ca..870f7949 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,13 @@ changelog does not include internal changes that do not affect the user. ## [Unreleased] +### Added + +- Added `COSMOS` from [Scalable Pareto Front Approximation for Deep Multi-Objective + Learning](https://arxiv.org/pdf/2103.13392) (ICDM 2021), a `Scalarizer` that combines a linear + scalarization with a cosine-similarity penalty pulling the vector of values toward a preference + direction. + ## [0.15.0] - 2026-06-15 ### Added diff --git a/docs/source/docs/scalarization/cosmos.rst b/docs/source/docs/scalarization/cosmos.rst new file mode 100644 index 00000000..9b3d9c1c --- /dev/null +++ b/docs/source/docs/scalarization/cosmos.rst @@ -0,0 +1,7 @@ +:hide-toc: + +COSMOS +====== + +.. autoclass:: torchjd.scalarization.COSMOS + :members: __call__ diff --git a/docs/source/docs/scalarization/index.rst b/docs/source/docs/scalarization/index.rst index d38708c0..76b98cd6 100644 --- a/docs/source/docs/scalarization/index.rst +++ b/docs/source/docs/scalarization/index.rst @@ -15,6 +15,7 @@ Abstract base class :maxdepth: 1 constant.rst + cosmos.rst dwa.rst famo.rst geometric_mean.rst diff --git a/src/torchjd/scalarization/__init__.py b/src/torchjd/scalarization/__init__.py index f1d22029..a7a0c3fc 100644 --- a/src/torchjd/scalarization/__init__.py +++ b/src/torchjd/scalarization/__init__.py @@ -20,6 +20,7 @@ """ from ._constant import Constant +from ._cosmos import COSMOS from ._dwa import DWA from ._famo import FAMO from ._geometric_mean import GeometricMean @@ -33,6 +34,7 @@ __all__ = [ "Constant", + "COSMOS", "DWA", "FAMO", "GeometricMean", diff --git a/src/torchjd/scalarization/_cosmos.py b/src/torchjd/scalarization/_cosmos.py new file mode 100644 index 00000000..be7bc3dc --- /dev/null +++ b/src/torchjd/scalarization/_cosmos.py @@ -0,0 +1,67 @@ +from torch import Tensor +from torch.nn.functional import cosine_similarity + +from ._scalarizer_base 
class COSMOS(Scalarizer):
    r"""
    :class:`~torchjd.scalarization.Scalarizer` that combines the input tensor of values using the
    COSMOS scalarization, proposed in `Scalable Pareto Front Approximation for Deep Multi-Objective
    Learning <https://arxiv.org/pdf/2103.13392>`_.

    It returns a linear scalarization penalized by the cosine similarity between the values and the
    preference vector:

    .. math::
        \sum_i r_i L_i - \lambda \frac{\sum_i r_i L_i}{\lVert r \rVert \, \lVert L \rVert},

    where:

    - :math:`L_i` is the :math:`i`-th input value (the :math:`i`-th objective);
    - :math:`r_i` is its preference weight (the ``weights`` parameter);
    - :math:`\lambda` is the cosine-similarity penalty coefficient (the ``lambda_`` parameter);
    - the subtracted term is :math:`\lambda \cos(r, L)`, which rewards aligning the vector of values
      with the preference direction and is what spreads the approximated Pareto front.

    :param lambda_: The cosine-similarity penalty coefficient :math:`\lambda`. Must be non-negative
        (``nan`` is rejected as well). A value of ``0`` reduces COSMOS to a plain linear
        scalarization. The paper uses values ranging from ``0.01`` to ``8`` depending on the
        dataset, with no single best value.
    :param weights: The preference vector :math:`r` applied to the values. It must have the same
        shape as the values passed at call time. To approximate the whole Pareto front rather than a
        single trade-off, it should be re-sampled from a Dirichlet distribution and reassigned before
        every call, as in the paper, e.g. for ``m`` objectives
        ``cosmos.weights = torch.distributions.Dirichlet(torch.ones(m)).sample()`` (a uniform
        distribution over the probability simplex; a concentration smaller than one spreads the
        samples toward the corners of the simplex).

    .. note::
        The full COSMOS method also conditions the model on the preference vector by concatenating it
        to the input; that is a modeling choice left to the user. This scalarizer only implements the
        objective.
    """

    def __init__(self, lambda_: float, weights: Tensor) -> None:
        # Written as `not (lambda_ >= 0.0)` rather than `lambda_ < 0.0` so that NaN, for which every
        # comparison is False, is rejected instead of silently producing NaN outputs later.
        if not lambda_ >= 0.0:
            raise ValueError(
                f"Parameter `lambda_` should be non-negative. Found `lambda_ = {lambda_}`."
            )

        super().__init__()
        self.lambda_ = lambda_
        self.weights = weights

    def forward(self, values: Tensor, /) -> Tensor:
        # The shape is checked at call time (not in `__init__`) because `weights` is meant to be
        # reassigned between calls.
        if self.weights.shape != values.shape:
            raise ValueError(
                f"Parameter `weights` should have the same shape as `values`. Found "
                f"`weights.shape = {tuple(self.weights.shape)}` and `values.shape = "
                f"{tuple(values.shape)}`."
            )

        # Linear term: sum_i r_i * L_i, over all entries whatever the shape.
        weighted_sum = (self.weights * values).sum()
        # Both tensors are flattened so the cosine is taken between two plain vectors.
        cosine = cosine_similarity(self.weights.flatten(), values.flatten(), dim=0)
        return weighted_sum - self.lambda_ * cosine

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(lambda_={self.lambda_}, weights={self.weights!r})"
import torch
from pytest import mark, raises
from torch import Tensor
from torch.nn.functional import cosine_similarity
from utils.tensors import tensor_

from torchjd.scalarization import COSMOS

from ._asserts import (
    assert_grad_flow,
    assert_permutation_invariant,
    assert_returns_scalar,
)
from ._inputs import all_inputs


def _uniform(values: Tensor) -> Tensor:
    """Return a preference vector shaped like `values` whose entries all equal `1 / numel`."""
    share = 1.0 / values.numel()
    return torch.full_like(values, share)


def test_value_aligned_gives_zero() -> None:
    # Equal values are collinear with uniform weights, hence cos(r, L) = 1. The output is then the
    # weighted sum (1) minus lambda (1), i.e. 0.
    scalarizer = COSMOS(lambda_=1.0, weights=tensor_([0.5, 0.5]))
    result = scalarizer(tensor_([1.0, 1.0]))
    torch.testing.assert_close(result, tensor_(0.0))


def test_value_lambda_zero_is_linear_scalarization() -> None:
    # lambda = 0 removes the cosine penalty, leaving only the weighted sum.
    thirds = tensor_([1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0])
    scalarizer = COSMOS(lambda_=0.0, weights=thirds)
    result = scalarizer(tensor_([1.0, 2.0, 4.0]))
    torch.testing.assert_close(result, tensor_(7.0 / 3.0))


def test_value_with_weights() -> None:
    # lambda = 0 keeps only the linear term: 2*3 + 1*4 = 10.
    scalarizer = COSMOS(lambda_=0.0, weights=tensor_([2.0, 1.0]))
    result = scalarizer(tensor_([3.0, 4.0]))
    torch.testing.assert_close(result, tensor_(10.0))


def test_full_formula() -> None:
    values = tensor_([1.0, 2.0, 4.0])
    weights = tensor_([0.5, 0.3, 0.2])
    lambda_ = 2.0

    # Reference value assembled term by term: linear part, then the cosine penalty.
    linear_term = (weights * values).sum()
    penalty = lambda_ * cosine_similarity(weights, values, dim=0)
    expected = linear_term - penalty

    actual = COSMOS(lambda_, weights=weights)(values)
    torch.testing.assert_close(actual, expected)


@mark.parametrize("values", all_inputs)
def test_expected_structure(values: Tensor) -> None:
    scalarizer = COSMOS(lambda_=1.0, weights=_uniform(values))
    assert_returns_scalar(scalarizer, values)


@mark.parametrize("values", all_inputs)
def test_grad_flow(values: Tensor) -> None:
    scalarizer = COSMOS(lambda_=1.0, weights=_uniform(values))
    assert_grad_flow(scalarizer, values)


@mark.parametrize("values", all_inputs)
def test_permutation_invariant(values: Tensor) -> None:
    # Uniform weights make both the weighted sum and the cosine term symmetric in the inputs.
    scalarizer = COSMOS(lambda_=1.0, weights=_uniform(values))
    assert_permutation_invariant(scalarizer, values)


def test_zero_values_returns_zero() -> None:
    # `cosine_similarity` stays finite on the zero vector, so all-zero values yield 0 (not nan)
    # whatever lambda is.
    scalarizer = COSMOS(lambda_=1.0, weights=tensor_([0.5, 0.5]))
    result = scalarizer(tensor_([0.0, 0.0]))
    torch.testing.assert_close(result, tensor_(0.0))


@mark.parametrize("lambda_", [-1.0, -0.5])
def test_raises_on_negative_lambda(lambda_: float) -> None:
    weights = tensor_([0.5, 0.5])
    with raises(ValueError):
        COSMOS(lambda_=lambda_, weights=weights)


def test_raises_on_weights_shape_mismatch() -> None:
    three_weights = COSMOS(lambda_=1.0, weights=tensor_([1.0, 1.0, 1.0]))
    two_values = tensor_([1.0, 1.0])
    with raises(ValueError):
        three_weights(two_values)


def test_representations() -> None:
    scalarizer = COSMOS(lambda_=0.5, weights=torch.tensor([0.5, 0.5]))
    assert repr(scalarizer) == "COSMOS(lambda_=0.5, weights=tensor([0.5000, 0.5000]))"
    assert str(scalarizer) == "COSMOS"