SimplexLab · ValerianRey · Jun 17, 2026 · Jun 16, 2026 · Jun 17, 2026 · Jun 17, 2026
@@ -8,6 +8,13 @@ changelog does not include internal changes that do not affect the user.
 
 ## [Unreleased]
 
+### Added
+
+- Added `COSMOS` from [Scalable Pareto Front Approximation for Deep Multi-Objective
+  Learning](https://arxiv.org/pdf/2103.13392) (ICDM 2021), a `Scalarizer` that combines a linear
+  scalarization with a cosine-similarity penalty pulling the vector of values toward a preference
+  direction.
+
 ## [0.15.0] - 2026-06-15
 
 ### Added

@@ -0,0 +1,7 @@
+:hide-toc:
+
+COSMOS
+======
+
+.. autoclass:: torchjd.scalarization.COSMOS
+    :members: __call__
@@ -15,6 +15,7 @@ Abstract base class
     :maxdepth: 1
 
     constant.rst
+    cosmos.rst
     dwa.rst
     famo.rst
     geometric_mean.rst

@@ -20,6 +20,7 @@
 """
 
 from ._constant import Constant
+from ._cosmos import COSMOS
 from ._dwa import DWA
 from ._famo import FAMO
 from ._geometric_mean import GeometricMean
@@ -33,6 +34,7 @@
 
 __all__ = [
     "Constant",
+    "COSMOS",
     "DWA",
     "FAMO",
     "GeometricMean",

@@ -0,0 +1,67 @@
+from torch import Tensor
+from torch.nn.functional import cosine_similarity
+
+from ._scalarizer_base import Scalarizer
+
+
+class COSMOS(Scalarizer):
+    r"""
+    :class:`~torchjd.scalarization.Scalarizer` that combines the input tensor of values using the
+    COSMOS scalarization, proposed in `Scalable Pareto Front Approximation for Deep Multi-Objective
+    Learning <https://arxiv.org/pdf/2103.13392>`_.
+
+    It returns a linear scalarization of the values, from which :math:`\lambda` times the cosine
+    similarity between the values and the preference vector is subtracted:
+
+    .. math::
+        \sum_i r_i L_i - \lambda \frac{\sum_i r_i L_i}{\lVert r \rVert \, \lVert L \rVert},
+
+    where:
+
+    - :math:`L_i` is the :math:`i`-th input value (the :math:`i`-th objective);
+    - :math:`r_i` is its preference weight (the ``weights`` parameter);
+    - :math:`\lambda` is the cosine-similarity penalty coefficient (the ``lambda_`` parameter);
+    - the subtracted term is :math:`\lambda \cos(r, L)`, which rewards aligning the vector of values
+      with the preference direction and is what spreads the approximated Pareto front.
+
+    :param lambda_: The cosine-similarity penalty coefficient :math:`\lambda`. Must be non-negative:
+        a negative value raises a ``ValueError`` at construction. A value of ``0`` reduces COSMOS
+        to a plain linear scalarization. The paper uses values ranging from ``0.01`` to ``8``
+        depending on the dataset, with no single best value.
+    :param weights: The preference vector :math:`r` applied to the values. It must have the same
+        shape as the values passed at call time; otherwise the call raises a ``ValueError``. To
+        approximate the whole Pareto front rather than a single trade-off, it should be re-sampled
+        from a Dirichlet distribution and reassigned before every call, as in the paper, e.g. for
+        ``m`` objectives ``cosmos.weights = torch.distributions.Dirichlet(torch.ones(m)).sample()``
+        (a uniform distribution over the probability simplex; a concentration smaller than one
+        spreads the samples toward the corners of the simplex).
+
+    .. note::
+        Both tensors are flattened before the cosine similarity is computed, so the penalty is a
+        single scalar whatever the shape of the values.
+
+    .. note::
+        The full COSMOS method also conditions the model on the preference vector by concatenating it
+        to the input; that is a modeling choice left to the user. This scalarizer only implements the
+        objective.
+    """
+
+    def __init__(self, lambda_: float, weights: Tensor) -> None:
+        # Only strictly negative coefficients are rejected; 0 is allowed and disables the penalty.
+        # NOTE(review): a NaN `lambda_` passes this check because `nan < 0.0` is False -- confirm
+        # whether it should be rejected as well.
+        if lambda_ < 0.0:
+            raise ValueError(
+                f"Parameter `lambda_` should be non-negative. Found `lambda_ = {lambda_}`."
+            )
+
+        super().__init__()
+        self.lambda_ = lambda_
+        # Kept as a plain attribute so that it can be reassigned between calls.
+        # NOTE(review): if `Scalarizer` is an `nn.Module`, this tensor is not a registered buffer,
+        # so presumably it will not follow `.to(...)` nor appear in `state_dict()` -- TODO confirm.
+        self.weights = weights
+
+    def forward(self, values: Tensor, /) -> Tensor:
+        # The shapes must be exactly equal: broadcasting `weights` against `values` is not accepted.
+        if self.weights.shape != values.shape:
+            raise ValueError(
+                f"Parameter `weights` should have the same shape as `values`. Found "
+                f"`weights.shape = {tuple(self.weights.shape)}` and `values.shape = "
+                f"{tuple(values.shape)}`."
+            )
+
+        # Linear scalarization term: sum_i r_i * L_i, reduced over all elements.
+        weighted_sum = (self.weights * values).sum()
+        # Flatten both tensors so that one cosine is computed over all elements, whatever the shape.
+        cosine = cosine_similarity(self.weights.flatten(), values.flatten(), dim=0)
+        # The cosine is subtracted: a better alignment with the preference lowers the result.
+        return weighted_sum - self.lambda_ * cosine
+
+    def __repr__(self) -> str:
+        # Shows both hyperparameters; `!r` delegates to the tensor's own repr.
+        return f"{self.__class__.__name__}(lambda_={self.lambda_}, weights={self.weights!r})"
@@ -0,0 +1,88 @@
+import torch
+from pytest import mark, raises
+from torch import Tensor
+from torch.nn.functional import cosine_similarity
+from utils.tensors import tensor_
+
+from torchjd.scalarization import COSMOS
+
+from ._asserts import (
+    assert_grad_flow,
+    assert_permutation_invariant,
+    assert_returns_scalar,
+)
+from ._inputs import all_inputs
+
+
+def _uniform(values: Tensor) -> Tensor:
+    """
+    Uniform preference vector matching the shape of `values`.
+
+    Every entry is `1 / values.numel()`, so the entries sum to 1. It is built with `full_like`, so it
+    has the same shape as `values`, which is what `COSMOS` requires of its weights at call time.
+    """
+    return torch.full_like(values, 1.0 / values.numel())
+
+
+def test_value_aligned_gives_zero() -> None:
+    # Uniform weights and equal values point in the same direction, so cos(r, L) = 1. The weighted
+    # sum is 0.5 * 1 + 0.5 * 1 = 1 and the subtracted term is lambda * cos = 1 * 1 = 1, so the
+    # result is 1 - 1 = 0.
+    out = COSMOS(lambda_=1.0, weights=tensor_([0.5, 0.5]))(tensor_([1.0, 1.0]))
+    torch.testing.assert_close(out, tensor_(0.0))
+
+
+def test_value_lambda_zero_is_linear_scalarization() -> None:
+    # With lambda = 0 the cosine term vanishes, so COSMOS is just the weighted sum. With uniform
+    # weights of 1/3 this is the mean of the values: (1 + 2 + 4) / 3 = 7/3.
+    weights = tensor_([1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0])
+    out = COSMOS(lambda_=0.0, weights=weights)(tensor_([1.0, 2.0, 4.0]))
+    torch.testing.assert_close(out, tensor_(7.0 / 3.0))
+
+
+def test_value_with_weights() -> None:
+    # With lambda = 0, only the linear term remains: 2*3 + 1*4 = 10. The weights are non-uniform and
+    # do not sum to 1, so this also shows that they are used as given.
+    out = COSMOS(lambda_=0.0, weights=tensor_([2.0, 1.0]))(tensor_([3.0, 4.0]))
+    torch.testing.assert_close(out, tensor_(10.0))
+
+
+def test_full_formula() -> None:
+    # Non-uniform weights and a non-zero lambda, so both the linear term and the cosine term
+    # contribute. The expected value is recomputed here directly from the closed-form expression.
+    values = tensor_([1.0, 2.0, 4.0])
+    weights = tensor_([0.5, 0.3, 0.2])
+    lambda_ = 2.0
+    expected = (weights * values).sum() - lambda_ * cosine_similarity(weights, values, dim=0)
+    torch.testing.assert_close(COSMOS(lambda_, weights=weights)(values), expected)
+
+
+@mark.parametrize("values", all_inputs)
+def test_expected_structure(values: Tensor) -> None:
+    # Runs `assert_returns_scalar` on every input of `all_inputs`. The weights are rebuilt for each
+    # input because they must have the same shape as the values.
+    assert_returns_scalar(COSMOS(lambda_=1.0, weights=_uniform(values)), values)
+
+
+@mark.parametrize("values", all_inputs)
+def test_grad_flow(values: Tensor) -> None:
+    # Runs `assert_grad_flow` on every input of `all_inputs`, with lambda = 1 so that the cosine
+    # term is part of the computed value.
+    assert_grad_flow(COSMOS(lambda_=1.0, weights=_uniform(values)), values)
+
+
+@mark.parametrize("values", all_inputs)
+def test_permutation_invariant(values: Tensor) -> None:
+    # With uniform weights, both the weighted sum and the cosine term are symmetric in the inputs.
+    # This would not hold for non-uniform weights, which is why `_uniform` is used here.
+    assert_permutation_invariant(COSMOS(lambda_=1.0, weights=_uniform(values)), values)
+
+
+def test_zero_values_returns_zero() -> None:
+    # For all-zero values the weighted sum is 0. `cosine_similarity` guards its denominator with a
+    # small epsilon instead of dividing by a zero norm, so the cosine is 0 as well (no nan) and the
+    # result is 0, regardless of lambda.
+    out = COSMOS(lambda_=1.0, weights=tensor_([0.5, 0.5]))(tensor_([0.0, 0.0]))
+    torch.testing.assert_close(out, tensor_(0.0))
+
+
+@mark.parametrize("lambda_", [-1.0, -0.5])
+def test_raises_on_negative_lambda(lambda_: float) -> None:
+    # `lambda_` is validated in the constructor, so building the scalarizer is enough to trigger
+    # the error.
+    with raises(ValueError):
+        COSMOS(lambda_=lambda_, weights=tensor_([0.5, 0.5]))
+
+
+def test_raises_on_weights_shape_mismatch() -> None:
+    # The shape of the weights is only checked at call time: construction with 3 weights succeeds,
+    # and it is the call on 2 values that must raise.
+    scalarizer = COSMOS(lambda_=1.0, weights=tensor_([1.0, 1.0, 1.0]))
+    with raises(ValueError):
+        scalarizer(tensor_([1.0, 1.0]))
+
+
+def test_representations() -> None:
+    # `repr` exposes both hyperparameters, while `str` is expected to be just the class name.
+    # NOTE(review): `torch.tensor` is used instead of `tensor_` here, presumably so that the
+    # expected string does not depend on the device or dtype chosen by `tensor_` -- TODO confirm.
+    s = COSMOS(lambda_=0.5, weights=torch.tensor([0.5, 0.5]))
+    assert repr(s) == "COSMOS(lambda_=0.5, weights=tensor([0.5000, 0.5000]))"
+    assert str(s) == "COSMOS"