Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ changelog does not include internal changes that do not affect the user.

## [Unreleased]

### Added

- Added `COSMOS` from [Scalable Pareto Front Approximation for Deep Multi-Objective
Learning](https://arxiv.org/pdf/2103.13392) (ICDM 2021), a `Scalarizer` that combines a linear
scalarization with a cosine-similarity penalty pulling the vector of values toward a preference
direction.

## [0.15.0] - 2026-06-15

### Added
Expand Down
7 changes: 7 additions & 0 deletions docs/source/docs/scalarization/cosmos.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
:hide-toc:

COSMOS
======

.. autoclass:: torchjd.scalarization.COSMOS
:members: __call__
1 change: 1 addition & 0 deletions docs/source/docs/scalarization/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Abstract base class
:maxdepth: 1

constant.rst
cosmos.rst
dwa.rst
famo.rst
geometric_mean.rst
Expand Down
2 changes: 2 additions & 0 deletions src/torchjd/scalarization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"""

from ._constant import Constant
from ._cosmos import COSMOS
from ._dwa import DWA
from ._famo import FAMO
from ._geometric_mean import GeometricMean
Expand All @@ -33,6 +34,7 @@

__all__ = [
"Constant",
"COSMOS",
"DWA",
"FAMO",
"GeometricMean",
Expand Down
67 changes: 67 additions & 0 deletions src/torchjd/scalarization/_cosmos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from torch import Tensor
from torch.nn.functional import cosine_similarity

from ._scalarizer_base import Scalarizer


class COSMOS(Scalarizer):
    r"""
    COSMOS :class:`~torchjd.scalarization.Scalarizer`, as proposed in `Scalable Pareto Front
    Approximation for Deep Multi-Objective Learning <https://arxiv.org/pdf/2103.13392>`_.

    The input tensor of values is reduced to a preference-weighted sum, from which a multiple of
    the cosine similarity between the preference vector and the values is subtracted:

    .. math::
        \sum_i r_i L_i - \lambda \frac{\sum_i r_i L_i}{\lVert r \rVert \, \lVert L \rVert},

    with:

    - :math:`L_i` the :math:`i`-th input value (the :math:`i`-th objective);
    - :math:`r_i` the preference weight of that value (the ``weights`` parameter);
    - :math:`\lambda` the coefficient of the cosine-similarity penalty (the ``lambda_`` parameter).

    The subtracted term equals :math:`\lambda \cos(r, L)`: it rewards aligning the vector of values
    with the preference direction, which is what spreads the approximated Pareto front.

    :param lambda_: The cosine-similarity penalty coefficient :math:`\lambda`. Must be non-negative.
        With ``0``, COSMOS reduces to a plain linear scalarization. The paper uses values ranging
        from ``0.01`` to ``8`` depending on the dataset, with no single best value.
    :param weights: The preference vector :math:`r` applied to the values. Its shape must match the
        shape of the values passed at call time. To approximate the whole Pareto front rather than
        a single trade-off, it should be re-sampled from a Dirichlet distribution and reassigned
        before every call, as in the paper, e.g. for ``m`` objectives
        ``cosmos.weights = torch.distributions.Dirichlet(torch.ones(m)).sample()`` (a uniform
        distribution over the probability simplex; a concentration smaller than one spreads the
        samples toward the corners of the simplex).

    .. note::
        The full COSMOS method also conditions the model on the preference vector by concatenating
        it to the input; that is a modeling choice left to the user. This scalarizer only
        implements the objective.
    """

    def __init__(self, lambda_: float, weights: Tensor) -> None:
        # Reject an invalid coefficient before any state is initialized.
        if lambda_ < 0.0:
            raise ValueError(
                f"Parameter `lambda_` should be non-negative. Found `lambda_ = {lambda_}`."
            )

        super().__init__()
        self.lambda_ = lambda_
        self.weights = weights

    def forward(self, values: Tensor, /) -> Tensor:
        preference = self.weights

        # `weights` may be reassigned between calls, so its shape is checked on every call.
        if preference.shape != values.shape:
            raise ValueError(
                "Parameter `weights` should have the same shape as `values`. Found "
                f"`weights.shape = {tuple(preference.shape)}` and `values.shape = "
                f"{tuple(values.shape)}`."
            )

        linear_term = (preference * values).sum()

        # Both tensors are flattened so the similarity is taken between two plain vectors and the
        # result is a scalar, whatever the shape of the input.
        alignment = cosine_similarity(preference.flatten(), values.flatten(), dim=0)
        return linear_term - self.lambda_ * alignment

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"{class_name}(lambda_={self.lambda_}, weights={self.weights!r})"
88 changes: 88 additions & 0 deletions tests/unit/scalarization/test_cosmos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import torch
from pytest import mark, raises
from torch import Tensor
from torch.nn.functional import cosine_similarity
from utils.tensors import tensor_

from torchjd.scalarization import COSMOS

from ._asserts import (
assert_grad_flow,
assert_permutation_invariant,
assert_returns_scalar,
)
from ._inputs import all_inputs


def _uniform(values: Tensor) -> Tensor:
    """Return a tensor shaped like `values` whose entries all equal `1 / values.numel()`."""
    equal_share = 1.0 / values.numel()
    return torch.full_like(values, equal_share)


def test_value_aligned_gives_zero() -> None:
    # Equal values under uniform weights point in the same direction as the weights, so
    # cos(r, L) = 1. The weighted sum (1) minus lambda (1) leaves 0.
    scalarizer = COSMOS(lambda_=1.0, weights=tensor_([0.5, 0.5]))
    result = scalarizer(tensor_([1.0, 1.0]))
    torch.testing.assert_close(result, tensor_(0.0))


def test_value_lambda_zero_is_linear_scalarization() -> None:
    # A zero lambda removes the cosine penalty, leaving only the weighted sum.
    third = 1.0 / 3.0
    scalarizer = COSMOS(lambda_=0.0, weights=tensor_([third, third, third]))
    result = scalarizer(tensor_([1.0, 2.0, 4.0]))
    torch.testing.assert_close(result, tensor_(7.0 / 3.0))


def test_value_with_weights() -> None:
    # The cosine term is disabled (lambda = 0), so the expected output is 2*3 + 1*4 = 10.
    scalarizer = COSMOS(lambda_=0.0, weights=tensor_([2.0, 1.0]))
    result = scalarizer(tensor_([3.0, 4.0]))
    torch.testing.assert_close(result, tensor_(10.0))


def test_full_formula() -> None:
    # Rebuild the objective term by term and compare it with the scalarizer's output.
    values = tensor_([1.0, 2.0, 4.0])
    weights = tensor_([0.5, 0.3, 0.2])
    lambda_ = 2.0
    linear_term = (weights * values).sum()
    alignment = cosine_similarity(weights, values, dim=0)
    expected = linear_term - lambda_ * alignment
    actual = COSMOS(lambda_, weights=weights)(values)
    torch.testing.assert_close(actual, expected)


@mark.parametrize("values", all_inputs)
def test_expected_structure(values: Tensor) -> None:
    # Runs the shared scalar-output check with a uniform preference vector on every input.
    scalarizer = COSMOS(lambda_=1.0, weights=_uniform(values))
    assert_returns_scalar(scalarizer, values)


@mark.parametrize("values", all_inputs)
def test_grad_flow(values: Tensor) -> None:
    # Runs the shared gradient-flow check with a uniform preference vector on every input.
    scalarizer = COSMOS(lambda_=1.0, weights=_uniform(values))
    assert_grad_flow(scalarizer, values)


@mark.parametrize("values", all_inputs)
def test_permutation_invariant(values: Tensor) -> None:
    # Uniform weights make the weighted sum and the cosine term symmetric in the inputs, so
    # reordering the values must not change the output.
    scalarizer = COSMOS(lambda_=1.0, weights=_uniform(values))
    assert_permutation_invariant(scalarizer, values)


def test_zero_values_returns_zero() -> None:
    # `cosine_similarity` is numerically stable for the zero vector, so all-zero values must give
    # 0 rather than nan, regardless of lambda.
    scalarizer = COSMOS(lambda_=1.0, weights=tensor_([0.5, 0.5]))
    result = scalarizer(tensor_([0.0, 0.0]))
    torch.testing.assert_close(result, tensor_(0.0))


@mark.parametrize("lambda_", [-1.0, -0.5])
def test_raises_on_negative_lambda(lambda_: float) -> None:
    # A negative penalty coefficient must be rejected at construction time.
    weights = tensor_([0.5, 0.5])
    with raises(ValueError):
        COSMOS(lambda_=lambda_, weights=weights)


def test_raises_on_weights_shape_mismatch() -> None:
    # Three weights against two values: the mismatch is only detectable at call time.
    three_weights = tensor_([1.0, 1.0, 1.0])
    scalarizer = COSMOS(lambda_=1.0, weights=three_weights)
    two_values = tensor_([1.0, 1.0])
    with raises(ValueError):
        scalarizer(two_values)


def test_representations() -> None:
    # `repr` shows the constructor arguments, whereas `str` is expected to be the class name only.
    scalarizer = COSMOS(lambda_=0.5, weights=torch.tensor([0.5, 0.5]))
    expected_repr = "COSMOS(lambda_=0.5, weights=tensor([0.5000, 0.5000]))"
    assert repr(scalarizer) == expected_repr
    assert str(scalarizer) == "COSMOS"
Loading