v0.2.29

bartzbeielstein · bartzbeielstein · commit e6d4544bd57e · 2023-06-14T21:59:43.000+02:00
MAPK updated
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "spotPython"
-version = "0.2.28"
+version = "0.2.29"
 authors = [
   { name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
 ]
diff --git a/src/spotPython/torch/mapk.py b/src/spotPython/torch/mapk.py
@@ -1,66 +1,108 @@
+import torchmetrics
 import torch
-from torchmetrics import Metric
 import numpy as np
 
 
-class MAPK(Metric):
-    """Computes the mean average precision at k.
+class MAPK(torchmetrics.Metric):
+    """
+    Mean Average Precision at K (MAPK) metric.
+
+    This class inherits from the `Metric` class of the `torchmetrics` library.
+
     Args:
-        k: Number of predictions to consider
-        dist_sync_on_step: Whether to sync the output across all GPUs
-        device: Device to use for the computation
+        k (int): The number of top predictions to consider when calculating the metric.
+        dist_sync_on_step (bool): Whether to synchronize the metric states across processes during the forward pass.
+
+    Attributes:
+        total (torch.Tensor): The running sum of the per-batch mean average-precision-at-k scores.
+        count (torch.Tensor): The number of batches processed.
+
     Example:
-        >>> from torchmetrics import MAPK
-        >>> target = torch.tensor([0, 1, 2, 3])
-        >>> preds = torch.tensor([[0, 1, 2, 3],
-        ...                       [0, 2, 1, 3],
-        ...                       [0, 1, 3, 2],
-        ...                       [0, 3, 1, 2]])
-        >>> mapk = MAPK(k=3)
-        >>> mapk(preds, target)
-        tensor(0.3333)
-
-        >>> y_pred = torch.tensor([[0.5, 0.2, 0.2],  # 0 is in top 2
-                     [0.3, 0.4, 0.2],  # 1 is in top 2
-                     [0.2, 0.4, 0.3],  # 2 is in top 2
-                     [0.7, 0.2, 0.1]]) # 2 isn't in top 2
-        >>> y_true = torch.tensor([0, 1, 2, 2])
-        >>> mapk_metric = MAPK(k=2)
-        >>> mapk_metric.update(y_pred, y_true)
-        >>> result = mapk_metric.compute()
-        >>> print(result) # tensor(0.37500)
+        from spotPython.torch.mapk import MAPK
+        import torch
+        mapk = MAPK(k=2)
+        target = torch.tensor([0, 1, 2, 2])
+        preds = torch.tensor(
+            [
+                [0.5, 0.2, 0.2],  # 0 is in top 2
+                [0.3, 0.4, 0.2],  # 1 is in top 2
+                [0.2, 0.4, 0.3],  # 2 is in top 2
+                [0.7, 0.2, 0.1],  # 2 isn't in top 2
+            ]
+        )
+        mapk.update(preds, target)
+        print(mapk.compute()) # tensor(0.6250)
     """
 
-    def __init__(self, k=3, dist_sync_on_step=False, device=None):
-        super().__init__(dist_sync_on_step=dist_sync_on_step, device=device)
+    def __init__(self, k=10, dist_sync_on_step=False):
+        super().__init__(dist_sync_on_step=dist_sync_on_step)
         self.k = k
-        self.add_state("actual", default=[], dist_reduce_fx="cat")
-        self.add_state("predicted", default=[], dist_reduce_fx="cat")
+        self.add_state("total", default=torch.tensor(0.0), dist_reduce_fx="sum")
+        self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum")
+
+    def update(self, predicted: torch.Tensor, actual: torch.Tensor):
+        """
+        Update the state variables with a new batch of data.
 
-    def update(self, y_pred: torch.Tensor, y: torch.Tensor):
-        sorted_prediction_ids = np.argsort(-y_pred.cpu().numpy(), axis=1)
-        top_k_prediction_ids = sorted_prediction_ids[:, : self.k]
-        self.actual.append(y.cpu().numpy().reshape(-1, 1))
-        self.predicted.append(top_k_prediction_ids)
+        Args:
+            predicted (torch.Tensor): A 2D tensor containing the predicted scores for each class.
+            actual (torch.Tensor): A 1D tensor containing the ground truth labels.
+
+
+        Raises:
+            AssertionError: If `actual` is not a 1D tensor, if `predicted` is not a 2D tensor,
+            or if `actual` and `predicted` differ in size along the first dimension.
+        """
+        assert len(actual.shape) == 1, "actual must be a 1D tensor"
+        assert len(predicted.shape) == 2, "predicted must be a 2D tensor"
+        assert actual.shape[0] == predicted.shape[0], "actual and predicted must have the same number of elements"
+
+        # Convert actual to list of lists
+        actual = actual.tolist()
+        actual = [[a] for a in actual]
+
+        # Convert predicted to list of lists of indices sorted by confidence score
+        _, predicted = predicted.topk(k=self.k, dim=1)
+        predicted = predicted.tolist()
+
+        score = np.mean([self.apk(p, a, self.k) for p, a in zip(predicted, actual)])
+        self.total += score
+        self.count += 1
 
     def compute(self):
-        actual = np.concatenate(self.actual)
-        predicted = np.concatenate(self.predicted)
-        return self.mapk(actual, predicted)
+        """
+        Compute the mean average precision at k.
+
+        Returns:
+            torch.Tensor: A scalar tensor with the mean average precision at k, averaged over the batches seen so far.
+        """
+        return self.total / self.count
 
     @staticmethod
-    def apk(actual, predicted, k=10):
+    def apk(predicted, actual, k=10):
+        """
+        Calculate the average precision at k for a single pair of actual and predicted labels.
+
+        Args:
+            predicted (list): A list of predicted labels.
+            actual (list): A list of ground truth labels.
+            k (int): The number of top predictions to consider.
+
+        Returns:
+            float: The average precision at k.
+        """
+        if not actual:
+            return 0.0
+
         if len(predicted) > k:
             predicted = predicted[:k]
+
         score = 0.0
         num_hits = 0.0
+
         for i, p in enumerate(predicted):
             if p in actual and p not in predicted[:i]:
                 num_hits += 1.0
                 score += num_hits / (i + 1.0)
-        if not actual:
-            return 0.0
-        return score / min(len(actual), k)
 
-    def mapk(self, actual, predicted):
-        return np.mean([self.apk(a, p, self.k) for a, p in zip(actual, predicted)])
+        return score / min(len(actual), k)
diff --git a/src/spotPython/torch/traintest.py b/src/spotPython/torch/traintest.py
@@ -157,7 +157,7 @@ def evaluate_cv(
                 metric_name = "Metric"
                 if metric is not None:
                     metric_name = type(metric).__name__
-                    print(f"{metric_name} value on hold-out data: {metric_values[fold]}")
+                    # print(f"{metric_name} value on hold-out data: {metric_values[fold]}")
                 if writer is not None:
                     writer.add_scalars(
                         "evaluate_cv fold:" + str(fold + 1) + ". Train & Val Loss and Val Metric" + writerId,

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"`
`7`	`7`
`8`	`8`	`[project]`
`9`	`9`	`name = "spotPython"`
`10`		`-version = "0.2.28"`
	`10`	`+version = "0.2.29"`
`11`	`11`	`authors = [`
`12`	`12`	`{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }`
`13`	`13`	`]`