0.14.36

bartzbeielstein · bartzbeielstein · commit bb8f7f047299 · 2024-07-10T11:24:02.000+02:00
Added examples and tests to scaler.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "spotpython"
-version = "0.14.35"
+version = "0.14.36"
 authors = [
   { name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
 ]
diff --git a/src/spotPython/utils/scaler.py b/src/spotPython/utils/scaler.py
@@ -4,29 +4,58 @@
 class TorchStandardScaler:
     """
     A class for scaling data using standardization with torch tensors.
+    This scaler computes the mean and standard deviation on a dataset so that
+    it can later be used to scale the data using the computed mean and standard deviation.
+
+    Attributes:
+        mean (torch.Tensor): The mean value computed over the fitted data.
+        std (torch.Tensor): The standard deviation computed over the fitted data.
+
+    Examples:
+        >>> import torch
+        >>> from spotPython.utils.scaler import TorchStandardScaler
+        >>> # Create a sample tensor
+        >>> tensor = torch.rand((10, 3))  # Random tensor with shape (10, 3)
+        >>> scaler = TorchStandardScaler()
+        >>> # Fit the scaler to the data
+        >>> scaler.fit(tensor)
+        >>> # Transform the data using the fitted scaler
+        >>> transformed_tensor = scaler.transform(tensor)
+        >>> print(transformed_tensor)
+        >>> # Use the fit_transform method to fit and transform in one step
+        >>> another_tensor = torch.rand((10, 3))
+        >>> scaled_tensor = scaler.fit_transform(another_tensor)
+        >>> print(scaled_tensor)
     """
 
-    def fit(self, x):
+    def __init__(self):
+        """
+        Initializes the TorchStandardScaler class without any pre-defined mean and std.
+        """
+        self.mean = None
+        self.std = None
+
+    def fit(self, x: torch.Tensor) -> None:
         """
         Compute the mean and standard deviation of the input tensor.
 
         Args:
-            x (torch.Tensor): The input tensor.
+            x (torch.Tensor): The input tensor, expected shape [n_samples, n_features]
 
         Raises:
             TypeError: If the input is not a torch tensor.
         """
         if not torch.is_tensor(x):
             raise TypeError("Input should be a torch tensor")
-        self.mean = x.mean(0, keepdim=True)
-        self.std = x.std(0, unbiased=False, keepdim=True)
+        self.mean = x.mean(dim=0, keepdim=True)
+        self.std = x.std(dim=0, unbiased=False, keepdim=True)
 
-    def transform(self, x):
+    def transform(self, x: torch.Tensor) -> torch.Tensor:
         """
         Scale the input tensor using the computed mean and standard deviation.
 
         Args:
-            x (torch.Tensor): The input tensor.
+            x (torch.Tensor): The input tensor to be transformed, expected shape [n_samples, n_features]
 
         Returns:
             torch.Tensor: The scaled tensor.
@@ -37,56 +66,77 @@ def transform(self, x):
         """
         if not torch.is_tensor(x):
             raise TypeError("Input should be a torch tensor")
-        if not hasattr(self, "mean") or not hasattr(self, "std"):
+        if self.mean is None or self.std is None:
             raise RuntimeError("Must fit scaler before transforming data")
         x = (x - self.mean) / (self.std + 1e-7)
         return x
 
-    def fit_transform(self, x):
+    def fit_transform(self, x: torch.Tensor) -> torch.Tensor:
         """
         Fit the scaler to the input tensor and then scale the tensor.
 
         Args:
-            x (torch.Tensor): The input tensor.
+            x (torch.Tensor): The input tensor, expected shape [n_samples, n_features]
 
         Returns:
             torch.Tensor: The scaled tensor.
-       
+
         Raises:
             TypeError: If the input is not a torch tensor.
         """
-        if not torch.is_tensor(x):
-            raise TypeError("Input should be a torch tensor")
         self.fit(x)
         return self.transform(x)
 
 
 class TorchMinMaxScaler:
     """
     A class for scaling data using min-max normalization with PyTorch tensors.
+    This scaler calculates the minimum and maximum values in the dataset to scale the data within a given range.
+
+    Attributes:
+        min (torch.Tensor): The minimum values computed over the fitted data.
+        max (torch.Tensor): The maximum values computed over the fitted data.
+
+    Examples:
+        >>> import torch
+        >>> from spotPython.utils.scaler import TorchMinMaxScaler
+        >>> scaler = TorchMinMaxScaler()
+        >>> # Given a tensor
+        >>> tensor = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+        >>> # Fit and transform the tensor using the scaler
+        >>> scaled_tensor = scaler.fit_transform(tensor)
+        >>> print(scaled_tensor)
+        >>> # The output is a tensor with values scaled between 0 and 1.
     """
 
-    def fit(self, x):
+    def __init__(self):
+        """
+        Initializes the TorchMinMaxScaler class without any predefined min and max.
+        """
+        self.min = None
+        self.max = None
+
+    def fit(self, x: torch.Tensor) -> None:
         """
         Compute the minimum and maximum value of the input tensor.
 
-        Parameters:
+        Args:
             x (torch.Tensor): The input tensor.
 
         Raises:
             TypeError: If the input is not a torch tensor.
         """
         if not torch.is_tensor(x):
             raise TypeError("Input should be a torch tensor")
-        self.min = x.min(0, keepdim=True).values
-        self.max = x.max(0, keepdim=True).values
+        self.min = x.min(dim=0, keepdim=True).values
+        self.max = x.max(dim=0, keepdim=True).values
 
-    def transform(self, x):
+    def transform(self, x: torch.Tensor) -> torch.Tensor:
         """
         Scale the input tensor using the computed minimum and maximum values.
 
         Args:
-            x (torch.Tensor): The input tensor.
+            x (torch.Tensor): The input tensor to be scaled.
 
         Returns:
             torch.Tensor: The scaled tensor.
@@ -97,12 +147,12 @@ def transform(self, x):
         """
         if not torch.is_tensor(x):
             raise TypeError("Input should be a torch tensor")
-        if not hasattr(self, "min") or not hasattr(self, "max"):
+        if self.min is None or self.max is None:
             raise RuntimeError("Must fit scaler before transforming data")
         x = (x - self.min) / (self.max - self.min + 1e-7)
         return x
 
-    def fit_transform(self, x):
+    def fit_transform(self, x: torch.Tensor) -> torch.Tensor:
         """
         Fit the scaler to the input tensor and then scale the tensor.
 
@@ -115,7 +165,5 @@ def fit_transform(self, x):
         Raises:
             TypeError: If the input is not a torch tensor.
         """
-        if not torch.is_tensor(x):
-            raise TypeError("Input should be a torch tensor")
         self.fit(x)
         return self.transform(x)
diff --git a/test/test_scaler.py b/test/test_scaler.py
@@ -1,9 +1,9 @@
 import torch
 from spotPython.data.lightdatamodule import LightDataModule
-from spotPython.data.csvdataset import CSVDataset
 from spotPython.utils.scaler import TorchStandardScaler, TorchMinMaxScaler
 from spotPython.data.california_housing import CaliforniaHousing
 
+
 def test_standard_scaler():
     """
     Test if TorchStandardScaler scales data around 0.
@@ -30,9 +30,10 @@ def test_standard_scaler():
     # Calculate the mean over all inputs
     mean_inputs = total_sum / total_count
     overall_mean = mean_inputs.mean()
-    #assert that overall mean goes against zero
+    # assert that the overall mean is close to zero
     assert overall_mean < 0.00001
-    
+
+
 def test_min_max_scaler():
     """
     Test if TorchMinMaxScaler scales data between 0 and 1.
@@ -48,4 +49,3 @@ def test_min_max_scaler():
     for batch in loader():
         inputs, targets = batch
         assert torch.all(inputs >= 0) and torch.all(inputs <= 1), "Inputs are not scaled between 0 and 1"
-
diff --git a/test/test_torch_minmax_scaler.py b/test/test_torch_minmax_scaler.py
@@ -0,0 +1,53 @@
+import pytest
+import torch
+from spotPython.utils.scaler import TorchMinMaxScaler
+
+
+def test_min_max_scaler_fit():
+    """Test the min and max values computed by the `fit` method."""
+    tensor = torch.tensor([[2.0, 4.0], [1.0, 5.0], [3.0, 6.0]])
+    expected_min = torch.tensor([[1.0, 4.0]])
+    expected_max = torch.tensor([[3.0, 6.0]])
+
+    scaler = TorchMinMaxScaler()
+    scaler.fit(tensor)
+
+    torch.testing.assert_allclose(scaler.min, expected_min)
+    torch.testing.assert_allclose(scaler.max, expected_max)
+
+
+def test_min_max_scaler_transform():
+    """Test the output of the `transform` method."""
+    tensor = torch.tensor([[2.0, 4.0], [1.0, 5.0], [3.0, 6.0]])
+    scaler = TorchMinMaxScaler()
+    scaler.fit(tensor)
+    transformed = scaler.transform(tensor)
+
+    expected_transformed = torch.tensor([[0.5, 0.0], [0.0, 0.5], [1.0, 1.0]])
+
+    torch.testing.assert_allclose(transformed, expected_transformed)
+
+
+def test_min_max_scaler_fit_transform():
+    """Check that `fit_transform` method correctly fits and transforms the data."""
+    tensor = torch.tensor([[2.0, 4.0], [1.0, 5.0], [3.0, 6.0]])
+    scaler = TorchMinMaxScaler()
+    transformed = scaler.fit_transform(tensor)
+
+    expected_transformed = torch.tensor([[0.5, 0.0], [0.0, 0.5], [1.0, 1.0]])
+
+    torch.testing.assert_allclose(transformed, expected_transformed)
+
+
+def test_input_validation():
+    """Ensure type error is raised with incorrect input type."""
+    scaler = TorchMinMaxScaler()
+    with pytest.raises(TypeError):
+        scaler.fit([[1, 2], [3, 4]])  # Not a tensor, should raise error
+
+
+def test_transform_before_fit():
+    """Ensure appropriate error is raised when transform is called before fit."""
+    scaler = TorchMinMaxScaler()
+    with pytest.raises(RuntimeError):
+        scaler.transform(torch.tensor([[2.0, 4.0], [1.0, 5.0]]))
diff --git a/test/test_torch_standard_scaler.py b/test/test_torch_standard_scaler.py
@@ -0,0 +1,55 @@
+import pytest
+import torch
+from spotPython.utils.scaler import TorchStandardScaler
+
+
+def test_fit():
+    """Test the `fit` method for correct mean and std computation."""
+    tensor = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+    expected_mean = torch.tensor([[2.0, 3.0]])
+    expected_std = torch.tensor([[1.0, 1.0]])
+
+    scaler = TorchStandardScaler()
+    scaler.fit(tensor)
+
+    torch.testing.assert_allclose(scaler.mean, expected_mean)
+    torch.testing.assert_allclose(scaler.std, expected_std, atol=1e-7, rtol=1e-7)
+
+
+def test_transform():
+    """Test the `transform` method for correct data scaling."""
+    tensor = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+    scaler = TorchStandardScaler()
+    scaler.fit(tensor)
+    transformed = scaler.transform(tensor)
+
+    expected_transformed = torch.tensor([[-1.0, -1.0], [1.0, 1.0]])
+
+    torch.testing.assert_allclose(transformed, expected_transformed)
+
+
+def test_fit_transform():
+    """Test the `fit_transform` method for combined fitting and transforming."""
+    tensor = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+    scaler = TorchStandardScaler()
+    transformed = scaler.fit_transform(tensor)
+
+    expected_transformed = torch.tensor([[-1.0, -1.0], [1.0, 1.0]])
+
+    torch.testing.assert_allclose(transformed, expected_transformed)
+
+
+def test_input_not_tensor():
+    """Test that a TypeError is raised if the input data is not a tensor."""
+    scaler = TorchStandardScaler()
+    with pytest.raises(TypeError):
+        scaler.fit([1.0, 2.0])  # Passing a list instead of a tensor
+
+
+def test_unfitted_transform():
+    """Test that a RuntimeError is raised if attempting to transform without fitting first."""
+    tensor = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+    scaler = TorchStandardScaler()
+
+    with pytest.raises(RuntimeError):
+        scaler.transform(tensor)

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"`
`7`	`7`
`8`	`8`	`[project]`
`9`	`9`	`name = "spotpython"`
`10`		`-version = "0.14.35"`
	`10`	`+version = "0.14.36"`
`11`	`11`	`authors = [`
`12`	`12`	`{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }`
`13`	`13`	`]`