nn funnel classifier implementation

ahinterl94-th · ahinterl94-th · commit c37afbc71132 · 2025-08-25T12:39:33.000+02:00
diff --git a/src/spotpython/hyperdict/light_hyper_dict.json b/src/spotpython/hyperdict/light_hyper_dict.json
@@ -1123,5 +1123,132 @@
             "lower": 0,
             "upper": 3
         }
+    },
+    "NNFunnelClassifier": {
+        "l1": {
+            "type": "int",
+            "default": 3,
+            "transform": "transform_power_2_int",
+            "lower": 3,
+            "upper": 8
+        },
+        "num_layers": {
+            "type": "int",
+            "default": 3,
+            "transform": "None",
+            "lower": 2,
+            "upper": 10
+        },
+        "epochs": {
+            "type": "int",
+            "default": 4,
+            "transform": "transform_power_2_int",
+            "lower": 4,
+            "upper": 9
+        },
+        "batch_size": {
+            "type": "int",
+            "default": 4,
+            "transform": "transform_power_2_int",
+            "lower": 1,
+            "upper": 4
+        },
+        "act_fn": {
+            "levels": [
+                "Tanh",
+                "ReLU",
+                "LeakyReLU",
+                "ELU",
+                "Swish"
+            ],
+            "type": "factor",
+            "default": "ReLU",
+            "transform": "None",
+            "class_name": "spotpython.torch.activation",
+            "core_model_parameter_type": "instance()",
+            "lower": 0,
+            "upper": 4
+        },
+        "optimizer": {
+            "levels": [
+                "Adadelta",
+                "Adagrad",
+                "Adam",
+                "AdamW",
+                "SparseAdam",
+                "Adamax",
+                "ASGD",
+                "NAdam",
+                "RAdam",
+                "RMSprop",
+                "Rprop",
+                "SGD"
+            ],
+            "type": "factor",
+            "default": "SGD",
+            "transform": "None",
+            "class_name": "torch.optim",
+            "core_model_parameter_type": "str",
+            "lower": 0,
+            "upper": 11
+        },
+        "dropout_prob": {
+            "type": "float",
+            "default": 0.01,
+            "transform": "None",
+            "lower": 0.0,
+            "upper": 0.25
+        },
+        "lr_mult": {
+            "type": "float",
+            "default": 1.0,
+            "transform": "None",
+            "lower": 0.1,
+            "upper": 10.0
+        },
+        "patience": {
+            "type": "int",
+            "default": 2,
+            "transform": "transform_power_2_int",
+            "lower": 2,
+            "upper": 6
+        },
+        "initialization": {
+            "levels": [
+                "Default",
+                "Kaiming",
+                "Xavier"
+            ],
+            "type": "factor",
+            "default": "Default",
+            "transform": "None",
+            "core_model_parameter_type": "str",
+            "lower": 0,
+            "upper": 2
+        },
+        "batch_norm": {
+            "levels": [
+                0,
+                1
+            ],
+            "type": "factor",
+            "default": 0,
+            "transform": "None",
+            "core_model_parameter_type": "bool",
+            "lower": 0,
+            "upper": 1
+        },
+        "lr_sched": {
+            "levels": [
+                0,
+                1
+            ],
+            "type": "factor",
+            "default": 0,
+            "transform": "None",
+            "core_model_parameter_type": "bool",
+            "lower": 0,
+            "upper": 1
+        }
     }
 }
diff --git a/src/spotpython/light/classification/nn_funnel_classifier.py b/src/spotpython/light/classification/nn_funnel_classifier.py
@@ -0,0 +1,197 @@
+import lightning as L
+import torch
+import torch.nn.functional as F
+from torch import nn
+from spotpython.hyperparameters.optimizer import optimizer_handler
+import torchmetrics.functional.classification as TMclf
+import torch.optim as optim
+
+
+class NNFunnelClassifier(L.LightningModule):
+    """
+    Funnel-shaped MLP for classification (binary & multiclass).
+
+    Attributes:
+        l1 (int): neurons in first hidden layer.
+        num_layers (int): number of hidden layers.
+        epochs (int): number of training epochs (used for LR scheduler milestones).
+        batch_size (int): batch size (used for example_input_array).
+        initialization (str): (keine direkte Nutzung hier – identisch zur Vorlage).
+        act_fn (nn.Module): activation module (keine Ignorierung; bleibt tunebar).
+        optimizer (str): optimizer name for optimizer_handler.
+        dropout_prob (float): dropout probability.
+        lr_mult (float): learning-rate multiplier (passed to optimizer_handler).
+        patience (int): (nicht in dieser Klasse verwendet – wie Vorlage).
+        _L_in (int): input dimension.
+        _L_out (int): number of classes. If 1 => binary, else multiclass.
+        _torchmetric (str): optional metric name ("accuracy" default). Used for logging, not as loss.
+        layers (nn.Sequential): the network.
+    """
+
+    def __init__(
+        self,
+        l1: int,
+        num_layers: int,
+        epochs: int,
+        batch_size: int,
+        initialization: str,
+        act_fn: nn.Module,
+        optimizer: str,
+        dropout_prob: float,
+        lr_mult: float,
+        patience: int,
+        _L_in: int,
+        _L_out: int,
+        _torchmetric: str,
+        *args,
+        **kwargs,
+    ):
+        super().__init__()
+        self._L_in = _L_in
+        self._L_out = _L_out
+
+        # Metric (default accuracy) for logging
+        # Loss is always BCEWithLogitsLoss or CrossEntropyLoss
+        if _torchmetric is None:
+            _torchmetric = "accuracy"
+        self._torchmetric = _torchmetric.lower()
+
+        self._is_binary = self._L_out == 1
+
+        self.save_hyperparameters(ignore=["_L_in", "_L_out", "_torchmetric"])
+
+        # Dummy-Input für Graph
+        self.example_input_array = torch.zeros((batch_size, self._L_in))
+
+        if self.hparams.l1 < 8:
+            raise ValueError("l1 must be at least 8")
+
+        # Netzwerk wie in deiner Vorlage (Funnel, optional BatchNorm/Dropout)
+        layers = []
+        in_features = self._L_in
+        hidden_size = self.hparams.l1
+        out_dim = 1 if self._is_binary else self._L_out
+
+        for _ in range(self.hparams.num_layers):
+            out_features = max(hidden_size // 2, 8)  # min 8
+            layers.append(nn.Linear(in_features, hidden_size))
+
+            if getattr(self.hparams, "batch_norm", False):
+                layers.append(nn.BatchNorm1d(hidden_size))
+
+            layers.append(self.hparams.act_fn)
+            layers.append(nn.Dropout(self.hparams.dropout_prob))
+
+            in_features = hidden_size
+            hidden_size = out_features
+
+        layers.append(nn.Linear(in_features, out_dim))
+        self.layers = nn.Sequential(*layers)
+
+        # Loss nach Task
+        if self._is_binary:
+            # Combined Sigmoid + BCE
+            self._criterion = nn.BCEWithLogitsLoss()
+        else:
+            # Combined Softmax + CE
+            self._criterion = nn.CrossEntropyLoss()
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Returns raw logits. For binary: shape (N,1). For multiclass: (N,C).
+        """
+        return self.layers(x)
+
+    # internal helper to compute loss and metric
+    def _calculate_loss_and_metric(self, batch):
+        x, y = batch
+        logits = self(x)
+
+        if self._is_binary:
+            # y -> (N,1) float
+            y_t = y.view(-1, 1).float()
+            loss = self._criterion(logits, y_t)
+            # Für Metriken bereiten wir Schwellen-Preds vor
+            probs = torch.sigmoid(logits).view(-1)
+            preds = (probs >= 0.5).long()
+            target = y.view(-1).long()
+        else:
+            # CE expected Long targets (N,) with class indices
+            loss = self._criterion(logits, y.long())
+            probs = torch.softmax(logits, dim=1)
+            preds = torch.argmax(probs, dim=1)
+            target = y.long()
+
+        # metrices
+        metric_value = None
+        try:
+            if self._torchmetric == "accuracy":
+                if self._is_binary:
+                    # binary accuracy (0/1)
+                    metric_value = TMclf.accuracy(preds, target, task="binary")
+                else:
+                    metric_value = TMclf.accuracy(preds, target, task="multiclass", num_classes=self._L_out)
+            else:
+                # TBC: implement other metrics
+                pass
+        except Exception:
+            metric_value = None
+
+        return loss, metric_value
+
+    # --- Lightning Hooks ---
+    def training_step(self, batch: tuple) -> torch.Tensor:
+        loss, _ = self._calculate_loss_and_metric(batch)
+        return loss
+
+    def validation_step(self, batch: tuple, batch_idx: int, prog_bar: bool = False) -> torch.Tensor:
+        val_loss, val_metric = self._calculate_loss_and_metric(batch)
+        self.log("val_loss", val_loss, prog_bar=prog_bar)
+        self.log("hp_metric", val_loss, prog_bar=prog_bar)
+        if val_metric is not None:
+            self.log(f"val_{self._torchmetric}", val_metric, prog_bar=prog_bar)
+        return val_loss
+
+    def test_step(self, batch: tuple, batch_idx: int, prog_bar: bool = False) -> torch.Tensor:
+        test_loss, test_metric = self._calculate_loss_and_metric(batch)
+        self.log("test_loss", test_loss, prog_bar=prog_bar)
+        self.log("hp_metric", test_loss, prog_bar=prog_bar)
+        if test_metric is not None:
+            self.log(f"test_{self._torchmetric}", test_metric, prog_bar=prog_bar)
+        return test_loss
+
+    def predict_step(self, batch: tuple, batch_idx: int, prog_bar: bool = False):
+        x, y = batch
+        logits = self(x)
+        if self._is_binary:
+            probs = torch.sigmoid(logits).view(-1, 1)  # (N,1)
+            preds = (probs >= 0.5).long()
+        else:
+            probs = torch.softmax(logits, dim=1)  # (N,C)
+            preds = torch.argmax(probs, dim=1, keepdim=True)
+        # Debug-Ausgaben wie bei dir:
+        print(f"Predict step x: {x}")
+        print(f"Predict step y: {y}")
+        print(f"Predict step logits: {logits}")
+        print(f"Predict step probs: {probs}")
+        print(f"Predict step preds: {preds}")
+        return (x, y, logits, probs, preds)
+
+    def configure_optimizers(self) -> torch.optim.Optimizer:
+        optimizer = optimizer_handler(optimizer_name=self.hparams.optimizer, params=self.parameters(), lr_mult=self.hparams.lr_mult)
+
+        if getattr(self.hparams, "lr_sched", False):
+            num_milestones = 3
+            milestones = [int(self.hparams.epochs / (num_milestones + 1) * (i + 1)) for i in range(num_milestones)]
+            scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1)
+            lr_scheduler_config = {
+                "scheduler": scheduler,
+                "interval": "epoch",
+                "frequency": 1,
+            }
+            return {
+                "optimizer": optimizer,
+                "lr_scheduler": lr_scheduler_config,
+            }
+        else:
+            return optimizer
diff --git a/src/spotpython/light/trainmodel.py b/src/spotpython/light/trainmodel.py
@@ -693,7 +693,7 @@ def train_model_xai(config: dict, fun_control: dict, timestamp: bool = True) ->
     X_train_tensor = torch.cat(X_train_list, dim=0).to(model.device)
     X_train_tensor.requires_grad_()
     X_val_tensor = torch.cat(X_val_list, dim=0).to(model.device)
-    X_val_tensor.requires_grad_()  
+    X_val_tensor.requires_grad_()
 
     # Dictionary to store attributions
     attributions_dict = {}
@@ -709,38 +709,39 @@ def train_model_xai(config: dict, fun_control: dict, timestamp: bool = True) ->
         N_total = X_val_tensor.size(0)
         N_attr = min(fun_control["xai_subset_size"], N_total)
         print(f"Using a subset of {N_attr} samples for attribution analysis out of {N_total} total samples.")
-        g = torch.Generator(device=X_val_tensor.device) 
-        g.manual_seed(fun_control["seed"])                              
-        perm = torch.randperm(N_total, generator=g,  
-                      device=X_val_tensor.device)[:N_attr]
-        X_val_tensor = X_val_tensor[perm]  
+        g = torch.Generator(device=X_val_tensor.device)
+        g.manual_seed(fun_control["seed"])
+        perm = torch.randperm(N_total, generator=g, device=X_val_tensor.device)[:N_attr]
+        X_val_tensor = X_val_tensor[perm]
 
-    # Ensure the model is in evaluation mode    
+    # Ensure the model is in evaluation mode
     model.eval()
 
+    target = fun_control.get("xai_target", None)
+
     if "KernelShap" in fun_control["xai_methods"]:
-            attr_ks = KernelShap(model)
-            n_features = X_val_tensor.shape[1]
-            samples_ks = min(2000, 100 * n_features)  # Adjust number of samples based on features, maximum 2000
-            print("KernelShap: Using", samples_ks, "samples for attribution.")
-            with torch.no_grad():
-                attribution_ks = attr_ks.attribute(X_val_tensor, baselines=baseline, n_samples=samples_ks, perturbations_per_eval=64)
-            ks_attr_test_sum = attribution_ks.detach().numpy().sum(axis=0)
-            l2_norm = np.linalg.norm(ks_attr_test_sum)
-            l2_normalized_ks = ks_attr_test_sum / l2_norm if l2_norm != 0 else ks_attr_test_sum
-            attributions_dict["KernelShap"] = l2_normalized_ks
+        attr_ks = KernelShap(model)
+        n_features = X_val_tensor.shape[1]
+        samples_ks = min(2000, 100 * n_features)  # Adjust number of samples based on features, maximum 2000
+        print("KernelShap: Using", samples_ks, "samples for attribution.")
+        with torch.no_grad():
+            attribution_ks = attr_ks.attribute(X_val_tensor, baselines=baseline, n_samples=samples_ks, perturbations_per_eval=64, target=target)
+        ks_attr_test_sum = attribution_ks.detach().numpy().sum(axis=0)
+        l2_norm = np.linalg.norm(ks_attr_test_sum)
+        l2_normalized_ks = ks_attr_test_sum / l2_norm if l2_norm != 0 else ks_attr_test_sum
+        attributions_dict["KernelShap"] = l2_normalized_ks
 
     with torch.enable_grad():
         if "IntegratedGradients" in fun_control["xai_methods"]:
             attr_ig = IntegratedGradients(model)
-            attribution_ig = attr_ig.attribute(X_val_tensor, baselines=baseline)
+            attribution_ig = attr_ig.attribute(X_val_tensor, baselines=baseline, target=target)
             vec = attribution_ig.detach().cpu().numpy().sum(axis=0)
             l2 = np.linalg.norm(vec)
             attributions_dict["IntegratedGradients"] = vec / l2 if l2 != 0 else vec
 
         if "DeepLift" in fun_control["xai_methods"]:
             attr_dl = DeepLift(model)
-            attribution_dl = attr_dl.attribute(X_val_tensor, baselines=baseline)
+            attribution_dl = attr_dl.attribute(X_val_tensor, baselines=baseline, target=target)
             dl_attr_test_sum = attribution_dl.detach().numpy().sum(axis=0)
             l2_norm = np.linalg.norm(dl_attr_test_sum)
             l2_normalized_dl = dl_attr_test_sum / l2_norm if l2_norm != 0 else dl_attr_test_sum