diff --git a/CARLA_Entropy/config_dos_CARLA.yaml b/CARLA_Entropy/config_dos_CARLA.yaml new file mode 100644 index 0000000..d3e6b10 --- /dev/null +++ b/CARLA_Entropy/config_dos_CARLA.yaml @@ -0,0 +1,23 @@ +run_steps: + attack: True + decode: True + evaluate: True + update: True + + +attack: + original_test_dir: "CARLA_dataset/DoS/Surrogate/test_images" + original_label_file: "CARLA_dataset/DoS/Surrogate/test_labels.txt" + original_tracksheet: "CARLA_dataset/DoS/Surrogate/test_track.csv" + surrogate_model: "Trained_models/densenet161_surrogate_CARLA_DoS.pth" + output_dir: "perturbed_images_dos_CARLA" + +decode: + decoded_output_dir: "decoded_traffic_dos_CARLA" + +evaluate: + model_path: "Trained_models/CARLA_DoS_og_final_model.h5" + prediction_output_dir: "prediction_output_dos_CARLA" + +update: + tracksheet_dir: "tracksheets_CARLA" diff --git a/CARLA_Entropy/config_spoof_CARLA.yaml b/CARLA_Entropy/config_spoof_CARLA.yaml new file mode 100644 index 0000000..6227d6d --- /dev/null +++ b/CARLA_Entropy/config_spoof_CARLA.yaml @@ -0,0 +1,23 @@ +run_steps: + attack: True + decode: True + evaluate: True + update: True + + +attack: + original_test_dir: "CARLA_dataset/Spoof/Surrogate/merged_targated_Spoof_attack_images" + original_label_file: "CARLA_dataset/Spoof/Surrogate/test_labels.txt" + original_tracksheet: "CARLA_dataset/Spoof/Surrogate/test_track.csv" + surrogate_model: "Trained_models/densenet161_surrogate_CARLA_Spoof.pth" + output_dir: "perturbed_images_spoof_CARLA" + +decode: + decoded_output_dir: "decoded_traffic_spoof_CARLA" + +evaluate: + model_path: "Trained_models/CARLA_Spoof_og_final_model.h5" + prediction_output_dir: "prediction_output_spoof_CARLA" + +update: + tracksheet_dir: "tracksheets_CARLA" diff --git a/CARLA_Entropy/driver_dos_CARLA.py b/CARLA_Entropy/driver_dos_CARLA.py new file mode 100644 index 0000000..7920882 --- /dev/null +++ b/CARLA_Entropy/driver_dos_CARLA.py @@ -0,0 +1,175 @@ +import os + +import sys +import yaml +import argparse +from io 
# -------------------------------------------------
# Load YAML Config
# -------------------------------------------------
def load_config(path):
    """Read the YAML pipeline configuration at *path*.

    Args:
        path: Path of the YAML file to read.

    Returns:
        The parsed mapping; an empty dict when the file contains no document.
    """
    with open(path, "r") as f:
        # safe_load returns None for an empty file; normalise so callers can
        # always treat the result as a dict.
        return yaml.safe_load(f) or {}


def _run_and_log_stdout(func, arg, log_path):
    """Call ``func(arg)`` with stdout captured and always save the capture to *log_path*."""
    # Local imports keep this helper independent of the module-level import list.
    import contextlib
    from io import StringIO

    captured = StringIO()
    try:
        with contextlib.redirect_stdout(captured):
            func(arg)
    finally:
        # Written even when func raises, so the output printed before a
        # failure is not lost together with the exception.
        with open(log_path, "w") as log_file:
            log_file.write(captured.getvalue())


# -------------------------------------------------
# Main Pipeline
# -------------------------------------------------
def pipeline_run(config):
    """Run the enabled steps of one DoS attack round.

    Steps are switched on through ``config["run_steps"]`` (keys ``attack``,
    ``decode``, ``evaluate``, ``update``); a missing or empty section disables
    every step. Each step imports its script lazily, builds that script's
    config dict from the shared *config* and calls the script's ``run``.

    Args:
        config: Mapping with ``round`` (int) and, for the enabled steps, the
            ``attack``, ``decode``, ``evaluate`` and ``update`` sections.

    Raises:
        KeyError: If ``round`` or a key needed by an enabled step is missing.
        Exception: Whatever an invoked step raises. The attack step's captured
            stdout is still written to its stats file in that case.
    """
    round_num = config["round"]
    # "run_steps:" with no children parses to None in YAML.
    steps = config.get("run_steps") or {}

    print("\n==============================")
    print(f"Running Round in driver file {round_num}")
    print("==============================")

    # ==========================================================
    # STEP 1: ATTACK
    # ==========================================================
    if steps.get("attack", False):

        from scripts.adversarial_attack_dos_final_CARLA import run as run_attack

        # Copy base attack config so the shared config is not mutated
        attack_cfg = config["attack"].copy()

        # Required keys for attack script
        attack_cfg["output_path"] = config["attack"]["output_dir"]
        attack_cfg["model_path"] = config["attack"]["surrogate_model"]
        attack_cfg["rounds"] = round_num

        # Round 0 starts from the original data; later rounds consume the
        # files written by the previous round.
        if round_num == 0:
            attack_cfg["test_data_dir"] = config["attack"]["original_test_dir"]
            attack_cfg["packet_level_data"] = config["attack"]["original_tracksheet"]
            attack_cfg["test_label_file"] = config["attack"]["original_label_file"]
        else:
            attack_cfg["test_data_dir"] = config["attack"]["output_dir"]
            attack_cfg["packet_level_data"] = (
                f'{config["update"]["tracksheet_dir"]}/dos_test_track_{round_num-1}.csv'
            )
            attack_cfg["test_label_file"] = (
                f'{config["attack"]["output_dir"]}/labels_{round_num}.txt'
            )

        attack_out = attack_cfg["output_path"]
        os.makedirs(attack_out, exist_ok=True)

        print("\n=== Step 1: Adversarial Attack ===")

        # The attack script reports its statistics on stdout; capture them
        # into a per-round stats file instead of the console.
        stats_file = os.path.join(attack_out, f"stats_round_{round_num}.txt")
        _run_and_log_stdout(run_attack, attack_cfg, stats_file)

        print(f"[INFO] Attack log saved to {stats_file}")

    # ==========================================================
    # STEP 2: DECODE
    # ==========================================================
    if steps.get("decode", False):

        from scripts.Traffic_decoder_dos_CARLA import run as run_decode

        decode_cfg = config["decode"].copy()
        decode_cfg["rounds"] = round_num
        decode_cfg["input_images"] = config["attack"]["output_dir"]
        decode_cfg["csv_file"] = (
            f'{config["attack"]["output_dir"]}/packet_level_data_{round_num}.csv'
        )
        decode_cfg["output_file"] = (
            f'{config["decode"]["decoded_output_dir"]}/traffic_{round_num}.txt'
        )

        os.makedirs(config["decode"]["decoded_output_dir"], exist_ok=True)

        print("\n=== Step 2: Traffic Decoder ===")
        run_decode(decode_cfg)

    # ==========================================================
    # STEP 3: EVALUATION
    # ==========================================================
    if steps.get("evaluate", False):

        from scripts.evaluate_dos_CARLA import run as run_eval

        eval_cfg = config["evaluate"].copy()
        eval_cfg["rounds"] = round_num
        eval_cfg["model_path"] = config["evaluate"]["model_path"]
        eval_cfg["traffic_path"] = (
            f'{config["decode"]["decoded_output_dir"]}/traffic_{round_num}.txt'
        )
        eval_cfg["tracksheet"] = (
            f'{config["attack"]["output_dir"]}/packet_level_data_{round_num}.csv'
        )
        eval_cfg["output_path"] = (
            f'{config["evaluate"]["prediction_output_dir"]}/prediction_output_{round_num}.csv'
        )

        os.makedirs(config["evaluate"]["prediction_output_dir"], exist_ok=True)

        print("\n=== Step 3: Evaluation ===")
        run_eval(eval_cfg)

    # ==========================================================
    # STEP 4: UPDATE
    # ==========================================================
    if steps.get("update", False):

        from scripts.update_labels_dos_CARLA import run as run_update

        update_cfg = config["update"].copy()

        update_cfg["tracksheet"] = (
            f'{config["update"]["tracksheet_dir"]}/dos_test_track_{round_num}.csv'
        )

        # Labels are always derived from the original label file.
        update_cfg["label_file"] = config["attack"]["original_label_file"]

        update_cfg["updated_label_file"] = (
            f'{config["attack"]["output_dir"]}/labels_{round_num+1}.txt'
        )

        # Like the other steps, make sure the output directory exists: the
        # next round's attack step reads its tracksheet from here.
        os.makedirs(config["update"]["tracksheet_dir"], exist_ok=True)

        print("\n=== Step 4: Update Labels ===")
        run_update(update_cfg)


# -------------------------------------------------
# MAIN
# -------------------------------------------------
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("--round", type=int, default=0)
    parser.add_argument("--config", type=str, default="config_dos_CARLA.yaml")
    args = parser.parse_args()

    cfg = load_config(args.config)
    cfg["round"] = args.round

    pipeline_run(cfg)
def _run_and_log_stdout(func, arg, log_path):
    """Call ``func(arg)`` with stdout captured and always save the capture to *log_path*."""
    # Local imports keep this helper independent of the module-level import list.
    import contextlib
    from io import StringIO

    captured = StringIO()
    try:
        with contextlib.redirect_stdout(captured):
            func(arg)
    finally:
        # Written even when func raises, so the output printed before a
        # failure is not lost together with the exception.
        with open(log_path, "w") as log_file:
            log_file.write(captured.getvalue())


# -------------------------------------------------
# Main Pipeline
# -------------------------------------------------
def pipeline_run(config):
    """Run the enabled steps of one spoofing attack round.

    Steps are switched on through ``config["run_steps"]`` (keys ``attack``,
    ``decode``, ``evaluate``, ``update``); a missing or empty section disables
    every step. Each step imports its script lazily, builds that script's
    config dict from the shared *config* and calls the script's ``run``.

    Args:
        config: Mapping with ``round`` (int) and, for the enabled steps, the
            ``attack``, ``decode``, ``evaluate`` and ``update`` sections.

    Raises:
        KeyError: If ``round`` or a key needed by an enabled step is missing.
        Exception: Whatever an invoked step raises. The attack step's captured
            stdout is still written to its stats file in that case.
    """
    round_num = config["round"]
    # "run_steps:" with no children parses to None in YAML.
    steps = config.get("run_steps") or {}

    print("\n==============================")
    print(f"Running Round in driver file {round_num}")
    print("==============================")

    # ==========================================================
    # STEP 1: ATTACK
    # ==========================================================
    if steps.get("attack", False):

        from scripts.adversarial_attack_spoof_CARLA import run as run_attack

        # Copy base attack config so the shared config is not mutated
        attack_cfg = config["attack"].copy()

        # Required keys for attack script
        attack_cfg["output_path"] = config["attack"]["output_dir"]
        attack_cfg["model_path"] = config["attack"]["surrogate_model"]
        attack_cfg["rounds"] = round_num

        # Round 0 starts from the original data; later rounds consume the
        # files written by the previous round.
        if round_num == 0:
            attack_cfg["test_data_dir"] = config["attack"]["original_test_dir"]
            attack_cfg["packet_level_data"] = config["attack"]["original_tracksheet"]
            attack_cfg["test_label_file"] = config["attack"]["original_label_file"]
        else:
            attack_cfg["test_data_dir"] = config["attack"]["output_dir"]
            attack_cfg["packet_level_data"] = (
                f'{config["update"]["tracksheet_dir"]}/spoof_test_track_{round_num-1}.csv'
            )
            attack_cfg["test_label_file"] = (
                f'{config["attack"]["output_dir"]}/labels_{round_num}.txt'
            )

        attack_out = attack_cfg["output_path"]
        os.makedirs(attack_out, exist_ok=True)

        print("\n=== Step 1: Adversarial Attack ===")

        # The attack script reports its statistics on stdout; capture them
        # into a per-round stats file instead of the console.
        stats_file = os.path.join(attack_out, f"stats_round_{round_num}.txt")
        _run_and_log_stdout(run_attack, attack_cfg, stats_file)

        print(f"[INFO] Attack log saved to {stats_file}")

    # ==========================================================
    # STEP 2: DECODE
    # ==========================================================
    if steps.get("decode", False):

        from scripts.Traffic_decoder_spoof_CARLA import run as run_decode

        decode_cfg = config["decode"].copy()
        decode_cfg["rounds"] = round_num
        decode_cfg["input_images"] = config["attack"]["output_dir"]
        decode_cfg["csv_file"] = (
            f'{config["attack"]["output_dir"]}/packet_level_data_{round_num}.csv'
        )
        decode_cfg["output_file"] = (
            f'{config["decode"]["decoded_output_dir"]}/traffic_{round_num}.txt'
        )

        os.makedirs(config["decode"]["decoded_output_dir"], exist_ok=True)

        print("\n=== Step 2: Traffic Decoder ===")
        run_decode(decode_cfg)

    # ==========================================================
    # STEP 3: EVALUATION
    # ==========================================================
    if steps.get("evaluate", False):

        from scripts.evaluate_spoof_CARLA import run as run_eval

        eval_cfg = config["evaluate"].copy()
        eval_cfg["rounds"] = round_num
        eval_cfg["model_path"] = config["evaluate"]["model_path"]
        eval_cfg["traffic_path"] = (
            f'{config["decode"]["decoded_output_dir"]}/traffic_{round_num}.txt'
        )
        eval_cfg["tracksheet"] = (
            f'{config["attack"]["output_dir"]}/packet_level_data_{round_num}.csv'
        )
        eval_cfg["output_path"] = (
            f'{config["evaluate"]["prediction_output_dir"]}/prediction_output_{round_num}.csv'
        )

        os.makedirs(config["evaluate"]["prediction_output_dir"], exist_ok=True)

        print("\n=== Step 3: Evaluation ===")
        run_eval(eval_cfg)

    # ==========================================================
    # STEP 4: UPDATE
    # ==========================================================
    if steps.get("update", False):

        from scripts.update_labels_spoof_CARLA import run as run_update

        update_cfg = config["update"].copy()

        update_cfg["tracksheet"] = (
            f'{config["update"]["tracksheet_dir"]}/spoof_test_track_{round_num}.csv'
        )

        # Labels are always derived from the original label file.
        update_cfg["label_file"] = config["attack"]["original_label_file"]

        update_cfg["updated_label_file"] = (
            f'{config["attack"]["output_dir"]}/labels_{round_num+1}.txt'
        )

        # Like the other steps, make sure the output directory exists: the
        # next round's attack step reads its tracksheet from here.
        os.makedirs(config["update"]["tracksheet_dir"], exist_ok=True)

        print("\n=== Step 4: Update Labels ===")
        run_update(update_cfg)


# -------------------------------------------------
# MAIN
# -------------------------------------------------
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("--round", type=int, default=0)
    parser.add_argument("--config", type=str, default="config_spoof_CARLA.yaml")
    args = parser.parse_args()

    cfg = load_config(args.config)
    cfg["round"] = args.round

    pipeline_run(cfg)
###################################################
# Custom Training Callback for Batch-Level Monitoring
###################################################
class BatchLossHistory(Callback):
    """Keras callback that records the training loss after every batch.

    After (or during) training, ``batch_losses`` holds one
    ``(iteration, loss)`` tuple per finished batch, where ``iteration`` is a
    1-based counter that keeps increasing across epochs.
    """

    def __init__(self):
        """Create the callback with empty history.

        The counters are also initialised here so the hooks are safe to call
        even if ``on_train_begin`` has not run yet.
        """
        super().__init__()
        self.batch_losses = []  # List of (iteration, loss) tuples
        self.iterations = 0     # Total number of batches seen so far

    def on_train_begin(self, logs=None):
        """Reset the history at the start of a training run.

        Args:
            logs: Training logs dictionary (unused).
        """
        self.batch_losses = []
        self.iterations = 0

    def on_batch_end(self, batch, logs=None):
        """Record the loss reported for the batch that just finished.

        Args:
            batch: Index of the batch within the current epoch (unused).
            logs: Dictionary of batch metrics; the ``'loss'`` entry is stored.
                When it is missing or *logs* is None, ``None`` is recorded.
        """
        # The signature defaults logs to None, so guard before calling .get().
        logs = logs or {}
        self.iterations += 1
        self.batch_losses.append((self.iterations, logs.get('loss')))
###################################################
# Stem Block: Initial Feature Extraction
###################################################
def stem_block(inputs):
    """Apply the stem: two 3x3 convolutions, a 2x2 max-pool and a 1x1 convolution.

    For the default 29x29x1 input the shapes are:
        Conv2D(64, 3x3, valid)      29x29x1  -> 27x27x64
        Conv2D(64, 3x3, same)       27x27x64 -> 27x27x64
        MaxPooling2D(2x2, stride 2) 27x27x64 -> 13x13x64
        Conv2D(128, 1x1, same)      13x13x64 -> 13x13x128

    Args:
        inputs: Input tensor, (batch_size, 29, 29, 1) for the default model.

    Returns:
        Feature tensor, (batch_size, 13, 13, 128) for the default input.
    """
    # Layers are listed in application order; every convolution uses ReLU.
    stages = (
        Conv2D(64, (3, 3), strides=1, padding='valid', activation='relu'),
        Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),
        MaxPooling2D((2, 2), strides=2, padding='valid'),
        Conv2D(128, (1, 1), strides=1, padding='same', activation='relu'),
    )

    features = inputs
    for stage in stages:
        features = stage(features)
    return features
###################################################
# Inception-ResNet Block A: Multi-Scale Feature Extraction
###################################################
def inception_resnet_a_block(x, scale=0.1):
    """Apply one Inception-ResNet-A block (three conv branches + scaled residual).

    Branches (all 'same' padding, ReLU):
        - Branch 0: 1x1 conv (32 filters)
        - Branch 1: 1x1 conv (32) -> 3x3 conv (32)
        - Branch 2: 1x1 conv (32) -> 3x3 conv (48) -> 3x3 conv (64)

    The concatenated branches (128 channels) are projected by a linear 1x1
    convolution back to the channel count of ``x``, multiplied by ``scale``,
    added to ``x`` and passed through ReLU.

    Args:
        x: Input tensor of shape (batch_size, height, width, channels).
        scale: Factor applied to the residual branch before the addition.

    Returns:
        Tensor with the same shape as ``x``.
    """
    # Branch 0: point-wise features
    branch_0 = Conv2D(32, (1, 1), padding='same', activation='relu')(x)

    # Branch 1: 1x1 -> 3x3
    branch_1 = Conv2D(32, (1, 1), padding='same', activation='relu')(x)
    branch_1 = Conv2D(32, (3, 3), padding='same', activation='relu')(branch_1)

    # Branch 2: 1x1 -> 3x3 -> 3x3 (two stacked 3x3 convs cover a 5x5 field)
    branch_2 = Conv2D(32, (1, 1), padding='same', activation='relu')(x)
    branch_2 = Conv2D(48, (3, 3), padding='same', activation='relu')(branch_2)
    branch_2 = Conv2D(64, (3, 3), padding='same', activation='relu')(branch_2)

    # Total channels: 32 + 32 + 64 = 128
    merged = Concatenate(axis=-1)([branch_0, branch_1, branch_2])

    # Linear 1x1 projection so the residual matches the input channel count
    up = Conv2D(tf.keras.backend.int_shape(x)[-1], (1, 1), padding='same')(merged)

    # Pass `scale` through `arguments` instead of closing over it: the value
    # is then stored in the layer config, so a saved model does not depend on
    # a closure variable that is absent when the Lambda is deserialized.
    up = Lambda(lambda s, scale: s * scale, arguments={'scale': scale})(up)

    # Residual connection followed by the non-linearity
    x = Add()([x, up])
    x = tf.keras.layers.Activation('relu')(x)

    return x
###################################################
# Reduction Block A: Spatial Downsampling with Feature Expansion
###################################################
def reduction_a_block(x):
    """Downsample with three parallel stride-2 branches and concatenate them.

    Branches:
        - 3x3 max-pool, stride 2, valid (keeps the input channel count)
        - 3x3 conv, 160 filters, stride 2, valid
        - 1x1 conv (128) -> 3x3 conv (160, same) -> 3x3 conv (160, stride 2, valid)

    Args:
        x: Input tensor (13x13x128 when fed by the stem / block A).

    Returns:
        Concatenation of the three branches along the channel axis; 6x6x448
        for a 13x13x128 input (128 + 160 + 160 channels).
    """
    # Pooling path: 13x13 -> 6x6, channels untouched
    pooled = MaxPooling2D((3, 3), strides=2, padding='valid')(x)

    # Learned downsampling path: 13x13x128 -> 6x6x160
    strided = Conv2D(160, (3, 3), strides=2, padding='valid', activation='relu')(x)

    # Deep path: (filters, kernel, stride, padding) applied in sequence;
    # only the last convolution reduces the spatial size.
    deep_spec = (
        (128, (1, 1), 1, 'same'),
        (160, (3, 3), 1, 'same'),
        (160, (3, 3), 2, 'valid'),
    )
    deep = x
    for filters, kernel, stride, pad in deep_spec:
        deep = Conv2D(filters, kernel, strides=stride, padding=pad, activation='relu')(deep)

    return Concatenate(axis=-1)([pooled, strided, deep])
###################################################
# Inception-ResNet Block B: High-Level Feature Processing
###################################################
def inception_resnet_b_block(x, scale=0.1):
    """Apply one Inception-ResNet-B block (asymmetric conv branch + scaled residual).

    Branches (all 'same' padding, ReLU):
        - Branch 0: 1x1 conv (192 filters)
        - Branch 1: 1x1 conv (128) -> 1x7 conv (160) -> 7x1 conv (192)

    The concatenated branches (384 channels) are projected by a linear 1x1
    convolution back to the channel count of ``x``, multiplied by ``scale``,
    added to ``x`` and passed through ReLU.

    Args:
        x: Input tensor of shape (batch_size, height, width, channels);
            6x6x448 in the default model.
        scale: Factor applied to the residual branch before the addition.

    Returns:
        Tensor with the same shape as ``x``.
    """
    # Branch 0: channel-wise transformation
    branch_0 = Conv2D(192, (1, 1), padding='same', activation='relu')(x)

    # Branch 1: 1x1 reduction, then a 1x7 / 7x1 pair in place of a full 7x7
    branch_1 = Conv2D(128, (1, 1), padding='same', activation='relu')(x)
    branch_1 = Conv2D(160, (1, 7), padding='same', activation='relu')(branch_1)
    branch_1 = Conv2D(192, (7, 1), padding='same', activation='relu')(branch_1)

    # Total channels: 192 + 192 = 384
    merged = Concatenate(axis=-1)([branch_0, branch_1])

    # Linear 1x1 projection so the residual matches the input channel count
    up = Conv2D(tf.keras.backend.int_shape(x)[-1], (1, 1), padding='same')(merged)

    # Pass `scale` through `arguments` instead of closing over it: the value
    # is then stored in the layer config, so a saved model does not depend on
    # a closure variable that is absent when the Lambda is deserialized.
    up = Lambda(lambda s, scale: s * scale, arguments={'scale': scale})(up)

    # Residual connection followed by the non-linearity
    x = Add()([x, up])
    x = tf.keras.layers.Activation('relu')(x)

    return x
###################################################
# Reduction Block B: Final Spatial Downsampling
###################################################
def reduction_b_block(x):
    """Downsample with a max-pool branch and a strided-conv branch, then concatenate.

    Branches (both 3x3, stride 2, valid padding):
        - max-pool, which keeps the input channel count
        - convolution with 448 filters and ReLU

    Args:
        x: Input tensor; (batch_size, 6, 6, 448) in the default model.

    Returns:
        Channel-wise concatenation of both branches;
        (batch_size, 2, 2, 896) for a 6x6x448 input.
    """
    # 6x6x448 -> 2x2x448, strongest activations per window
    pooled = MaxPooling2D((3, 3), strides=2, padding='valid')(x)

    # 6x6x448 -> 2x2x448, learned downsampling
    convolved = Conv2D(448, (3, 3), strides=2, padding='valid', activation='relu')(x)

    # 448 + 448 = 896 channels
    return Concatenate(axis=-1)([pooled, convolved])
###################################################
# Main Model Architecture Builder
###################################################
def build_reduced_inception_resnet(input_shape=(29, 29, 1), num_classes=2, dropout_rate=0.2):
    """Assemble the reduced Inception-ResNet classifier.

    Shapes for the default 29x29x1 input:
        1. Stem block:            29x29x1   -> 13x13x128
        2. Inception-ResNet-A:    13x13x128 -> 13x13x128
        3. Reduction-A:           13x13x128 -> 6x6x448
        4. Inception-ResNet-B:    6x6x448   -> 6x6x448
        5. Reduction-B:           6x6x448   -> 2x2x896
        6. 2x2 average pooling:   2x2x896   -> 1x1x896
        7. Flatten, dropout, softmax dense: 896 -> num_classes

    Args:
        input_shape: Shape of one input sample (default: 29x29x1).
        num_classes: Number of softmax outputs (default: 2).
        dropout_rate: Dropout rate applied before the final dense layer.

    Returns:
        A Keras ``Model``. It is not compiled here; the caller compiles it.
    """
    network_input = Input(shape=input_shape)

    # Convolutional backbone, applied in order; both residual blocks use a
    # residual scale of 0.1.
    backbone = (
        stem_block,
        lambda t: inception_resnet_a_block(t, scale=0.1),
        reduction_a_block,
        lambda t: inception_resnet_b_block(t, scale=0.1),
        reduction_b_block,
    )
    features = network_input
    for stage in backbone:
        features = stage(features)

    # Classification head: the 2x2 pool collapses the 2x2 feature map that
    # the default input produces, then flatten -> dropout -> softmax.
    pooled = AveragePooling2D((2, 2), padding='valid')(features)
    flat = Flatten()(pooled)
    regularised = Dropout(dropout_rate)(flat)
    probabilities = Dense(num_classes, activation='softmax')(regularised)

    return Model(network_input, probabilities)
###################################################
# Model Wrapper Class for Training and Evaluation
###################################################
class Inception_Resnet_V1:
    """Wrapper around the reduced Inception-ResNet model.

    Builds the model with ``build_reduced_inception_resnet()`` and offers
    ``train`` (compile, fit with per-batch loss tracking, save) and
    ``summary``.
    """

    def __init__(self, epochs=10, batch_size=32, load_weights=False):
        """Build the model and store the training hyperparameters.

        Args:
            epochs: Default number of training epochs (default: 10).
            batch_size: Batch size used by ``train`` (default: 32).
            load_weights: Reserved flag. No weight file is configured, so
                passing True loads nothing and only emits a RuntimeWarning.
        """
        self.epochs = epochs
        self.batch_size = batch_size

        self.model = build_reduced_inception_resnet()

        if load_weights:
            # Weight loading is not implemented. Warn rather than silently
            # hand back a randomly initialised model to a caller that asked
            # for pre-trained weights.
            # Example hook: self.model.load_weights('path_to_pretrained_weights.h5')
            import warnings

            warnings.warn(
                "Inception_Resnet_V1(load_weights=True) does not load any weights: "
                "no weight file is configured, the model keeps its initial weights.",
                RuntimeWarning,
                stacklevel=2,
            )

    def train(self, x_train, y_train, x_test, y_test, filename_prefix="", epochs_override=None):
        """Compile, fit and save the model, recording the loss of every batch.

        The model is compiled with Adam (learning rate 0.001), sparse
        categorical cross-entropy and the accuracy metric, fitted on the
        training data and saved to ``filename_prefix + 'final_model.h5'``.

        Args:
            x_train: Training inputs.
            y_train: Integer class labels for ``x_train``.
            x_test: Accepted for interface compatibility; not used by this
                method (no validation data is passed to ``fit``).
            y_test: Accepted for interface compatibility; not used.
            filename_prefix: Prefix of the saved model filename.
            epochs_override: Number of epochs to run instead of ``self.epochs``.

        Returns:
            tuple: ``(history, batch_losses)`` where ``history`` is the object
            returned by ``fit`` and ``batch_losses`` is the list of
            ``(iteration, loss)`` tuples collected by ``BatchLossHistory``.
        """
        epochs_to_run = epochs_override if epochs_override is not None else self.epochs

        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

        batch_callback = BatchLossHistory()

        history = self.model.fit(
            x_train, y_train,
            epochs=epochs_to_run,
            batch_size=self.batch_size,
            callbacks=[batch_callback]  # Capture per-batch metrics
        )

        self.model.save(filename_prefix + 'final_model.h5')

        return history, batch_callback.batch_losses

    def summary(self):
        """Print the Keras model summary.

        Returns:
            Whatever ``self.model.summary()`` returns.
        """
        return self.model.summary()
+ + Returns: + Model summary showing architecture, output shapes, and parameter counts + """ + return self.model.summary() + +################################################### +# Development and Testing Code +################################################### +# Uncomment the following lines for model architecture debugging and testing: +# if __name__ == "__main__": +# # Create model instance with sample hyperparameters +# instance = Inception_Resnet_V1(epochs=5, batch_size=32) +# +# # Display model architecture summary +# instance.summary() +# +# # Optional: Test with dummy data +# # import numpy as np +# # x_dummy = np.random.rand(100, 29, 29, 1) +# # y_dummy = np.random.randint(0, 2, 100) +# # history, batch_losses = instance.train(x_dummy, y_dummy, x_dummy, y_dummy) +# # print(f"Training completed. Final batch loss: {batch_losses[-1][1]:.4f}") diff --git a/CARLA_Entropy/run_rounds.sh b/CARLA_Entropy/run_rounds.sh new file mode 100755 index 0000000..1c27018 --- /dev/null +++ b/CARLA_Entropy/run_rounds.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -e + +TOTAL_ROUNDS=4 + +for ((i=0; i<$TOTAL_ROUNDS; i++)) +do + echo "=======================================" + echo "Running Round $i" + echo "=======================================" + python driver_dos_CARLA.py --round $i +done diff --git a/CARLA_Entropy/run_rounds_spoof.sh b/CARLA_Entropy/run_rounds_spoof.sh new file mode 100755 index 0000000..7eb7434 --- /dev/null +++ b/CARLA_Entropy/run_rounds_spoof.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -e + +TOTAL_ROUNDS=4 + +for ((i=0; i<$TOTAL_ROUNDS; i++)) +do + echo "=======================================" + echo "Running Round $i" + echo "=======================================" + python driver_spoof_CARLA.py --round $i +done diff --git a/CARLA_Entropy/scripts/Traffic_decoder_dos_CARLA.py b/CARLA_Entropy/scripts/Traffic_decoder_dos_CARLA.py new file mode 100644 index 0000000..ba93008 --- /dev/null +++ b/CARLA_Entropy/scripts/Traffic_decoder_dos_CARLA.py @@ -0,0 +1,391 @@ 
def save_to_txt(dataset, traffic_file, packet_level_data, rounds):
    """
    Write decoded CAN frames to a labelled, comma-separated traffic file.

    Decoded frames are paired with tracksheet rows purely by position: the
    n-th frame takes its timestamp and label from the n-th data row.

    Args:
        dataset: Iterable of dicts with keys "can_id", "dlc" and "data"
            (list of hex byte strings).
        traffic_file: Path of the output file (overwritten).
        packet_level_data: Path of the tracksheet CSV. It must provide the
            columns timestamp, can_id, original_label and operation_label,
            plus pred_label when ``rounds`` is not 0.
        rounds: Round number. Round 0 labels from "original_label", later
            rounds from "pred_label".

    Raises:
        ValueError: If the tracksheet is completely empty.
        KeyError: If a required column is missing. Columns are checked before
            the output file is created, so a bad tracksheet no longer leaves
            a truncated output behind.
    """
    known_operations = {"I", "M", "Pi", "Pm", "None"}

    def convert_label(source_label, operation_label):
        # A frame is reported as attack ("A") only when its source label is
        # "A" and the operation is one of the known values; otherwise "B".
        if source_label.strip() == "A" and operation_label.strip() in known_operations:
            return "A"
        return "B"

    label_column = "original_label" if rounds == 0 else "pred_label"
    required_cols = ["timestamp", "can_id", "original_label", "operation_label"]
    if rounds != 0:
        # Later rounds read pred_label, so require it up front as well.
        required_cols.append("pred_label")

    truncated = False
    written = 0

    with open(packet_level_data, 'r') as csv_file:
        header_line = csv_file.readline()
        if not header_line:
            raise ValueError(f"Tracksheet '{packet_level_data}' is empty")
        header = header_line.strip().split(",")

        # Lookup table: column name -> index
        col_index = {name: idx for idx, name in enumerate(header)}
        for col in required_cols:
            if col not in col_index:
                raise KeyError(f"Column '{col}' not found in CSV header: {header}")

        with open(traffic_file, 'w') as file:
            file.write("timestamp,can_id,dlc,d0,d1,d2,d3,d4,d5,d6,d7,label\n")

            for data in dataset:
                line = csv_file.readline().strip()
                if not line:
                    # Tracksheet exhausted (or blank line): stop pairing.
                    truncated = True
                    break

                extra_data = line.split(",")
                timestamp = float(extra_data[col_index["timestamp"]])
                final_label = convert_label(
                    extra_data[col_index[label_column]],
                    extra_data[col_index["operation_label"]],
                )

                # NOTE(review): frames with fewer than 8 data bytes produce
                # fewer columns than the d0..d7 header announces; kept as-is
                # because the downstream reader is not visible here.
                data_bytes_str = ",".join(data["data"])
                file.write(
                    f"{timestamp:.6f},{data['can_id']},{data['dlc']},{data_bytes_str},{final_label}\n"
                )
                written += 1

    if truncated:
        # Previously silent: make the frame/row count mismatch visible.
        print(
            f"Warning: tracksheet '{packet_level_data}' ran out of rows after "
            f"{written} frames; remaining decoded frames were not written"
        )
def run(params):
    """
    Decode a folder of frame images and write the labelled traffic file.

    Args:
        params: Mapping with the keys
            - "rounds": round number forwarded to ``save_to_txt``
            - "input_images": folder containing the PNG images to decode
            - "csv_file": tracksheet CSV paired with the decoded frames
            - "output_file": path of the traffic file to write

    Raises:
        KeyError: If any required key is missing; all missing keys are
            reported in a single message.
    """
    required_keys = ("rounds", "input_images", "csv_file", "output_file")
    missing = [key for key in required_keys if key not in params]
    if missing:
        raise KeyError(
            f"Decode parameters missing required key(s): {', '.join(missing)}"
        )

    rounds = params["rounds"]
    input_images = params["input_images"]
    packet_level_data = params["csv_file"]
    traffic_file = params["output_file"]

    all_data = process_multiple_images(input_images)
    print("Decoded")
    save_to_txt(all_data, traffic_file, packet_level_data, rounds)
    # Report the path that was actually written rather than a hard-coded one.
    print(f"Saved {traffic_file}")
= np.array(image) + +# label_matrix = np.zeros((128, 128), dtype=np.uint8) +# for rgb, value in PIXEL_COLOR_MAP.items(): +# mask = np.all(pixels == rgb, axis=-1) +# label_matrix[mask] = value + +# data_array = label_matrix.tolist() +# dataset = [] + +# for row in data_array: +# if row[0] == 0: # Frame row +# # print("inside row") +# n_row = row[::-1] +# last_1_index = n_row.index(1) +# last_1 = len(row) - 1 - last_1_index +# binary_string = "".join(map(str, row[:last_1 + 1])) +# # print("binary tsrning", binary_string) +# # CAN ID (bits 1–11) +# can_id = hex(int(binary_string[1:12], 2))[2:].zfill(4) +# # print("canid", can_id) +# # DLC (bits 15–18) +# dlc = int(binary_string[15:19], 2) +# # print("dlc", dlc) +# # Correct CAN data extraction (fixed) +# start_bit = 19 +# end_bit = 19 + dlc * 8 +# data_bits = binary_string[start_bit:end_bit] + +# data_bytes = [ +# hex(int(data_bits[i:i + 8], 2))[2:].zfill(2) +# for i in range(0, len(data_bits), 8) +# ] + +# dataset.append({ +# "can_id": can_id, +# "dlc": dlc, +# "data": data_bytes +# }) +# # print("dataset", dataset) + +# return dataset + + + +# def save_to_txt(dataset, file_path, data_csv_file): + +# def convert_label(raw_label, pkt_label): +# raw_label = raw_label.strip() + +# # map I/M → A +# if raw_label in ["I", "M"]: +# return "A" + +# # raw_label "None" + pkt_label == 1 → A +# if raw_label == "None" and pkt_label == "1": +# return "A" + +# # everything else → B +# return "B" + +# with open(file_path, 'w') as file, open(data_csv_file, 'r') as csv_file: + +# file.write("timestamp,can_id,dlc,d0,d1,d2,d3,d4,d5,d6,d7,label\n") +# # Read header ONCE +# header = next(csv_file).strip().split(",") + +# # Create lookup table: column_name → index +# col_index = {name: idx for idx, name in enumerate(header)} + +# # Validate required columns exist +# required_cols = ["timestamp", "can_id", "label", "perturbation_type"] +# for col in required_cols: +# if col not in col_index: +# raise KeyError(f"Column '{col}' not found in 
CSV header: {header}") + +# # Now read each subsequent row +# for data in dataset: + +# line = csv_file.readline().strip() +# if not line: +# break # no more rows → stop + +# extra_data = line.split(",") + +# # Use column names instead of hardcoded [-3], [-2], etc. +# timestamp = float(extra_data[col_index["timestamp"]]) +# pkt_label = extra_data[col_index["label"]] # old 'label' +# raw_label = extra_data[col_index["perturbation_type"]] # raw attack label (I/M/None) + +# final_label = convert_label(raw_label, pkt_label) + +# data_bytes_str = ",".join(data["data"]) + +# file.write( +# f"{timestamp:.6f},{data['can_id']},{data['dlc']},{data_bytes_str},{final_label}\n" +# ) + + +# def process_multiple_images(input_images, output_file, csv_file): + +# if input_images == "dos_k12": +# image_folder = r"perturbed_images" +# elif input_images == "test80": +# image_folder = r"test80/perturbed" +# else: +# print("Invalid input. Please provide a valid filetype.") +# return + + +# image_paths = [os.path.join(image_folder, f) +# for f in os.listdir(image_folder) +# if f.endswith(".png")] + +# image_paths.sort(key=lambda x: int(x.split('_')[-1].split('.')[0])) + +# all_data = [] + +# for image_path in image_paths: +# dataset = process_image(image_path) +# all_data.extend(dataset) + +# save_to_txt(all_data, output_file, csv_file) + + + +# def run(params): + +# # input_images = "gear_k12" +# input_images = params["input_images"] +# csv_file = params["csv_file"] +# output_file = params["output_file"] +# # if len(sys.argv) != 2: +# # print("Usage: python file_name.py ") +# # sys.exit(1) + +# # input_images = sys.argv[1] + +# # output_file = f"./decoded_traffic/traffic_{rounds}.txt" +# # csv_file = "./blackbox_dos_k_12_nfd/packet_level_data_fixed.csv" + +# process_multiple_images(input_images, output_file, csv_file) +# print("Decoded") + + +# if __name__ == "__main__": +# # Allow standalone execution +# cfg = yaml.safe_load(open("config_dos.yaml")) +# run(cfg["decode"]) \ No newline 
def _decode_frame_row(row):
    """Decode one row of per-pixel label values (0/1 bits) into a frame dict."""
    # The frame ends at the last white (1) pixel; everything after it is
    # dropped before the bit fields are sliced out.
    try:
        last_one = len(row) - 1 - row[::-1].index(1)
    except ValueError:
        raise ValueError(
            "frame row contains no white (1) pixel; cannot locate the end of the frame"
        ) from None

    bits = "".join(map(str, row[:last_one + 1]))

    # Field layout used by this decoder: bit 0 start, bits 1-11 CAN ID,
    # bits 15-18 DLC, payload from bit 19 on.
    can_id = format(int(bits[1:12], 2), "04x")
    dlc = int(bits[15:19], 2)
    payload = bits[19:19 + dlc * 8]
    data_bytes = [
        format(int(payload[i:i + 8], 2), "02x")
        for i in range(0, len(payload), 8)
    ]

    return {"can_id": can_id, "dlc": dlc, "data": data_bytes}


def process_image(image_path):
    """
    Decode every CAN frame encoded in one image.

    Each pixel is mapped to a label through ``PIXEL_COLOR_MAP``; pixels whose
    colour is not in the map keep label 0. Rows whose first pixel has label 0
    are treated as frame rows and decoded.

    Args:
        image_path: Path of the image file to decode.

    Returns:
        list[dict]: One dict per frame row with keys "can_id" (4 hex digits),
        "dlc" (int) and "data" (list of 2-digit hex strings).

    Raises:
        ValueError: If a frame row contains no white pixel or its bit fields
            hold non-binary label values.
    """
    # Close the file handle promptly and normalise to 3-channel RGB so RGBA,
    # palette or greyscale PNGs compare correctly against the colour map.
    with Image.open(image_path) as image:
        pixels = np.array(image.convert("RGB"))

    # Size the label matrix from the image instead of assuming 128x128.
    label_matrix = np.zeros(pixels.shape[:2], dtype=np.uint8)
    for rgb, value in PIXEL_COLOR_MAP.items():
        label_matrix[np.all(pixels == rgb, axis=-1)] = int(value)

    return [
        _decode_frame_row(row)
        for row in label_matrix.tolist()
        if row[0] == 0  # frame rows start with a black pixel
    ]
def process_multiple_images(image_folder):
    """
    Decode every ``.png`` image in a folder, in numeric file order.

    Files are ordered by the integer found after the last underscore of the
    file name (``..._<number>.png``), and the frames from all images are
    concatenated in that order.

    Args:
        image_folder: Directory containing the images.

    Returns:
        list: Decoded frames from all images, as returned by ``process_image``.

    Raises:
        ValueError: If a ``.png`` file name does not end in ``_<number>``.
    """
    def image_number(file_name):
        # Parse the index from the file name only, so underscores or dots in
        # the directory path cannot affect the ordering.
        return int(file_name.rsplit('_', 1)[-1].split('.')[0])

    file_names = sorted(
        (name for name in os.listdir(image_folder) if name.endswith(".png")),
        key=image_number,
    )

    all_data = []
    for file_name in file_names:
        all_data.extend(process_image(os.path.join(image_folder, file_name)))

    return all_data
class InceptionStem(nn.Module):
    """
    Stem of the Inception-ResNet model.

    Maps a 3-channel input to 128 feature channels through five convolutions
    and one stride-2 max-pool, all held in a single ``nn.Sequential`` named
    ``stem``.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in forward order; their indices inside the
        # Sequential define the state-dict keys (stem.0, stem.1, ...).
        layers = [
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding='same'),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding='valid'),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            nn.Conv2d(32, 64, kernel_size=1, stride=1, padding='valid'),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding='same'),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
        ]
        self.stem = nn.Sequential(*layers)

    def forward(self, x):
        """Run the input through the stem layers and return the feature map."""
        return self.stem(x)
class ReductionA(nn.Module):
    """
    Reduction-A block: three parallel stride-2 branches joined on channels.

    - branch0: 3x3 stride-2 convolution to 192 channels
    - branch1: 1x1 -> 3x3 -> 3x3 stride-2 convolutions, ending at 128 channels
    - branch2: 3x3 stride-2 max-pool (keeps ``in_channels`` channels)

    The output therefore has ``192 + 128 + in_channels`` channels.
    """

    def __init__(self, in_channels = 128):
        super().__init__()
        self.branch0 = nn.Conv2d(in_channels, 192, kernel_size=3, stride=2, padding='valid')

        reduce_1x1 = nn.Conv2d(in_channels, 96, kernel_size=1, stride=1, padding='same')
        keep_3x3 = nn.Conv2d(96, 96, kernel_size=3, stride=1, padding='same')
        down_3x3 = nn.Conv2d(96, 128, kernel_size=3, stride=2, padding='valid')
        self.branch1 = nn.Sequential(reduce_1x1, keep_3x3, down_3x3)

        self.branch2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

    def forward(self, x):
        """Apply every branch to ``x`` and concatenate along the channel axis."""
        outputs = [branch(x) for branch in (self.branch0, self.branch1, self.branch2)]
        return torch.cat(outputs, dim=1)
class ReductionB(nn.Module):
    """
    Reduction-B block: four parallel branches over a 448-channel input.

    - branch0: 1x1 to 128, then 3x3 'valid' to 192 channels
    - branch1: 1x1 to 128, then 3x3 'valid' to 128 channels
    - branch2: 1x1 to 128, 3x3 'same', then 3x3 'valid' to 128 channels
    - branch3: 3x3 stride-1 max-pool (keeps the 448 input channels)

    All branches use stride 1, so each spatial dimension shrinks by 2, and the
    concatenated output has 192 + 128 + 128 + 448 = 896 channels.
    """

    def __init__(self):
        super().__init__()

        def squeeze():
            # 1x1 convolution reducing the 448 input channels to 128
            return nn.Conv2d(448, 128, kernel_size=1, stride=1, padding='same')

        def conv3x3(out_channels, padding):
            # 3x3 stride-1 convolution applied to the 128 squeezed channels
            return nn.Conv2d(128, out_channels, kernel_size=3, stride=1, padding=padding)

        self.branch0 = nn.Sequential(squeeze(), conv3x3(192, 'valid'))
        self.branch1 = nn.Sequential(squeeze(), conv3x3(128, 'valid'))
        self.branch2 = nn.Sequential(squeeze(), conv3x3(128, 'same'), conv3x3(128, 'valid'))
        self.branch3 = nn.MaxPool2d(kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Apply every branch to ``x`` and concatenate along the channel axis."""
        branches = (self.branch0, self.branch1, self.branch2, self.branch3)
        return torch.cat([branch(x) for branch in branches], dim=1)
def load_labels(label_file):
    """
    Load image labels from a ``filename: values`` text file.

    Each non-blank line has the form ``<filename>: <v1>, <v2>, ...``; single
    and double quotes are ignored, and the last comma-separated value is taken
    as the integer label.

    Args:
        label_file: Path of the label file.

    Returns:
        dict: Mapping of file name to integer label. If a file name repeats,
        the last occurrence wins.

    Raises:
        ValueError: If a non-blank line does not contain exactly one ``": "``
            separator or its last value is not an integer.
    """
    labels = {}
    with open(label_file, 'r') as file:
        for line_no, raw_line in enumerate(file, start=1):
            line = raw_line.strip().replace("'", "").replace('"', '')
            if not line:
                # Blank lines (e.g. a trailing newline) used to crash the
                # tuple unpacking below; they carry no label, so skip them.
                continue

            parts = line.split(': ')
            if len(parts) != 2:
                raise ValueError(
                    f"{label_file}:{line_no}: expected '<filename>: <label>', got {raw_line!r}"
                )
            filename, label_str = parts

            # The label is the last comma-separated value on the line.
            labels[filename.strip()] = int(label_str.strip().split(',')[-1].strip())
    return labels
def calculate_crc(data):
    """
    Compute the 15-bit checksum used by this file for a sequence of bits.

    Args:
        data: Iterable of bits; each item is converted with ``int()`` and only
            its lowest bit is used (e.g. a string of '0'/'1' or a list of ints).

    Returns:
        int: Checksum in the range 0..0x7FFF.

    NOTE(review): each input bit is followed by 15 shift steps that test bit
    0x8000, which is not the one-shift-per-bit form usually seen for CRC-15
    with polynomial 0x4599. The arithmetic is deliberately left unchanged;
    confirm against whatever produced the reference data before altering it.
    """
    polynomial = 0x4599
    register = 0

    for bit in data:
        # Fold the incoming bit into bit 14 of the register
        register ^= (int(bit) & 0x01) << 14

        step = 0
        while step < 15:
            top_bit_set = register & 0x8000
            register <<= 1
            if top_bit_set:
                register ^= polynomial
            step += 1

        # Keep only the low 15 bits before the next input bit
        register &= 0x7FFF

    return register
def generate_mask(perturbed_data, modification_queue, injection_queue,prev_mod_queue, prev_inj_queue,rounds, I, M, Pi, Pm):
    """
    Build a 0/1 perturbation mask from budgeted row queues.

    Up to ``I``, ``M``, ``Pi`` and ``Pm`` entries are popped from the left of
    the injection, modification, previous-injection and previous-modification
    queues respectively (the queues are consumed in place). Every queue entry
    is a ``(score, row)`` pair; only the row is used. For each selected row
    the ID columns (1..11) and data columns (19..82) are set to 1.0 across
    all batch items and channels.

    Args:
        perturbed_data: 4-D tensor (batch, channels, height, width); the mask
            takes its shape and device from it.
        modification_queue, injection_queue, prev_mod_queue, prev_inj_queue:
            deques of ``(score, row)`` pairs.
        rounds: Accepted for interface compatibility; not used here.
        I, M, Pi, Pm: Maximum number of rows to take from each queue.

    Returns:
        tuple: (mask, injection_rows, modification_rows,
        prev_modification_rows, prev_injection_rows)
    """
    sof_len = 1
    id_mask_length = 11
    mid_bits_length = 7
    data_bits_length = 64

    # Unpacking also enforces that the input is 4-dimensional.
    _batch, _channels, _height, _width = perturbed_data.shape

    id_start = sof_len
    id_end = id_start + id_mask_length
    data_start = id_end + mid_bits_length
    data_end = data_start + data_bits_length

    def pop_k(queue, k):
        # Take at most k rows from the left end of the queue
        take = min(k, len(queue))
        return [queue.popleft()[1] for _ in range(take)]

    # Select rows according to the per-queue budgets
    injection_rows = pop_k(injection_queue, I)
    modification_rows = pop_k(modification_queue, M)
    prev_injection_rows = pop_k(prev_inj_queue, Pi)
    prev_modification_rows = pop_k(prev_mod_queue, Pm)

    mask = torch.zeros_like(perturbed_data, dtype=torch.float32)
    selected_rows = (
        injection_rows + modification_rows + prev_modification_rows + prev_injection_rows
    )
    for row in selected_rows:
        # Same region for every batch item and channel
        mask[:, :, row, id_start:id_end] = 1.0
        mask[:, :, row, data_start:data_end] = 1.0

    return mask, injection_rows, modification_rows, prev_modification_rows, prev_injection_rows
+ - Flips pixels based on sign of gradient: + If black ([0,0,0]) and sign_grad > 0 → flip to white ([1,1,1]) + If white ([1,1,1]) and sign_grad < 0 → flip to black ([0,0,0]) + - Works for ID bits and data bits separately with different top-k percentages. + """ + + perturbed_image = image.clone() # Start from original image + B, C, H, W = image.shape + ID_LEN = 11 + MID_LEN = 7 + DATA_LEN = 64 + id_start = 1 + id_end = id_start + ID_LEN + data_start = 1 + ID_LEN + MID_LEN + data_end = data_start + DATA_LEN + count_bit_flip_1 = 0 + count_bit_flip_0 = 0 + + for b in range(B): + rows = mask[b, 0].nonzero(as_tuple=True)[0] # Only use first channel for mask + rows = torch.unique(rows) + rows = torch.sort(rows, descending=True).values # Sort descending + + for row in rows: + # --- ID bits --- + id_pixels = perturbed_image[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID Pixels:", id_pixels) + id_grads = data_grad[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID gradient:", id_grads) + id_signs = sign_data_grad[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID Signs:", id_signs) + + # Collapse gradients to single value per bit (sum over channels) + id_scores = torch.sum(torch.abs(id_grads), dim=0) + # print("ID Scores: ", id_scores) + num_id_top = max(1, int(1.0 * ID_LEN)) + id_top_idx = torch.topk(id_scores, num_id_top).indices + # print("Top Index:", id_top_idx) + count_bit_flip = 0 + # print("ID before flipping: ", id_pixels.clone()) + for idx in id_top_idx: + # print("Index:", idx) + pixel = id_pixels[:, idx] # [R, G, B] + # print("Pixel:", pixel) + grad_sign = torch.sum(id_signs[:, idx]).item() # Combine channels' signs + grad_sign = (id_signs[0, idx] + id_signs[1, idx] + id_signs[2, idx]).item() + # print("Grad Sign:", grad_sign) + if grad_sign > 0: # Black → White + id_pixels[:, idx] = 1.0 + count_bit_flip += 1 + elif grad_sign < 0: # White → Black + id_pixels[:, idx] = 0.0 + count_bit_flip += 1 + + # print("Number of bitflip in 
def _decode_majority_bits(pixels):
    """Decode a ``[channels, n_bits]`` pixel slice into a bit string.

    A column decodes to '1' when at least as many channels equal 1.0 as
    equal 0.0, otherwise to '0'.
    """
    ones = (pixels == 1.0).sum(dim=0).tolist()
    zeros = (pixels == 0.0).sum(dim=0).tolist()
    return ''.join('1' if n_one >= n_zero else '0' for n_one, n_zero in zip(ones, zeros))


def _write_bits(perturbed_image, b, row, start_col, bits):
    """Write ``bits`` into every channel of ``row`` starting at ``start_col`` ('1' -> 1.0, '0' -> 0.0)."""
    values = torch.tensor(
        [1.0 if bit == '1' else 0.0 for bit in bits],
        dtype=perturbed_image.dtype,
        device=perturbed_image.device,
    )
    perturbed_image[b, :, row, start_col:start_col + len(bits)] = values


def gradient_perturbation(image, perturbed_image, mask, existing_hex_ids, packet_level_data, image_no,
                          injection_rows, modification_rows, prev_modification_rows, prev_injection_rows, rounds):
    """Rewrite every masked row as a complete frame and record the change in the tracksheet.

    For each row selected by ``mask`` (channel 0), processed from the bottom
    of the image upwards, the function:

    1. decodes the 11 ID bits from ``perturbed_image`` and projects them onto
       the closest ID in ``existing_hex_ids`` (Hamming distance),
    2. decodes the 64 data bits, and either decodes the 7 middle bits
       (modification / previous rows) or uses the fixed ``"0001000"``
       (injection rows),
    3. recomputes the CRC with ``calculate_crc`` and writes
       SOF + ID + middle bits + data + CRC + the fixed 13-bit ending into the
       row, painting the remaining columns green (R=0, G=1, B=0),
    4. inserts a new tracksheet row (injection) or updates ``can_id`` /
       ``operation_label`` of the matching row (all other kinds).

    Args:
        image: Unperturbed batch ``[B, C, H, W]``; only used to locate rows
            whose first pixel is black in channels 0-2.
        perturbed_image: Batch that is decoded and overwritten in place.
        mask: Tensor whose ``[b, 0]`` plane is non-zero on rows to process.
        existing_hex_ids: Hex strings of the IDs a row may be projected to.
        packet_level_data: Tracksheet DataFrame; assumes a default
            ``RangeIndex`` because label and positional indexing are mixed
            — TODO confirm against the caller.
        image_no: Value of ``image_no`` identifying this image's rows.
        injection_rows, modification_rows, prev_modification_rows,
        prev_injection_rows: Row numbers per perturbation kind. When a row
            appears in several collections, the priority is injection >
            modification > prev_mod > prev_inj.
        rounds: Attack round; for ``rounds != 0`` injected tracksheet rows
            also get ``pred_label = "A"``.

    Returns:
        ``(perturbed_image, packet_level_data)``.

    Raises:
        IndexError: For an injection row with no black-first-pixel row above
            it (unchanged from the original behaviour).
    """
    ID_LEN = 11
    MID_LEN = 7
    DATA_LEN = 64
    # Value copied from the original arithmetic; presumably the duration of
    # one pixel/bit in seconds — TODO confirm.
    TIME_STEP = 0.000002
    # Fixed trailer written after the CRC (the original comment describes it
    # as CRC delimiter, ACK, EoF, IFS).
    ENDING = '1011111111111'
    OPERATION_LABELS = {"modification": "M", "prev_mod": "Pm", "prev_inj": "Pi"}

    existing_int_ids = [int(h, 16) for h in existing_hex_ids]

    # Row number -> kind. Filled from lowest to highest priority so that a
    # later (higher priority) kind overwrites an earlier one, matching the
    # original if/elif order.
    row_kinds = {}
    for kind, rows in (("prev_inj", prev_injection_rows),
                       ("prev_mod", prev_modification_rows),
                       ("modification", modification_rows),
                       ("injection", injection_rows)):
        for r in rows:
            row_kinds[int(r)] = kind

    id_start, id_end = 1, 1 + ID_LEN
    mid_end = id_end + MID_LEN
    data_end = mid_end + DATA_LEN

    for b in range(image.shape[0]):
        masked_rows = torch.unique(mask[b, 0].nonzero(as_tuple=True)[0])
        masked_rows = torch.sort(masked_rows, descending=True).values.tolist()

        for row in masked_rows:
            flag = row_kinds.get(row)
            if flag is None:
                # The original left `flag` unassigned here (NameError on the
                # first row, stale value afterwards); an unclassified row
                # cannot be processed meaningfully, so skip it.
                continue

            # Rows above `row` whose first pixel is black in channels 0-2.
            first_column = image[b, :3, :row, 0]
            black_rows = torch.nonzero((first_column == 0.0).all(dim=0)).flatten().tolist()
            target_index = len(black_rows) - 1

            if flag == "injection":
                nearest_packet_row = black_rows[-1]
                region = perturbed_image[b, :, nearest_packet_row:row, :]
                red_pixels = (region[0] == 1.0) & (region[1] == 0.0) & (region[2] == 0.0)
                red_pixel_count = red_pixels.sum().item()

                image_packets = packet_level_data[packet_level_data["image_no"] == image_no]
                timestamp = image_packets.iloc[target_index]["timestamp"]
                new_timestamp = (timestamp
                                 + (row - nearest_packet_row) * 128 * TIME_STEP
                                 - red_pixel_count * TIME_STEP)

            # --- 1. Decode ID bits and project to the nearest existing ID ---
            decoded_bits = _decode_majority_bits(perturbed_image[b, :, row, id_start:id_end])
            gen_int = int(decoded_bits, 2)
            best_int = min(existing_int_ids, key=lambda eid: bin(eid ^ gen_int).count('1'))
            new_id = format(best_int, 'X')
            proj_bits = bin(best_int)[2:].zfill(ID_LEN)

            # --- 2. Overwrite the ID region with the projected bits ---
            _write_bits(perturbed_image, b, row, id_start, proj_bits)

            # --- 3. Decode data bits and middle bits ---
            data_bits = _decode_majority_bits(perturbed_image[b, :, row, mid_end:data_end])
            if flag in OPERATION_LABELS:
                # A middle bit is 1 when any channel of the pixel is > 0.
                mid_pixels = perturbed_image[b, :, row, id_end:mid_end]
                mid_bits = ''.join(str(int(v)) for v in (mid_pixels > 0.0).any(dim=0).tolist())
            else:
                mid_bits = "0001000"

            # --- 4. Build the frame, append CRC and ending, write it back ---
            frame_start = '0' + proj_bits + mid_bits + data_bits
            crc_bits = bin(calculate_crc(frame_start))[2:].zfill(15)
            frame = frame_start + crc_bits + ENDING
            _write_bits(perturbed_image, b, row, 0, frame)

            # Everything after the frame is painted green.
            perturbed_image[b, 0, row, len(frame):] = 0.0
            perturbed_image[b, 1, row, len(frame):] = 1.0
            perturbed_image[b, 2, row, len(frame):] = 0.0

            # --- 5. Update the packet-level tracksheet ---
            first_packet_index = packet_level_data.index[packet_level_data["image_no"] == image_no][0]
            packet_index = first_packet_index + target_index + 1

            if flag == "injection":
                new_row = {
                    "row_no": [row],
                    "timestamp": [new_timestamp],
                    "can_id": [new_id],
                    "image_no": [image_no],
                    "valid_flag": [1],
                    "original_label": ["A"],
                    "operation_label": ["I"],
                }
                if rounds != 0:
                    new_row["pred_label"] = ["A"]
                packet_level_data = pd.concat(
                    [packet_level_data.iloc[:packet_index],
                     pd.DataFrame(new_row),
                     packet_level_data.iloc[packet_index:]],
                    ignore_index=True,
                )
            else:
                packet_level_data.loc[packet_index, ["can_id", "operation_label"]] = [
                    new_id, OPERATION_LABELS[flag]
                ]

    return perturbed_image, packet_level_data
def perform_perturbation(model, data_grad, perturbed_data, existing_hex_ids, packet_level_data, n_image,modification_queue, injection_queue, prev_mod_queue, prev_inj_queue, rounds,I,M,Pi,Pm):
    """Perturb one batch through ``apply_inj_mod`` and classify the result.

    Every argument except ``model`` is forwarded unchanged to
    ``apply_inj_mod``. The perturbed batch it returns is then evaluated by
    ``model`` with gradient tracking disabled.

    Returns:
        ``(final_pred, perturbed_data, packet_level_data)`` where
        ``final_pred`` holds the index of the largest model output along
        dimension 1 (``keepdim=True``).
    """
    result = apply_inj_mod(
        data_grad, perturbed_data, existing_hex_ids, packet_level_data, n_image,
        modification_queue, injection_queue, prev_mod_queue, prev_inj_queue,
        rounds, I, M, Pi, Pm,
    )
    # The queues handed back by apply_inj_mod are not used by this function.
    perturbed_data, packet_level_data, modification_queue, injection_queue = result

    with torch.no_grad():
        scores = model(perturbed_data)

    _, final_pred = scores.max(1, keepdim=True)
    return final_pred, perturbed_data, packet_level_data
def find_max_prev_mod(image, image_no, packet_level_data,rounds):
    """Return row numbers of previously modified packets in one image.

    In round 0 nothing has been modified yet, so the result is always empty.
    In later rounds a tracksheet row qualifies when, compared
    case-insensitively, ``original_label`` is "A", ``operation_label`` is
    "M" or "PM", and ``pred_label`` is "A".

    Args:
        image: 4-D tensor; its third dimension bounds the valid row numbers.
        image_no: Value of ``image_no`` selecting this image's packets.
        packet_level_data: Tracksheet DataFrame.
        rounds: Current attack round.

    Returns:
        List of ``row_no`` values (ints) that lie inside the image.

    Raises:
        KeyError: If ``original_label`` or ``image_no`` is not a column.
    """
    columns = packet_level_data.columns
    if any(required not in columns for required in ('original_label', 'image_no')):
        raise KeyError("Missing required columns.")

    per_image = packet_level_data.loc[packet_level_data["image_no"] == image_no]

    if rounds == 0:
        # Empty frame that still carries the columns read below.
        candidates = per_image.iloc[0:0]
    else:
        def as_upper(column):
            return per_image[column].astype(str).str.upper()

        keep = (
            (as_upper("original_label") == "A")
            & as_upper("operation_label").isin(["M", "PM"])
            & (as_upper("pred_label") == "A")
        )
        candidates = per_image[keep]

    _, _, height, _ = image.shape
    row_numbers = candidates["row_no"].astype(int).tolist()
    return [r for r in row_numbers if 0 <= r < height]
def find_max_injection(image):
    """Return the rows that are entirely green in at least one batch item.

    A pixel counts as green when R == 0, G == 1 and B == 0 (channels 0, 1,
    2). A row is a candidate when every pixel of that row is green for at
    least one image in the batch.

    Args:
        image: Tensor of shape ``[batch, channels, rows, cols]``.

    Returns:
        Ascending list of row indices (ints).

    Raises:
        ValueError: If ``image`` is not 4-dimensional.
    """
    if image.dim() != 4:
        raise ValueError(f"expected a 4-D image tensor, got {image.dim()} dimensions")

    red, green, blue = image[:, 0], image[:, 1], image[:, 2]
    green_mask = (red == 0) & (green == 1) & (blue == 0)   # [batch, rows, cols]

    # Reduce over columns, then over the batch, in a single pass instead of
    # converting one tensor to bool per row.
    row_is_green = green_mask.all(dim=2).any(dim=0)        # [rows]
    return torch.nonzero(row_is_green).flatten().tolist()
+ Injection queue is only truncated if > max_injection_len. + """ + sof_len, id_mask_length, mid_bits_length = 1, 11, 7 + batch_size, _, n_rows, n_cols = image.shape + + # --- Precompute safe column indices --- + id_start = sof_len + id_end = sof_len + id_mask_length + data_start = id_end + mid_bits_length + data_end = data_start + 64 + + # --- select candiidate rows via label match --- + modification_rows = find_max_modification(image,image_no,packet_level_data,rounds) + # print("modification_rows ",modification_rows) + prev_mod_rows = find_max_prev_mod(image,image_no,packet_level_data,rounds) + # print("previously modified rows",prev_mod_rows ) + prev_inj_rows= find_max_prev_inj(image, image_no, packet_level_data,rounds) + # print("previously injected rows",prev_inj_rows ) + injection_rows = find_max_injection(image) + + + + #How strong are the gradients in the ID + data bit region of this row? + def compute_grad_for_row_dos(row): + mask = torch.zeros_like(data_grad) + if id_start < id_end: + mask[:, :, row, id_start:id_end] = 1 + if data_start < data_end: + mask[:, :, row, data_start:data_end] = 1 + return float(torch.sum((data_grad * mask) ** 2).item()) #using squared sum because we are more interested in the higher abd values. 
def evaluation_metrics(all_preds, all_labels,folder, filename):
    """Print, plot and compute the detection metrics for binary predictions.

    The confusion matrix is saved as ``folder/filename`` (the folder is
    created when missing).

    Args:
        all_preds: Predicted labels (0 or 1), any array-like.
        all_labels: True labels (0 or 1), any array-like.
        folder: Output directory for the confusion-matrix image.
        filename: File name of the image inside ``folder``.

    Returns:
        ``(tnr, mdr, oa_asr, accuracy, precision, recall, f1)`` where
        ``tnr = TN / (TN + FP)``, ``mdr = TP / (TP + FN)`` and ``oa_asr`` is
        the fraction of label-1 samples predicted as 0. Each ratio is 0.0
        when its denominator is zero.
    """
    # Accept plain lists as well as arrays; the element-wise comparisons
    # below need numpy semantics.
    all_preds = np.asarray(all_preds)
    all_labels = np.asarray(all_labels)

    print("Number of predictions:", len(all_preds))
    print("Unique predictions:", np.unique(all_preds, return_counts=True))
    print("Unique labels:", np.unique(all_labels, return_counts=True))

    # Fix the label set so the matrix is 2x2 even when only one class is
    # present; otherwise cm[0, 1] below raises IndexError.
    cm = confusion_matrix(all_labels, all_preds, labels=[0, 1])
    print("Confusion Matrix:\n", cm)

    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
    disp.plot(cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')

    output_path = os.path.join(folder, filename)
    os.makedirs(folder, exist_ok=True)

    plt.savefig(output_path, dpi=300)
    plt.close()

    true_negatives = cm[0, 0]
    false_positives = cm[0, 1]
    false_negatives = cm[1, 0]
    true_positives = cm[1, 1]

    negatives = true_negatives + false_positives
    positives = true_positives + false_negatives
    tnr = true_negatives / negatives if negatives > 0 else 0.0
    mdr = true_positives / positives if positives > 0 else 0.0

    IDS_accu = accuracy_score(all_labels, all_preds)
    IDS_prec = precision_score(all_labels, all_preds, zero_division=0)
    IDS_recall = recall_score(all_labels, all_preds, zero_division=0)
    IDS_F1 = f1_score(all_labels, all_preds, zero_division=0)

    # Samples labelled 1 that were predicted as 0.
    misclassified_attack_packets = int(((all_labels == 1) & (all_preds == 0)).sum())
    # All samples labelled 1.
    total_attack_packets = int((all_labels == 1).sum())

    # Same safe-division convention as tnr / mdr.
    oa_asr = (misclassified_attack_packets / total_attack_packets
              if total_attack_packets > 0 else 0.0)

    return tnr, mdr, oa_asr, IDS_accu, IDS_prec, IDS_recall, IDS_F1
{target}") + data, target = data.to(device), target.to(device) + + # If target is a 1D tensor, no need for item() + current_target = target[0] if target.dim() > 0 else target + # feedback = 0 + + # Initialize predictions for benign images (target=0) + initial_output = model(data) + # feedback += 1 + final_pred = initial_output.max(1, keepdim=True)[1] + # Initialize perturbation counts + injection_count = 0 + modification_count = 0 + prev_mod_count = 0 + prev_inj_count = 0 + # Perform perturbation for predicted attack images + if current_target == 1: + print("\nImage no:", n_image, "(Attack image)") + + data.requires_grad = True + model.eval() + + initial_output = model(data) + loss = F.nll_loss(initial_output, target) + model.zero_grad(set_to_none=True) + loss.backward() + data_grad = data.grad.data + model.zero_grad(set_to_none=True) # clean up + data_denorm = data + # continue_perturbation = True + + if rounds == 0: + n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A")).sum() + print("n in image no ",n_attack_current, n_image) + I = 0 + M = n_attack_current + Pm = 0 + Pi = 0 + print("I, M, Pi, Pm for round 0", I,M,Pi,Pm) + + elif rounds == 1: + n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A")).sum() + I = 0 + M = 0 + Pi = 0 + Pm = math.ceil(0.5*n_attack_current) + print("I, M, Pi, Pm for round 1", I,M,Pi,Pm) + else: + n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A")).sum() + I = 0 + M = 0 + Pi = 0 + Pm = math.ceil(0.5*n_attack_current) + + print("I, M, Pi, Pm for round>=2", I,M,Pi,Pm) + + + + modification_queue, injection_queue, prev_mod_queue, prev_inj_queue = build_queues(data_denorm, n_image, data_grad,packet_level_data,rounds) + num_inj = len(injection_queue) + num_mod = len(modification_queue) + num_prev_mod = len(prev_mod_queue) + num_prev_inj = len(prev_inj_queue) + + 
def _load_packet_level_data(csv_path, rounds):
    """Read the tracksheet CSV and normalise its columns for the attack."""
    tracksheet = pd.read_csv(csv_path, dtype=str, low_memory=False)

    # Strip column names before any column is addressed by name.
    tracksheet.columns = tracksheet.columns.str.strip()
    tracksheet = tracksheet.fillna("None")

    tracksheet["row_no"] = tracksheet["row_no"].astype(int)
    tracksheet["timestamp"] = tracksheet["timestamp"].astype(float)
    tracksheet["image_no"] = tracksheet["image_no"].astype(int)
    tracksheet["valid_flag"] = tracksheet["valid_flag"].astype(int)

    if rounds == 0:
        print("in round 0")
        tracksheet = tracksheet.rename(columns={"label": "original_label"})
        # The CSV is read as strings, so the keys are "0"/"1", not 0/1.
        tracksheet["original_label"] = tracksheet["original_label"].map({"0": "B", "1": "A"})
        tracksheet["operation_label"] = "None"

    return tracksheet


def _save_packet_level_data(tracksheet, output_path, rounds):
    """Write the tracksheet to ``output_path/packet_level_data_<rounds>.csv``."""
    # Only the timestamp gets a fixed precision; the integer columns are
    # re-cast so they are not written as floats.
    tracksheet["timestamp"] = tracksheet["timestamp"].map(lambda x: f"{x:.6f}")
    for column in ("row_no", "image_no", "valid_flag"):
        if column in tracksheet.columns:
            tracksheet[column] = tracksheet[column].astype(int)

    tracksheet.to_csv(os.path.join(output_path, f"packet_level_data_{rounds}.csv"), index=False)


def run(params):
    """Run one attack round: load inputs, attack, report metrics, save the tracksheet.

    Args:
        params: Mapping with the keys ``test_data_dir``, ``test_label_file``,
            ``output_path``, ``rounds``, ``packet_level_data`` (path of the
            tracksheet CSV) and ``model_path``.

    Side effects:
        Creates ``output_path``, prints progress and metrics, saves the
        confusion matrix under ``CF_Results/<output_path>/perturbed_dos.png``
        (via ``evaluation_metrics``) and writes
        ``<output_path>/packet_level_data_<rounds>.csv``.
    """
    test_dataset_dir = params["test_data_dir"]
    test_label_file = params["test_label_file"]
    output_path = params["output_path"]
    rounds = params["rounds"]
    tracksheet_path = params["packet_level_data"]
    model_path = params["model_path"]

    os.makedirs(output_path, exist_ok=True)
    folder = os.path.join("CF_Results", output_path)
    filename = "perturbed_dos.png"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    existing_hex_ids = ['14A', '170', '17C', '18F', '1A3', '1A4', '202', '309', '326', '374', '37B']

    packet_level_data = _load_packet_level_data(tracksheet_path, rounds)

    # Load dataset (the dataset object itself is not needed here).
    _, test_loader, start_image_number = load_dataset(test_dataset_dir,test_label_file,device,is_train=False)
    print("loaded test dataset")

    # Load the model
    model = load_model(model_path)

    st = time.time()
    print("Start time:", st)
    preds, labels, packet_level_data = Attack_procedure(model, device, test_loader,output_path,existing_hex_ids, start_image_number, packet_level_data,rounds)
    et = time.time()
    print("End time:", et)

    tnr, mdr, oa_asr, IDS_accu, IDS_prec, IDS_recall,IDS_F1 = evaluation_metrics(preds, labels,folder,filename)
    print("----------------IDS Perormance Metric----------------")
    print(f'Accuracy: {IDS_accu:.4f}')
    print(f'Precision: {IDS_prec:.4f}')
    print(f'Recall: {IDS_recall:.4f}')
    print(f'F1 Score: {IDS_F1:.4f}')
    print("----------------Adversarial attack Perormance Metric----------------")
    print("TNR:", tnr)
    print("Malcious Detection Rate:", mdr)
    print("Attack Success Rate:", oa_asr)
    print("Execution Time:", et-st)

    _save_packet_level_data(packet_level_data, output_path, rounds)
if __name__ == "__main__":
    # Standalone entry point: read the YAML config and run a single round.
    import argparse
    import yaml

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default="config_dos_CARLA.yaml")
    args = parser.parse_args()

    # Close the config file deterministically; an empty file yields None.
    with open(args.config, "r") as config_file:
        cfg = yaml.safe_load(config_file) or {}

    # Ensure attack section exists
    if "attack" not in cfg:
        raise ValueError("Config file must contain 'attack' section.")

    # run() reads test_data_dir / test_label_file / output_path / rounds /
    # packet_level_data / model_path, while the config file names them
    # original_* / output_dir / surrogate_model. Derive the missing keys the
    # way the driver does; keys already present in the config win.
    attack_cfg = dict(cfg["attack"])
    rounds = attack_cfg.setdefault("rounds", cfg.get("round", 0))

    fallbacks = {"output_path": "output_dir", "model_path": "surrogate_model"}
    if rounds == 0:
        fallbacks.update({
            "test_data_dir": "original_test_dir",
            "packet_level_data": "original_tracksheet",
            "test_label_file": "original_label_file",
        })
    for run_key, config_key in fallbacks.items():
        if run_key not in attack_cfg and config_key in attack_cfg:
            attack_cfg[run_key] = attack_cfg[config_key]

    run(attack_cfg)
class InceptionStem(nn.Module):
    """Stem of the Inception-ResNet model.

    Maps a 3-channel input to 128 channels through five convolutions and one
    3x3 / stride-2 max-pool. The layers live in ``self.stem`` in the order
    they are applied.
    """

    def __init__(self):
        super().__init__()
        layers = [
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding='same'),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding='valid'),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            nn.Conv2d(32, 64, kernel_size=1, stride=1, padding='valid'),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding='same'),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
        ]
        self.stem = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stem layers to ``x`` and return the result."""
        return self.stem(x)
class ReductionA(nn.Module):
    """Reduction block with three parallel stride-2 branches.

    The branch outputs are concatenated on the channel dimension, giving
    ``192 + 128 + in_channels`` channels:

    * ``branch0``: 3x3 stride-2 convolution to 192 channels,
    * ``branch1``: 1x1 -> 3x3 -> 3x3 stride-2 convolutions to 128 channels,
    * ``branch2``: 3x3 stride-2 max-pool of the input.
    """

    def __init__(self, in_channels = 128):
        super().__init__()
        self.branch0 = nn.Conv2d(in_channels, 192, kernel_size=3, stride=2, padding='valid')
        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channels, 96, kernel_size=1, stride=1, padding='same'),
            nn.Conv2d(96, 96, kernel_size=3, stride=1, padding='same'),
            nn.Conv2d(96, 128, kernel_size=3, stride=2, padding='valid'),
        )
        self.branch2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

    def forward(self, x):
        """Run the three branches on ``x`` and concatenate them on dim 1."""
        outputs = [branch(x) for branch in (self.branch0, self.branch1, self.branch2)]
        return torch.cat(outputs, dim=1)
class ReductionB(nn.Module):
    """Second reduction block: four parallel branches over a 448-channel input.

    Branches 0-2 each start with a 1x1 convolution down to 128 channels and
    end with a 'valid' 3x3 convolution (branch 2 has an extra 'same' 3x3
    convolution in between); branch 3 is a stride-1 3x3 max-pool. Every branch
    shrinks height and width by 2, and the outputs are concatenated along the
    channel dimension (192 + 128 + 128 + 448 = 896 channels).
    """

    def __init__(self):
        super().__init__()

        def conv(c_in, c_out, kernel, pad):
            # All convolutions in this block use stride 1.
            return nn.Conv2d(c_in, c_out, kernel_size=kernel, stride=1, padding=pad)

        # Layers are instantiated in the original order so that seeded
        # initialisation and the state-dict layout are unchanged.
        self.branch0 = nn.Sequential(
            conv(448, 128, 1, 'same'),
            conv(128, 192, 3, 'valid'),
        )
        self.branch1 = nn.Sequential(
            conv(448, 128, 1, 'same'),
            conv(128, 128, 3, 'valid'),
        )
        self.branch2 = nn.Sequential(
            conv(448, 128, 1, 'same'),
            conv(128, 128, 3, 'same'),
            conv(128, 128, 3, 'valid'),
        )
        self.branch3 = nn.MaxPool2d(kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Return the channel-wise concatenation of the four branch outputs."""
        pieces = (
            self.branch0(x),
            self.branch1(x),
            self.branch2(x),
            self.branch3(x),
        )
        return torch.cat(pieces, dim=1)
def load_model(model_path):
    """Load the fine-tuned two-class DenseNet-161 surrogate for inference.

    Args:
        model_path: Path to a ``state_dict`` checkpoint saved from a
            DenseNet-161 whose classifier was replaced by a 2-output linear
            layer.

    Returns:
        The model, moved to CUDA when available (CPU otherwise) and switched
        to ``eval()`` mode.
    """
    num_classes = 2
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The checkpoint overwrites every parameter and buffer (strict load), so
    # there is no point downloading the ImageNet weights first; weights=None
    # also lets this run without network access.
    model = models.densenet161(weights=None)
    model.classifier = nn.Linear(model.classifier.in_features, num_classes)

    # map_location makes a checkpoint that was saved on a GPU loadable on a
    # CPU-only machine (and vice versa).
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)

    model = model.to(device)
    model.eval()

    return model
def calculate_crc(data):
    """Compute the CAN CRC-15 (polynomial 0x4599) of a bit sequence.

    The register starts at zero. For every input bit the register is shifted
    left once (truncated to 15 bits) and XOR-ed with the polynomial when the
    incoming bit differs from the register's previous top bit. The earlier
    implementation shifted 15 times per input bit and tested bit 15 of a
    15-bit register, so it did not produce this checksum.

    Args:
        data: Iterable of bits, either ``'0'``/``'1'`` characters or integers
            (only the least-significant bit of each item is used).

    Returns:
        The 15-bit CRC as an int in ``[0, 0x7FFF]``; ``0`` for empty input.
    """
    poly = 0x4599  # x^15 + x^14 + x^10 + x^8 + x^7 + x^4 + x^3 + 1
    crc = 0x0000

    for bit in data:
        # Feedback bit: incoming bit XOR current MSB (bit 14) of the register.
        feedback = (int(bit) & 0x01) ^ ((crc >> 14) & 0x01)
        crc = (crc << 1) & 0x7FFF  # keep the register at 15 bits
        if feedback:
            crc ^= poly

    return crc
output_path) + +def generate_mask(perturbed_data, modification_queue, injection_queue,prev_mod_queue, prev_inj_queue, deletion_queue, rounds, I, M, Pi, Pm, D): + """ + Generate a binary perturbation mask for CAN-frame images using + budgeted injection and modification queues. + + Rows are selected from four queues (new injections, original + modifications, previously injected, previously modified) up to + their allocated budgets, without exceeding top_k. For all selected + rows, both ID and data bit regions are masked. + + Returns the perturbation mask along with selected injection and + modification row indices. + """ + sof_len = 1 + id_mask_length = 11 + mid_bits_length = 7 + data_bits_length = 64 + + batch_size, channels, height, width = perturbed_data.shape + id_start = sof_len + id_end = sof_len + id_mask_length + data_start = sof_len + id_mask_length + mid_bits_length + data_end = data_start + data_bits_length + + # Initialize mask + mask = torch.zeros_like(perturbed_data, dtype=torch.float32) + injection_rows = [] + modification_rows = [] + prev_modification_rows = [] + prev_injection_rows = [] + deletion_rows = [] + + def pop_k(queue, k): + selected = [] + for _ in range(min(k, len(queue))): + _, row = queue.popleft() + selected.append(row) + return selected + + + + # 1. Select rows according to budgets + print("D in generate_mask", D) + # num_inj = len(injection_queue) + del_rows = pop_k(deletion_queue,D) + inj_rows = pop_k(injection_queue, I) + mod_rows = pop_k(modification_queue, M) + prev_inj_rows = pop_k(prev_inj_queue, Pi) + prev_mod_rows = pop_k(prev_mod_queue, Pm) + + # 2. 
Aggregate selections + injection_rows.extend(inj_rows) + deletion_rows.extend(del_rows) + modification_rows.extend(mod_rows) + prev_modification_rows.extend(prev_mod_rows) + prev_injection_rows.extend(prev_inj_rows) + + all_rows = injection_rows + modification_rows + prev_modification_rows + prev_injection_rows + deletion_rows + + for row in all_rows: + for b in range(batch_size): + # ID bits + # mask[b, :, row, id_start : id_end] = 1.0 + # Data bits + # mask[b, :, row, data_start : data_end] = 1.0 + mask[b,:,row,0:128] = 1.0 #because deletion will require entire row to be deleted + + + # for _ in range(top_k): + # if not injection_queue and not modification_queue: + # break # nothing left to pop + + # if modification_queue: + # mod_grad, mod_row = modification_queue[0] + # # Always prefer modification queue if it's not empty + # grad, row = modification_queue.popleft() + # modification_rows.append(row) + # p_type = "mod" + # elif injection_queue: # Only process injection queue if modification queue is empty + # inj_grad, inj_row = injection_queue[0] + # grad, row = injection_queue.popleft() + # injection_rows.append(row) + # p_type = "inj" + + # # Apply ID + Data masking for the selected row + # for b in range(batch_size): + # if p_type == "inj": + # mask[b, :, row, sof_len:sof_len + id_mask_length] = 1.0 # ID bits + # mask[b, :, row, sof_len + id_mask_length + mid_bits_length: + # sof_len + id_mask_length + mid_bits_length + data_bits_length] = 1.0 # Data bits + # else: + # mask[b, :, row, sof_len + id_mask_length + mid_bits_length: + # sof_len + id_mask_length + mid_bits_length + data_bits_length] = 1.0 # Data bits + + + # selected_total = len(injection_rows) + len(modification_rows) + len(prev_modification_rows) + len(prev_injection_rows) + # assert selected_total <= top_k, "Selected more rows than top_k" + + # print_image(mask,1,0) + return mask, injection_rows, modification_rows, prev_modification_rows, prev_injection_rows, del_rows + +def 
bit_flip_attack_rgb(image, mask, data_grad, sign_data_grad, injection_rows, del_rows): + """ + Bit-flip attack for RGB CAN images. + - Flips pixels based on sign of gradient: + If black ([0,0,0]) and sign_grad > 0 → flip to white ([1,1,1]) + If white ([1,1,1]) and sign_grad < 0 → flip to black ([0,0,0]) + - Works for ID bits and data bits separately with different top-k percentages. + """ + + perturbed_image = image.clone() # Start from original image + B, C, H, W = image.shape + ID_LEN = 11 + MID_LEN = 7 + DATA_LEN = 64 + id_start = 1 + id_end = id_start + ID_LEN + data_start = 1 + ID_LEN + MID_LEN + data_end = data_start + DATA_LEN + count_bit_flip_1 = 0 + count_bit_flip_0 = 0 + print("injection_rows", injection_rows) + print("del rows", del_rows) + for b in range(B): + rows = mask[b, 0].nonzero(as_tuple=True)[0] # Only use first channel for mask + rows = torch.unique(rows) + rows = torch.sort(rows, descending=True).values # Sort descending + + for row in rows: + if row in del_rows: + perturbed_image[b,0,row,:] = 0.0 + perturbed_image[b,1,row,:] = 1.0 + perturbed_image[b,2,row,:] = 0.0 + else: + # --- ID bits --- + id_pixels = perturbed_image[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID Pixels:", id_pixels) + id_grads = data_grad[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID gradient:", id_grads) + id_signs = sign_data_grad[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID Signs:", id_signs) + + # Collapse gradients to single value per bit (sum over channels) + id_scores = torch.sum(torch.abs(id_grads), dim=0) + # print("ID Scores: ", id_scores) + num_id_top = max(1, int(1.0 * ID_LEN)) + id_top_idx = torch.topk(id_scores, num_id_top).indices + # print("Top Index:", id_top_idx) + count_bit_flip = 0 + # print("ID before flipping: ", id_pixels.clone()) + for idx in id_top_idx: + # print("Index:", idx) + pixel = id_pixels[:, idx] # [R, G, B] + # print("Pixel:", pixel) + grad_sign = torch.sum(id_signs[:, idx]).item() # 
Combine channels' signs + grad_sign = (id_signs[0, idx] + id_signs[1, idx] + id_signs[2, idx]).item() + # print("Grad Sign:", grad_sign) + if grad_sign > 0: # Black → White + id_pixels[:, idx] = 1.0 + count_bit_flip += 1 + elif grad_sign < 0: # White → Black + id_pixels[:, idx] = 0.0 + count_bit_flip += 1 + + # print("Number of bitflip in ID: ", count_bit_flip) + # print("ID after flipping: ", id_pixels.clone()) + + + # --- Data bits --- + + data_pixels = perturbed_image[b, :, row, data_start:data_end] # [3, DATA_LEN] + data_grads = data_grad[b, :, row, data_start:data_end] + data_signs = sign_data_grad[b, :, row, data_start:data_end] + + data_scores = torch.sum(torch.abs(data_grads), dim=0) + num_data_top = max(1, int(1.0 * DATA_LEN)) + data_top_idx = torch.topk(data_scores, num_data_top).indices + + # print("data before flipping: ", data_pixels.clone()) + for idx in data_top_idx: + pixel = data_pixels[:, idx] + # grad_sign = torch.sum(data_signs[:, idx]).item() + grad_sign = (data_signs[0, idx] + data_signs[1, idx] + data_signs[2, idx]).item() + if grad_sign > 0: + data_pixels[:, idx] = 1.0 + count_bit_flip_1 += 1 + elif grad_sign < 0: + data_pixels[:, idx] = 0.0 + count_bit_flip_0 += 1 + + # print("data after flipping: ", data_pixels.clone()) + + # Assign modified bits back + perturbed_image[b, :, row, id_start:id_end] = id_pixels + # perturbed_image[b, :, row, data_start:data_end] = data_pixels + + # print("Number of bitflips_1 in Data: ", count_bit_flip_1) + # print("Numberof bitflips_0 in Data,",count_bit_flip_0) + perturbed_image = torch.clamp(perturbed_image, 0, 1) + + return perturbed_image + +def gradient_perturbation(image, perturbed_image,mask,existing_hex_ids, packet_level_data, image_no, injection_rows, modification_rows, prev_modification_rows, prev_injection_rows, del_rows,rounds): + ID_LEN = 11 + MID_LEN = 7 + # mid_bits = '0001000' + injection_set = set(injection_rows) + modification_set = set(modification_rows) + deletion_set = set(del_rows) + 
prev_inj_set = set(prev_injection_rows) + prev_mod_set = set(prev_modification_rows) + original_packets = len(packet_level_data[packet_level_data["image_no"] == image_no]) + # Precompute existing IDs as integers + existing_int_ids = [int(h, 16) for h in existing_hex_ids] + # print(image.shape, mask.shape, perturbed_image.shape) + for row in del_rows: + packet_level_data = packet_level_data[ + ~( + (packet_level_data["image_no"] == image_no) & + (packet_level_data["row_no"] == row) + ) + ].reset_index(drop=True) + + for b in range(image.shape[0]): + totalRows = mask[b, 0].nonzero(as_tuple=True)[0] + totalRows = torch.unique(totalRows) + # totalRows = torch.sort(totalRows).values + totalRows = torch.sort(totalRows, descending=True).values # Sort descending + + # print(rows, flag) + for row in totalRows: + row = row.item() + + if row in injection_set: + flag = "injection" + elif row in modification_set: + flag = "modification" + elif row in prev_mod_set: + flag = "prev_mod" + elif row in prev_inj_set: + flag = "prev_inj" + else: + # print("Skipping row", row) + continue + + + injection_row = row #earlier it was row.item() + i = injection_row - 1 + packets_before_injection = [] + # print("Injection Row: ", injection_row) + + # Traverse upward until first pixel in the row is black + while i >= 0: + first_pixel = image[b, 0, i, 0].item() # First pixel in row i, channel 0 + second_pixel = image[b, 1, i, 0].item() # Second pixel in row i, channel 1 + third_pixel = image[b, 2, i, 0].item() # Third pixel in row i, channel 2 + # print(first_pixel, second_pixel, third_pixel) + if first_pixel == 0.0 and second_pixel == 0.0 and third_pixel == 0.0: + packets_before_injection.append(i) + i -= 1 + + image_packets = packet_level_data[packet_level_data["image_no"] == image_no].reset_index(drop=True) + # print("Image packets before injection:\n", image_packets) + if packets_before_injection: + target_index = len(packets_before_injection) - 1 + # print("target_idex",target_index) + 
else: + target_index = 0 + + # print("Target index for injection:", target_index, flag, injection_row,len(image_packets)) + + if flag == 'injection': + + packet_time = 128 * 0.000002 + + if packets_before_injection: + # CASE A: packets exist before injection row + start_row = packets_before_injection[0] + end_row = injection_row + + red_pixel_count = 0 + for row_idx in range(start_row, end_row): + red_pixels_mask = ( + (perturbed_image[b, 0, row_idx, :] == 1.0) & + (perturbed_image[b, 1, row_idx, :] == 0.0) & + (perturbed_image[b, 2, row_idx, :] == 0.0) + ) + red_pixel_count += red_pixels_mask.sum().item() + + safe_index = min(len(packets_before_injection) - 1, + len(image_packets) - 1) + + base_timestamp = image_packets.iloc[safe_index]["timestamp"] + offset = ((injection_row - start_row) * packet_time + - red_pixel_count * 0.000002) + + new_timestamp = base_timestamp + offset + + else: + # CASE B: inject BEFORE first packet (same image) + first_packet = image_packets.iloc[0] + first_row = first_packet["row_no"] + first_ts = first_packet["timestamp"] + + delta_rows = first_row - injection_row + new_timestamp = first_ts - delta_rows * packet_time + + # --- 1. Decode ID bits from pixels --- + decoded_bits = '' + for col in range(1, 1 + ID_LEN): + pix = perturbed_image[b, :, row, col] + # print(pix) + # dot1 = torch.dot(pix, torch.tensor([1.0, 1.0, 1.0], device=image.device)) + # dot0 = torch.dot(pix, torch.tensor([0.0, 0.0, 0.0], device=image.device)) + # decoded_bits += '1' if dot1 >= dot0 else '0' + ones = (pix == 1.0).sum().item() # count channels equal to 1 + zeros = (pix == 0.0).sum().item() # count channels equal to 0 + bit = '1' if ones >= zeros else '0' + decoded_bits += bit + # print("decoded ID bits",decoded_bits) + + # --- 2. 
Project to nearest existing ID via Hamming distance --- + gen_int = int(decoded_bits, 2) + def hamming_dist(a, b, bitlen=ID_LEN): + return bin(a ^ b).count('1') + + best_int = min(existing_int_ids, + key=lambda eid: hamming_dist(eid, gen_int, bitlen=ID_LEN)) + + new_id = format(best_int, 'X') + + # print(packet_level_data.to_string()) + # Convert back to a bitstring of length ID_len + proj_bits = bin(best_int)[2:].zfill(ID_LEN) + + # --- 3. Overwrite ID-region in perturbed_image with projected bits --- + for idx, bit in enumerate(proj_bits, start=1): + val = 1.0 if bit == '1' else 0.0 + perturbed_image[b, :, row, idx] = val + + + # --- 4. Decode data bits --- + data_bits = '' + start = 1 + ID_LEN + MID_LEN + for col in range(start, start + 64): + pix = perturbed_image[b, :, row, col] + ones = (pix == 1.0).sum().item() # count channels equal to 1 + zeros = (pix == 0.0).sum().item() # count channels equal to 0 + bit = '1' if ones >= zeros else '0' + data_bits += bit + # print("decoded data bits",data_bits) + + # print("Before Perturbed Row",perturbed_image[b, :, row, :]) + if flag in ['modification', 'prev_inj', 'prev_mod']: + mid_bits = '' + # 7 represents middle bits (RTR + IDE + Reserved bit + DLC) + for col in range(1 + ID_LEN, 1 + ID_LEN + 7): + # print("Columns:", col) + pix = perturbed_image[b, :, row, col] + # print("Pixel:", pix) + bit = int((pix > 0.0).any().item()) + mid_bits += str(bit) + else: + mid_bits = "0001000" + + # print("Middle Bits: ", mid_bits) + + # print("Middle Perturbed Row",perturbed_image[b, :, row, 12:19]) + + # --- 5. 
Build full frame bits, CRC, stuff, and write back --- + frame_start = ('0' + proj_bits + mid_bits + data_bits) + crc_val = calculate_crc(frame_start) + crc_bits = bin(crc_val)[2:].zfill(15) + # crc_bits = '0'*15 + uptill_crc = frame_start + crc_bits + # stuffed = stuff_bits(frame_start + crc_bits) + + # Write stuffed bits + for i, bit in enumerate(uptill_crc): + val = 1.0 if bit == '1' else 0.0 + perturbed_image[b, :, row, i] = val + + # Ending part (CRC delimiters, ACK, EoF, IFS) + ending = '1011111111111' + offset = len(uptill_crc) + for i, bit in enumerate(ending): + val = 1.0 if bit == '1' else 0.0 + perturbed_image[b, :, row, offset + i] = val + + # Mark rest as green + for i in range(offset + len(ending), perturbed_image.shape[-1]): + perturbed_image[b, 0, row, i] = 0.0 + perturbed_image[b, 1, row, i] = 1.0 + perturbed_image[b, 2, row, i] = 0.0 + + # print("Final Pedequerturbed Row",perturbed_image[b, :, row, :]) + # print(packet_level_data.to_string()) + + # UPDATE PACKET-LEVEL DATA + if flag == 'injection': + start_index = packet_level_data.index[packet_level_data["image_no"] == image_no][0] + df_part_1 = packet_level_data.iloc[:start_index+target_index+1] + df_part_2 = packet_level_data.iloc[start_index+target_index+1:] + if rounds == 0: + packet_level_data = pd.concat([df_part_1, pd.DataFrame({ "row_no": [injection_row],"timestamp": [new_timestamp], "can_id": [new_id], "image_no": [image_no],"valid_flag": [1], "original_label": "A", "operation_label": "I"}), df_part_2], ignore_index=True) + else: + packet_level_data = pd.concat([df_part_1, pd.DataFrame({ "row_no": [injection_row],"timestamp": [new_timestamp], "can_id": [new_id], "image_no": [image_no],"valid_flag": [1], "original_label": "A", "operation_label": "I","pred_label": "A"}), df_part_2], ignore_index=True) + + elif flag == 'modification': + # print(packet_level_data[packet_level_data["image_no"] == image_no]) + start_index = packet_level_data.index[packet_level_data["image_no"] == image_no][0] + 
def apply_inj_mod(data_grad, image, existing_hex_ids, packet_level_data, n_image, modification_queue, injection_queue, prev_mod_queue, prev_inj_queue,deletion_queue,rounds,I,M,Pi,Pm,D):
    """Run one perturbation pass: select rows, flip bits, then repair frames.

    The three stages are delegated to ``generate_mask`` (row selection from
    the queues), ``bit_flip_attack_rgb`` (gradient-sign bit flips and row
    deletion) and ``gradient_perturbation`` (frame reconstruction and
    packet-level bookkeeping).

    Returns:
        ``(perturbed_image, packet_level_data, modification_queue,
        injection_queue, deletion_queue)`` - the queues are the same objects
        that were passed in, after ``generate_mask`` has popped from them.
    """
    # Stage 1: pick the rows to perturb within the given budgets.
    selection = generate_mask(
        image, modification_queue, injection_queue, prev_mod_queue,
        prev_inj_queue, deletion_queue, rounds, I, M, Pi, Pm, D,
    )
    mask, inj_rows, mod_rows, prev_mod_rows, prev_inj_rows, removed_rows = selection

    # Stage 2: flip bits along the gradient sign inside the masked rows.
    grad_sign = data_grad.sign()
    flipped = bit_flip_attack_rgb(image, mask, data_grad, grad_sign, inj_rows, removed_rows)

    # Stage 3: rebuild frames in the touched rows and update the track sheet.
    flipped, packet_level_data = gradient_perturbation(
        image, flipped, mask, existing_hex_ids, packet_level_data, n_image,
        inj_rows, mod_rows, prev_mod_rows, prev_inj_rows, removed_rows, rounds,
    )

    return flipped, packet_level_data, modification_queue, injection_queue, deletion_queue
def find_max_prev_inj(image, image_no, packet_level_data,rounds):
    """Return row numbers of previously injected packets still flagged as attack.

    For ``rounds == 0`` nothing has been injected yet, so the result is empty.
    Otherwise a packet of image ``image_no`` qualifies when (case-insensitive)
    its ``original_label`` is ``"A"``, its ``operation_label`` is ``"I"`` or
    ``"PI"`` and its ``pred_label`` is ``"A"``. Row numbers outside
    ``[0, image.shape[2])`` are dropped.

    Raises:
        KeyError: if ``original_label`` or ``image_no`` is not a column.
    """
    available = packet_level_data.columns
    for required in ('original_label', 'image_no'):
        if required not in available:
            raise KeyError("Missing required columns.")

    frame = packet_level_data.loc[packet_level_data["image_no"] == image_no]

    if rounds == 0:
        # Keep the columns but no rows: there are no earlier injections.
        chosen = frame.iloc[0:0]
    else:
        is_attack = frame["original_label"].astype(str).str.upper() == "A"
        was_injected = frame["operation_label"].astype(str).str.upper().isin(["I", "PI"])
        still_flagged = frame["pred_label"].astype(str).str.upper() == "A"
        chosen = frame[is_attack & was_injected & still_flagged]

    row_numbers = chosen["row_no"].astype(int).tolist()

    # Only rows that exist in the image are usable.
    _, _, height, _ = image.shape
    return [r for r in row_numbers if 0 <= r < height]
def find_max_modification(image, image_no, packet_level_data,rounds):
    """Return row numbers of attack packets that are candidates for modification.

    In round 0 every packet of image ``image_no`` whose ``original_label`` is
    ``"A"`` (case-insensitive) qualifies. In later rounds the packet must also
    have no operation recorded (``operation_label`` is missing or the string
    ``"none"`` in any case) and a ``pred_label`` of ``"A"``. Row numbers
    outside ``[0, image.shape[2])`` are dropped.

    Raises:
        KeyError: if ``original_label`` or ``image_no`` is not a column.
    """
    available = packet_level_data.columns
    for required in ('original_label', 'image_no'):
        if required not in available:
            raise KeyError("Missing required columns.")

    frame = packet_level_data.loc[packet_level_data["image_no"] == image_no]

    is_attack = frame["original_label"].astype(str).str.upper() == "A"
    if rounds == 0:
        selector = is_attack
    else:
        operation = frame["operation_label"]
        untouched = operation.isna() | (operation.astype(str).str.upper() == "NONE")
        still_flagged = frame["pred_label"].astype(str).str.upper() == "A"
        selector = is_attack & untouched & still_flagged
    chosen = frame[selector]

    row_numbers = chosen["row_no"].astype(int).tolist()

    # Only rows that exist in the image are usable.
    _, _, height, _ = image.shape
    return [r for r in row_numbers if 0 <= r < height]
def build_deletion_queue(packet_level_data, image_no, del_rows,D, benign_can_id="43F",rng_seed=0):
    """Pick up to ``D`` attack rows to delete, closest-before-anchor first.

    An *anchor* is a packet of image ``image_no`` whose ``can_id`` equals
    ``benign_can_id`` (case-insensitive) and whose ``original_label`` is
    ``"B"``. Each candidate row is scored by its distance to the nearest
    anchor that comes after it; rows with no anchor after them are never
    selected. Candidates are ordered by ``(distance, row)``.

    Parameters
    ----------
    packet_level_data : pd.DataFrame
        Must contain columns: image_no, row_no, can_id, original_label
    image_no : int
        Current image index
    del_rows : list[int]
        Candidate attack packet row indices
    D : int
        Deletion budget
    benign_can_id : str
        Anchor CAN ID (default: '43F'). Previously this argument was ignored
        and '43F' was always used.
    rng_seed : int
        Base seed for the no-anchor fallback; the effective seed is
        ``rng_seed + image_no`` so the choice is deterministic per image.

    Returns
    -------
    deletion_queue : list[tuple[None, int]]
        ``(None, row)`` pairs in deletion order; empty when ``D <= 0``, when
        there are no candidates, or when no candidate precedes an anchor.
    """

    if D <= 0 or not del_rows:
        return []

    anchor_id = str(benign_can_id).upper()

    # --- 1) Benign anchor rows for this image ---
    is_anchor = (
        (packet_level_data["image_no"] == image_no) &
        (packet_level_data["can_id"].astype(str).str.upper() == anchor_id) &
        (packet_level_data["original_label"].astype(str).str.upper() == "B")
    )
    anchors = sorted(packet_level_data.loc[is_anchor, "row_no"].astype(int).tolist())
    print("anchors", anchors)

    # --- 2) Fallback: no anchors -> seeded random deletion ---
    if not anchors:
        # A private generator gives the same picks as seeding the module RNG
        # did, without clobbering the global random state for other callers.
        rng = random.Random(rng_seed + image_no)
        return [(None, r) for r in rng.sample(del_rows, min(D, len(del_rows)))]

    # --- 3) Distance from each attack row to the nearest anchor after it ---
    nearest = {}
    for r in del_rows:
        if r in nearest:
            continue
        gaps = [a - r for a in anchors if a > r]
        if gaps:
            nearest[r] = min(gaps)

    # --- 4) Closest first, ties broken by row number; keep at most D ---
    ordered = sorted(nearest, key=lambda r: (nearest[r], r))
    return [(None, r) for r in ordered[:D]]
def keep_D_logic(packet_level_data, image_no, del_rows, D, benign_can_id="160", rng_seed=0):
    """
    Build a deletion queue that keeps up to D attack packets after each anchor.

    D semantics:
    - D <= 0 : delete ALL attack packets
    - D >= 1 : for every anchor row, keep the first D attack rows whose
               row number is strictly greater than the anchor; delete the rest

    When the image has no anchor rows at all, the decision is delegated to
    ``delete_no_anchor_preserve_periodicity`` (for any value of D).

    Parameters
    ----------
    packet_level_data : pd.DataFrame
        Must contain columns: image_no, row_no, can_id, original_label
    image_no : int
        Current image index
    del_rows : list[int]
        Candidate attack packet row indices
    D : int
        Number of attack packets to keep after each anchor
    benign_can_id : str
        Anchor CAN ID, compared case-insensitively (default: '160')
    rng_seed : int
        Unused; accepted only so existing call sites keep working

    Returns
    -------
    deletion_queue : list[tuple[None, int]]
        ``(None, row_no)`` entries for the packets to delete, in ascending
        row order when anchors exist
    """
    # Function-scope import: bisect is not used elsewhere in this block.
    import bisect

    # No attack packets at all
    if not del_rows:
        return []

    attack_rows = sorted(del_rows)

    # Honour the caller-supplied anchor ID (it used to be hard-coded to "160").
    anchor_id = str(benign_can_id).strip().upper()

    # --- 1) Find anchor rows: benign packets of this image with the anchor ID ---
    in_image = packet_level_data["image_no"] == image_no
    has_anchor_id = packet_level_data["can_id"].astype(str).str.upper() == anchor_id
    is_benign = packet_level_data["original_label"].astype(str).str.upper() == "B"
    anchors = sorted(
        packet_level_data.loc[in_image & has_anchor_id & is_benign, "row_no"]
        .astype(int)
        .tolist()
    )
    print("anchors", anchors)

    # --- 2) No anchors: fall back to the periodicity-preserving strategy ---
    if not anchors:
        return delete_no_anchor_preserve_periodicity(
            packet_level_data,
            image_no,
            del_rows,
            D,
            benign_median_period=0.009909868240356445
        )

    # --- 3) Nothing may be kept: delete every attack packet ---
    if D <= 0:
        return [(None, r) for r in attack_rows]

    # --- 4) Keep the first D attack rows strictly after each anchor ---
    keep_rows = set()
    for a in anchors:
        # attack_rows is sorted, so bisect_right finds the first row > a directly.
        first_after = bisect.bisect_right(attack_rows, a)
        keep_rows.update(attack_rows[first_after:first_after + D])

    # --- 5) Delete everything else ---
    return [(None, r) for r in attack_rows if r not in keep_rows]
def find_max_injection(image):
    """
    Return the row indices that are entirely green in at least one batch sample.

    A pixel counts as green when R == 0, G == 1 and B == 0 (exact comparison
    on channels 0, 1 and 2 of dimension 1).

    Parameters
    ----------
    image : tensor
        4-D tensor indexed as (batch, channel, row, col) with at least
        three channels

    Returns
    -------
    injection_rows : list[int]
        Ascending row indices whose every column is green in some sample

    Raises
    ------
    ValueError
        If ``image`` is not 4-dimensional
    """
    if len(image.shape) != 4:
        raise ValueError("image must be a 4-D tensor (batch, channel, row, col)")

    red_channel = image[:, 0]      # shape (batch, row, col)
    green_channel = image[:, 1]
    blue_channel = image[:, 2]

    green_mask = (red_channel == 0) & (green_channel == 1) & (blue_channel == 0)

    # One vectorised reduction instead of a Python loop over rows:
    # all columns green (dim=2), in any sample of the batch (dim=0).
    full_green_rows = green_mask.all(dim=2).any(dim=0)
    return full_green_rows.nonzero().flatten().tolist()
+ """ + sof_len, id_mask_length, mid_bits_length = 1, 11, 7 + batch_size, _, n_rows, n_cols = image.shape + + # --- Precompute safe column indices --- + id_start = sof_len + id_end = sof_len + id_mask_length + data_start = id_end + mid_bits_length + data_end = data_start + 64 + + # --- select candiidate rows via label match --- + modification_rows = find_max_modification(image,image_no,packet_level_data,rounds) + # print("modification_rows ",modification_rows) + prev_mod_rows = find_max_prev_mod(image,image_no,packet_level_data,rounds) + # print("previously modified rows",prev_mod_rows ) + prev_inj_rows= find_max_prev_inj(image, image_no, packet_level_data,rounds) + # print("previously injected rows",prev_inj_rows ) + injection_rows = find_max_injection(image) + del_rows = find_max_del(image,image_no,packet_level_data,rounds) + # print("total del rows", del_rows) + + #How strong are the gradients in the ID + data bit region of this row? + def compute_grad_for_row_gear(row): + mask = torch.zeros_like(data_grad) + if id_start < id_end: + mask[:, :, row, id_start:id_end] = 1 + if data_start < data_end: + mask[:, :, row, data_start:data_end] = 1 + return float(torch.sum((data_grad * mask) ** 2).item()) #using squared sum because we are more interested in the higher abd values. 
def evaluation_metrics(all_preds, all_labels, folder, filename):
    """
    Compute binary IDS metrics and save the confusion-matrix plot.

    Labels are treated as 0 (negative class) and 1 (positive class). The
    confusion matrix is always built as 2x2, so the metrics stay defined
    even when only one class is present in the inputs.

    Parameters
    ----------
    all_preds : array-like of int
        Predicted labels (0 or 1)
    all_labels : array-like of int
        Ground-truth labels (0 or 1)
    folder : str
        Directory for the plot; created if it does not exist
    filename : str
        File name of the saved plot inside ``folder``

    Returns
    -------
    tuple
        ``(tnr, mdr, oa_asr, IDS_accu, IDS_prec, IDS_recall, IDS_F1)`` where
        ``tnr`` is TN / (TN + FP), ``mdr`` is TP / (TP + FN) and ``oa_asr``
        is the fraction of label-1 samples predicted as 0. Each ratio is 0.0
        when its denominator is zero.
    """
    # Element-wise comparisons below need arrays, not plain lists.
    all_preds = np.asarray(all_preds)
    all_labels = np.asarray(all_labels)

    # Print debug information
    print("Number of predictions:", len(all_preds))
    print("Unique predictions:", np.unique(all_preds, return_counts=True))
    print("Unique labels:", np.unique(all_labels, return_counts=True))

    # Fix the label set so the matrix is 2x2 even if one class is absent;
    # otherwise cm[0, 1] / cm[1, 1] below would raise IndexError.
    cm = confusion_matrix(all_labels, all_preds, labels=[0, 1])
    print("Confusion Matrix:\n", cm)

    # Display and save the confusion matrix
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
    disp.plot(cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')

    os.makedirs(folder, exist_ok=True)
    output_path = os.path.join(folder, filename)
    plt.savefig(output_path, dpi=300)
    plt.close()

    true_negatives = cm[0, 0]
    false_positives = cm[0, 1]
    false_negatives = cm[1, 0]
    true_positives = cm[1, 1]

    # Calculate metrics with safe division
    negatives = true_negatives + false_positives
    positives = true_positives + false_negatives
    tnr = true_negatives / negatives if negatives > 0 else 0.0
    mdr = true_positives / positives if positives > 0 else 0.0
    IDS_accu = accuracy_score(all_labels, all_preds)
    IDS_prec = precision_score(all_labels, all_preds, zero_division=0)
    IDS_recall = recall_score(all_labels, all_preds, zero_division=0)
    IDS_F1 = f1_score(all_labels, all_preds, zero_division=0)

    # Label-1 samples that were predicted as 0
    misclassified_attack_packets = int(((all_labels == 1) & (all_preds == 0)).sum())

    # Total number of label-1 samples
    total_attack_packets = int((all_labels == 1).sum())

    # Guard the ratio: an input without any label-1 sample used to raise
    # ZeroDivisionError here.
    if total_attack_packets > 0:
        oa_asr = misclassified_attack_packets / total_attack_packets
    else:
        oa_asr = 0.0

    return tnr, mdr, oa_asr, IDS_accu, IDS_prec, IDS_recall, IDS_F1
# csv_file = open(summary_path, "w") + # csv_file.write("image_name, target_label, injection_count, modification_count, final_prediction_label, model_feedback\n") + + + # rgb_pattern = [(0.0, 0.0, 0.0) if bit == '0' else (1.0, 1.0, 1.0) for bit in bit_pattern] + + for data, target in test_loader: + n_image = image_numbers[img_idx] + # print(f"Current target shape: {target.shape}, value: {target}") + data, target = data.to(device), target.to(device) + + # If target is a 1D tensor, no need for item() + current_target = target[0] if target.dim() > 0 else target + # feedback = 0 + + # Initialize predictions for benign images (target=0) + initial_output = model(data) + # feedback += 1 + final_pred = initial_output.max(1, keepdim=True)[1] + # Initialize perturbation counts + injection_count = 0 + modification_count = 0 + del_count = 0 + prev_mod_count = 0 + prev_inj_count = 0 + # Perform perturbation for predicted attack images + if current_target == 1: + print("\nImage no:", n_image, "(Attack image)") + + data.requires_grad = True + model.eval() + + initial_output = model(data) + loss = F.nll_loss(initial_output, target) + model.zero_grad(set_to_none=True) + loss.backward() + data_grad = data.grad.data + model.zero_grad(set_to_none=True) # clean up + data_denorm = data + + + + # continue_perturbation = True + if rounds == 0: + n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A")).sum() + # print("n in image no ", n_image) + I = 0 + # M = math.ceil(0.75 * len(modification_queue)) + M = 0 + Pm = 0 + Pi = 0 + # D = 0 + # Deletion budget (generalized rule) + if n_attack_current < 3: + D = 0 + else: + D = n_attack_current // 2 + + # not_D = 2 + print("I, M, Pi, Pm and D for round 0", I,M,Pi,Pm,D) + + elif rounds == 1: + n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A")).sum() + M = 0 + Pm = 0 + Pi = 0 + D = 0 #how many to shift is decided based on modified 
packets in prev round. + if n_attack_current <= 2: + I = 1 + else: + I = 1 + + print("I, M, Pi, Pm and S for round 1", I,M,Pi,Pm,D) + elif rounds == 2: + n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A") & (packet_level_data["operation_label"] != "I")).sum() + I = 0 + # M = math.ceil(0.5*n_attack_current) #these many were shifted in prev round and selected for mmodification in this round. + Pi = 2 + Pm = 0 + D = 0 + if n_attack_current < 3: + M = 0 + else: + M = 1 + + print("I, M, Pi, Pm for round 2", I,M,Pi,Pm) + elif rounds >= 3: + n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A")).sum() + I = 0 + M = 0 + Pi = 5 + Pm = 5 + D = 0 + print("I, M, Pi, Pm for round>=2", I,M,Pi,Pm,D) + # # elif rounds == 4: + # n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A")).sum() + # I = 0 + # M = math.ceil(0.25*n_attack_current) + # Pm = 0 + # Pi = 0 + # D = 0 + # print("I, M, Pi, Pm for round>=2", I,M,Pi,Pm,D) + + + + modification_queue, injection_queue, prev_mod_queue, prev_inj_queue, deletion_queue = build_queues(data_denorm, n_image, data_grad,packet_level_data,rounds,D) + num_inj = len(injection_queue) + num_mod = len(modification_queue) + num_prev_mod = len(prev_mod_queue) + num_prev_inj = len(prev_inj_queue) + num_del = len(deletion_queue) + # D = num_del + perturbed_data = data_denorm.clone().detach().to(device) + perturbed_data.requires_grad = True + + model.eval() + + final_pred, data_denorm, packet_level_data, = perform_perturbation(model,data_grad, perturbed_data, existing_hex_ids, packet_level_data, n_image, modification_queue, injection_queue, prev_mod_queue, prev_inj_queue,deletion_queue,rounds,I,M,Pi,Pm,D) + + injection_count = num_inj - len(injection_queue) + modification_count = num_mod - len(modification_queue) + prev_mod_count = num_prev_mod - len(prev_mod_queue) + prev_inj_count = 
def run(params):
    """
    Run one round of the adversarial attack and write its outputs.

    Reads the packet-level tracksheet CSV, prepares its columns, loads the
    test images and the model, calls ``Attack_procedure`` and
    ``evaluation_metrics``, prints the resulting metrics and finally writes
    the updated tracksheet to ``<output_path>/packet_level_data_<rounds>.csv``.

    Parameters
    ----------
    params : dict
        Must contain the keys ``test_data_dir``, ``test_label_file``,
        ``output_path``, ``rounds``, ``packet_level_data`` (path of the
        tracksheet CSV) and ``model_path``.
    """

    test_dataset_dir = params["test_data_dir"]
    # os.makedirs(test_dataset_dir, exist_ok=True)
    # print(test_dataset_dir)
    test_label_file = params["test_label_file"]
    output_path = params["output_path"]
    rounds = params["rounds"]
    packet_level_data = params["packet_level_data"]  # a CSV path here; rebound to a DataFrame below
    model_path = params["model_path"]


    os.makedirs(output_path, exist_ok=True)
    # The confusion-matrix plot goes under CF_Results/<output_path>/.
    folder = os.path.join("CF_Results", output_path)
    # filename = f"{output_path}.png"
    filename = f"perturbed_spoof_no_data.png"
    model_type = "densenet161"  # NOTE(review): not referenced again in this function
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # existing_hex_ids = ['0130', '0002', '0131', '0140', '018f',
    #                     '02c0', '0370', '0316', '0153', '043f', '0260',
    #                     '02a0', '0350', '0440', '0329', '0545', '0430',
    #                     '01f1', '04b1', '04f0', '05f0', '00a0', '00a1',
    #                     '0690', '05a0', '05a2']

    # ID list handed to Attack_procedure (presumably the CAN IDs seen in the
    # dataset; confirm against the data).
    existing_hex_ids = ['170', '17C', '18F', '1A3', '1A4', '202', '309', '326', '374', '37B']

    # Read CSV with every column as a string; numeric columns are cast below
    packet_level_data = pd.read_csv(packet_level_data, dtype=str, low_memory=False)

    # Strip column names FIRST before anything else
    packet_level_data.columns = packet_level_data.columns.str.strip()

    # Fill NaN values with the literal string "None"
    packet_level_data = packet_level_data.fillna("None")

    # Type casting
    packet_level_data["row_no"] = packet_level_data["row_no"].astype(int)
    packet_level_data["timestamp"] = packet_level_data["timestamp"].astype(float)
    packet_level_data["image_no"] = packet_level_data["image_no"].astype(int)
    packet_level_data["valid_flag"] = packet_level_data["valid_flag"].astype(int)

    # Round 0 label setup
    if rounds == 0:
        print("in round 0")
        # 1. Rename the column
        packet_level_data = packet_level_data.rename(columns={"label": "original_label"})

        # 2. Map integer-string values (CSV read as str, so map "0"/"1" not 0/1).
        #    Any other value becomes NaN under Series.map.
        packet_level_data["original_label"] = packet_level_data["original_label"].map({"0": "B", "1": "A"})

        # 3. Initialize operation label
        packet_level_data["operation_label"] = "None"

    # Load dataset
    image_datasets, test_loader, image_numbers = load_dataset(test_dataset_dir,test_label_file,device,is_train=False)
    print("loaded test dataset")

    # Load the model
    model = load_model(model_path)

    # bit_pattern = "0000000000000001000" # for matching the packets/rows to modify


    # Time the attack procedure
    st = time.time()
    print("Start time:", st)
    # Call the attack procedure
    preds, labels, packet_level_data = Attack_procedure(model, device, test_loader,output_path,existing_hex_ids, image_numbers, packet_level_data,rounds)
    et = time.time()
    print("End time:", et)
    # print("Labels:", labels)
    # print("Predictions:", preds)

    tnr, mdr, oa_asr, IDS_accu, IDS_prec, IDS_recall,IDS_F1 = evaluation_metrics(preds, labels,folder,filename)
    print("----------------IDS Perormance Metric----------------")
    print(f'Accuracy: {IDS_accu:.4f}')
    print(f'Precision: {IDS_prec:.4f}')
    print(f'Recall: {IDS_recall:.4f}')
    print(f'F1 Score: {IDS_F1:.4f}')
    print("----------------Adversarial attack Perormance Metric----------------")
    print("TNR:", tnr)
    print("Malcious Detection Rate:", mdr)
    print("Attack Success Rate:", oa_asr)
    print("Execution Time:", et-st)

    # Force timestamp precision ONLY: written as text with six decimals
    packet_level_data["timestamp"] = packet_level_data["timestamp"].map(lambda x: f"{x:.6f}")
    int_cols = ["row_no", "image_no", "valid_flag"]
    for c in int_cols:
        if c in packet_level_data.columns:
            packet_level_data[c] = packet_level_data[c].astype(int)

    # Persist the updated tracksheet for this round
    packet_level_data.to_csv(os.path.join(output_path, f"packet_level_data_{rounds}.csv"), index=False)
def parse_hex(x):
    """
    Parse a CSV cell as an integer, returning 0 when it cannot be parsed.

    Rules, in order:
    - missing values (``pd.isna``) and blank strings -> 0
    - text containing a '.' is read as a decimal float and truncated
    - anything else is read as base-16, so "10" -> 16 and "1A" -> 26

    Only parsing failures are mapped to 0; ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed.
    """
    try:
        if pd.isna(x):
            return 0
        s = str(x).strip()
        if not s:
            return 0
        if '.' in s:
            return int(float(s))
        return int(s, 16)
    except (ValueError, TypeError, OverflowError):
        # ValueError: malformed number, or array-like input to pd.isna
        # OverflowError: int(float("inf")) for text such as "1.0e999"
        return 0
def calculate_entropy(windows):
    """
    Compute the Shannon entropy (in bits) of each window.

    Every payload element of every packet contributes one symbol
    ``(can_id, dlc, position, value)``; the entropy is taken over the
    relative frequencies of the distinct symbols in the window.

    Parameters
    ----------
    windows : list[pd.DataFrame]
        Frames with columns ``can_id``, ``dlc`` and ``payload`` (a sequence
        per row)

    Returns
    -------
    np.ndarray
        One entropy value per window; 0.0 for a window without symbols
    """
    # Function-scope import: Counter is not used elsewhere in this block.
    from collections import Counter

    ent = []
    for w in windows:
        if w.empty:
            ent.append(0.0)
            continue

        # Iterate the three needed columns directly instead of iterrows(),
        # which builds a Series for every packet.
        counts = Counter(
            (cid, dlc, pos, val)
            for cid, dlc, payload in zip(w["can_id"], w["dlc"], w["payload"])
            for pos, val in enumerate(payload)
        )
        if not counts:
            ent.append(0.0)
            continue

        c = np.fromiter(counts.values(), dtype=np.int64, count=len(counts))
        p = c / c.sum()
        ent.append(-np.sum(p * np.log2(p)))
    return np.array(ent)
def plot_confusion(cm, pass_num, y_test, preds):
    """
    Save the confusion-matrix image for this pass and print the metrics.

    The image is written to
    ``./CF_target/CARLA_dos_entropy_cf_pass_<pass_num>.png``.

    Parameters
    ----------
    cm : array-like
        Confusion matrix for labels 0 (benign) and 1 (attack). If it is not
        2x2 (for example only one class occurred), it is rebuilt from
        ``y_test`` and ``preds`` with both labels forced.
    pass_num : int
        Pass/round number used in the output file name
    y_test : array-like of int
        Ground-truth window labels
    preds : array-like of int
        Predicted window labels
    """
    cm = np.asarray(cm)
    if cm.shape != (2, 2):
        # A single-class run yields a 1x1 matrix, which would break both the
        # 2x2 annotation loop and the TN/FP/FN/TP unpacking below.
        cm = confusion_matrix(y_test, preds, labels=[0, 1])

    plt.imshow(cm, cmap='Blues')
    plt.title("Confusion Matrix - DOS (Entropy)")
    plt.colorbar()
    ticks = ["Benign", "Attack"]
    plt.xticks(range(2), ticks)
    plt.yticks(range(2), ticks)

    # Cells above half of the largest count get white text for contrast.
    half_max = np.max(cm) / 2
    for i, j in itertools.product(range(2), range(2)):
        plt.text(j, i, f"{cm[i,j]}",
                 ha="center",
                 color="white" if cm[i,j] > half_max else "black")

    plt.ylabel("True")
    plt.xlabel("Predicted")
    plt.tight_layout()

    os.makedirs("./CF_target", exist_ok=True)
    plt.savefig("./CF_target/CARLA_dos_entropy_cf_pass_{}.png".format(pass_num))
    plt.close()

    TN, FP, FN, TP = cm.ravel()

    accuracy = accuracy_score(y_test, preds)
    precision = precision_score(y_test, preds, pos_label=1, zero_division=0)
    rec = recall_score(y_test, preds, pos_label=1, zero_division=0)
    f1 = f1_score(y_test, preds, pos_label=1, zero_division=0)
    tnr = TN / (TN + FP) if (TN + FP) > 0 else 0
    fpr = FP / (FP + TN) if (FP + TN) > 0 else 0
    fnr = FN / (TP + FN) if (TP + FN) > 0 else 0
    balanced_acc = balanced_accuracy_score(y_test, preds)
    try:
        auc = roc_auc_score(y_test, preds)
    except ValueError:
        # roc_auc_score raises ValueError when y_test holds a single class.
        auc = 0.0

    print("\n--------------- PERFORMANCE METRICS ----------------")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall / TPR:", rec)
    print("True Negative Rate (TNR):", tnr)
    print("False Positive Rate (FPR):", fpr)
    print("False Negative Rate (FNR):", fnr)
    print("F1 Score:", f1)
    print("Balanced Accuracy:", balanced_acc)
    print("ROC AUC:", auc)
    print("---------------------------------------------------\n")

    print("Confusion Matrix (Raw Values):")
    print(cm)
    print(f"TP={TP}, TN={TN}, FP={FP}, FN={FN}")
Load & Preprocess + print(f"\n--- Loading Data: {traffic_path} ---") + if not os.path.exists(traffic_path): + print(f"CRITICAL ERROR: File {traffic_path} not found.") + return + + try: + df = pd.read_csv(traffic_path, on_bad_lines='skip', low_memory=False) + df.columns = df.columns.str.strip() + + if df.columns[0] not in ["Timestamp", "timestamp", "Time", "time"]: + col_names = ["Timestamp", "can_id", "dlc", + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "label"] + df = pd.read_csv(traffic_path, delimiter=',', header=None, + names=col_names, on_bad_lines='skip', low_memory=False) + df.columns = df.columns.str.strip() + + if str(df.iloc[0]["Timestamp"]).lower() in ["timestamp", "time"]: + df = df.iloc[1:].reset_index(drop=True) + + except Exception as e: + print(f"Error reading CSV: {e}") + return + + try: + df = preprocess_dataframe(df) + except KeyError as e: + print(f"Preprocessing Error: {e}") + return + + if df.empty: + print("Error: DataFrame is empty.") + return + + # 2. Windowing + print("\n--- Splitting into Time Windows ---") + windows, y_test, window_indices = split_into_windows(df, WINDOW) + + print("\nWINDOW DISTRIBUTION") + print("-----------------------------------") + print(f"Total Windows: {len(y_test)}") + print(f"Benign: {(y_test == 0).sum()}") + print(f"Attack: {(y_test == 1).sum()}") + print("-----------------------------------\n") + + if not windows: + print("Error: No windows created.") + return + + # 3. Calculate Entropy + print("--- Calculating Entropy ---") + ent = calculate_entropy(windows) + + # 4. Prediction + print(f"Applying Thresholds: Lower={LOWER:.4f}, Upper={UPPER:.4f}") + preds = ((ent < LOWER) | (ent > UPPER)).astype(int) + + # 5. Evaluate & Plot + cm = confusion_matrix(y_test, preds) + plot_confusion(cm, rounds, y_test, preds) + + print(f"\nSaved confusion matrix: CARLA_dos_entropy_cf_pass_{rounds}.png\n") + + # 6. 
if __name__ == "__main__":

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default="config_dos_CARLA.yaml")
    args = parser.parse_args()

    # Context manager so the config file handle is closed deterministically.
    with open(args.config, "r") as config_file:
        cfg = yaml.safe_load(config_file)

    # An empty YAML file loads as None; report it like a missing section
    # instead of failing with a TypeError on the membership test.
    if not isinstance(cfg, dict) or "evaluate" not in cfg:
        raise ValueError("Config file must contain 'evaluate' section.")

    # NOTE(review): run() reads the keys "rounds", "traffic_path",
    # "tracksheet" and "output_path"; confirm the 'evaluate' section of the
    # config actually provides them when this script is launched directly.
    run(cfg["evaluate"])
def parse_hex(x):
    """
    Parse a CSV cell as an integer, returning 0 when it cannot be parsed.

    Rules, in order:
    - missing values (``pd.isna``) and blank strings -> 0
    - text containing a '.' is read as a decimal float and truncated
    - anything else is read as base-16, so "10" -> 16 and "1A" -> 26

    Only parsing failures are mapped to 0; ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed.
    """
    try:
        if pd.isna(x):
            return 0
        s = str(x).strip()
        if not s:
            return 0
        if '.' in s:
            return int(float(s))
        return int(s, 16)
    except (ValueError, TypeError, OverflowError):
        # ValueError: malformed number, or array-like input to pd.isna
        # OverflowError: int(float("inf")) for text such as "1.0e999"
        return 0
Standardize ID + if "ID" in df.columns and "can_id" not in df.columns: + df = df.rename(columns={"ID": "can_id"}) + if "can_id" not in df.columns: + df.rename(columns={df.columns[1]: "can_id"}, inplace=True) + df["can_id"] = df["can_id"].apply(parse_hex) + + # 3. Standardize DLC + if "DLC" in df.columns and "dlc" not in df.columns: + df = df.rename(columns={"DLC": "dlc"}) + df["dlc"] = pd.to_numeric(df["dlc"], errors="coerce").fillna(0).astype(int) + + # 4. Standardize Payload + payload_cols = ["d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"] + for c in payload_cols: + if c not in df.columns: + df[c] = 0 + df[payload_cols] = df[payload_cols].fillna(0) + for c in payload_cols: + df[c] = df[c].apply(parse_hex) + + df["payload"] = df[payload_cols].values.tolist() + + # 5. Standardize Label + if "label" not in df.columns: + df["label"] = 0 + + df["label"] = df["label"].astype(str).str.upper().map({ + "B": 0, "R": 0, "0": 0, "BENIGN": 0, "NAN": 0, "NONE": 0, + "T": 1, "A": 1, "1": 1, "ATTACK": 1, "SPOOF": 1 + }).fillna(0).astype(int) + + df.sort_values("Timestamp", inplace=True) + df.reset_index(drop=True, inplace=True) + return df + + +# --------------------------------------------------------- +# Windowing +# --------------------------------------------------------- +def split_into_windows(df, window_size): + if df.empty: + return [], np.array([]), [] + start, end = df["Timestamp"].min(), df["Timestamp"].max() + windows, labels, indices = [], [], [] + t = start + while t <= end: + w = df[(df["Timestamp"] >= t) & (df["Timestamp"] < t + window_size)] + if not w.empty: + windows.append(w) + labels.append(int((w["label"] == 1).any())) + indices.append(w.index) + t += window_size + return windows, np.array(labels), indices + + +# --------------------------------------------------------- +# Entropy Calculation +# --------------------------------------------------------- +def calculate_entropy(windows): + ent = [] + for w in windows: + symbols = [] + for _, r in w.iterrows(): 
+ for i, v in enumerate(r["payload"]): + symbols.append((r["can_id"], r["dlc"], i, v)) + if not symbols: + ent.append(0.0) + continue + _, c = np.unique(symbols, axis=0, return_counts=True) + p = c / c.sum() + ent.append(-np.sum(p * np.log2(p))) + return np.array(ent) + + +# --------------------------------------------------------- +# Save Predictions & Update Tracksheet +# (Same logic as evaluate_spoof_CARLA.py save_preds) +# --------------------------------------------------------- +def save_preds(pass_num, tracksheet, pred_labels_list, output_path, preds): + + print(f"-> Updating tracksheet: {tracksheet}") + try: + df = pd.read_csv(tracksheet, dtype=str, low_memory=False) + except FileNotFoundError: + print(f"[ERROR] Tracksheet {tracksheet} not found.") + return + + df.columns = df.columns.str.strip() + df = df.fillna("None") + + df["row_no"] = df["row_no"].astype(int) + df["timestamp"] = df["timestamp"].astype(float) + df["image_no"] = df["image_no"].astype(int) + df["valid_flag"] = df["valid_flag"].astype(int) + + pred_labels = pred_labels_list + + n_df = len(df) + n_pred = len(pred_labels) + + if n_pred < n_df: + print(f"[WARN] pred_labels shorter than packet CSV: " + f"{n_pred} vs {n_df}. Filling remaining using operation_label.") + for i in range(n_pred, n_df): + op = str(df.iloc[i]["operation_label"]).strip().upper() + if op == "NONE": + pred_labels.append("B") + else: + pred_labels.append("A") + elif n_pred > n_df: + print(f"[WARN] pred_labels longer than packet CSV: " + f"{n_pred} vs {n_df}. 
Truncating extra predictions.") + pred_labels = pred_labels[:n_df] + + assert len(pred_labels) == n_df + + df["pred_label"] = pred_labels + + df["timestamp"] = df["timestamp"].map(lambda x: f"{x:.6f}") + + int_cols = ["row_no", "image_no", "valid_flag"] + for c in int_cols: + if c in df.columns: + df[c] = df[c].astype(int) + + tracksheet_dir = "tracksheets_CARLA" + os.makedirs(tracksheet_dir, exist_ok=True) + + new_tracksheet = os.path.join(tracksheet_dir, f"spoof_test_track_{pass_num}.csv") + df.to_csv(new_tracksheet, index=False) + + print(f"Saved updated packet-level CSV -> {new_tracksheet} " + f"(rows={n_df}, preds={len(pred_labels)})") + + +# --------------------------------------------------------- +# Confusion Matrix Plot +# --------------------------------------------------------- +def plot_confusion(cm, pass_num, y_test, preds): + plt.imshow(cm, cmap='Blues') + plt.title("Confusion Matrix - Spoof (Entropy)") + plt.colorbar() + ticks = ["Benign", "Attack"] + plt.xticks(range(2), ticks) + plt.yticks(range(2), ticks) + + for i, j in itertools.product(range(2), range(2)): + plt.text(j, i, f"{cm[i,j]}", + ha="center", + color="white" if cm[i,j] > np.max(cm)/2 else "black") + + plt.ylabel("True") + plt.xlabel("Predicted") + plt.tight_layout() + + os.makedirs("./CF_target", exist_ok=True) + plt.savefig("./CF_target/CARLA_spoof_entropy_pass_{}.png".format(pass_num)) + plt.close() + + TN, FP, FN, TP = cm.ravel() + + accuracy = accuracy_score(y_test, preds) + precision = precision_score(y_test, preds, pos_label=1, zero_division=0) + rec = recall_score(y_test, preds, pos_label=1, zero_division=0) + f1 = f1_score(y_test, preds, pos_label=1, zero_division=0) + tpr = TP / (TP + FN) if (TP + FN) > 0 else 0 + tnr = TN / (TN + FP) if (TN + FP) > 0 else 0 + fpr = FP / (FP + TN) if (FP + TN) > 0 else 0 + fnr = FN / (TP + FN) if (TP + FN) > 0 else 0 + balanced_acc = balanced_accuracy_score(y_test, preds) + try: + auc = roc_auc_score(y_test, preds) + except: + auc = 0.0 + + 
print("\n--------------- PERFORMANCE METRICS ----------------") + print("Accuracy:", accuracy) + print("Precision:", precision) + print("Recall / TPR:", rec) + print("True Negative Rate (TNR):", tnr) + print("False Positive Rate (FPR):", fpr) + print("False Negative Rate (FNR):", fnr) + print("F1 Score:", f1) + print("Balanced Accuracy:", balanced_acc) + print("ROC AUC:", auc) + print("---------------------------------------------------\n") + + print("Confusion Matrix (Raw Values):") + print(cm) + print(f"TP={TP}, TN={TN}, FP={FP}, FN={FN}") + + +# --------------------------------------------------------- +# MAIN +# --------------------------------------------------------- +def run(params): + + rounds = params["rounds"] + traffic_path = params["traffic_path"] + tracksheet = params["tracksheet"] + output_path = params["output_path"] + + print(f"\nDataset: CARLA Spoof (Entropy) | Mean: {TRAIN_MEAN} | Std: {TRAIN_STD} " + f"| K: {K} | Window: {WINDOW}") + print(f"Thresholds: Lower={LOWER:.4f}, Upper={UPPER:.4f}") + + # 1. 
Load & Preprocess + print(f"\n--- Loading Data: {traffic_path} ---") + if not os.path.exists(traffic_path): + print(f"CRITICAL ERROR: File {traffic_path} not found.") + return + + try: + df = pd.read_csv(traffic_path, on_bad_lines='skip', low_memory=False) + df.columns = df.columns.str.strip() + + if df.columns[0] not in ["Timestamp", "timestamp", "Time", "time"]: + col_names = ["Timestamp", "can_id", "dlc", + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "label"] + df = pd.read_csv(traffic_path, delimiter=',', header=None, + names=col_names, on_bad_lines='skip', low_memory=False) + df.columns = df.columns.str.strip() + + if str(df.iloc[0]["Timestamp"]).lower() in ["timestamp", "time"]: + df = df.iloc[1:].reset_index(drop=True) + + except Exception as e: + print(f"Error reading CSV: {e}") + return + + try: + df = preprocess_dataframe(df) + except KeyError as e: + print(f"Preprocessing Error: {e}") + return + + if df.empty: + print("Error: DataFrame is empty.") + return + + # 2. Windowing + print("\n--- Splitting into Time Windows ---") + windows, y_test, window_indices = split_into_windows(df, WINDOW) + + print("\nWINDOW DISTRIBUTION") + print("-----------------------------------") + print(f"Total Windows: {len(y_test)}") + print(f"Benign: {(y_test == 0).sum()}") + print(f"Attack: {(y_test == 1).sum()}") + print("-----------------------------------\n") + + if not windows: + print("Error: No windows created.") + return + + # 3. Calculate Entropy + print("--- Calculating Entropy ---") + ent = calculate_entropy(windows) + + # 4. Prediction + print(f"Applying Thresholds: Lower={LOWER:.4f}, Upper={UPPER:.4f}") + preds = ((ent < LOWER) | (ent > UPPER)).astype(int) + + # 5. Evaluate & Plot + cm = confusion_matrix(y_test, preds) + plot_confusion(cm, rounds, y_test, preds) + + print(f"\nSaved confusion matrix: CARLA_spoof_entropy_pass_{rounds}.png\n") + + # 6. 
Map window predictions back to packet-level + df["pred_label"] = "B" + for i, idxs in enumerate(window_indices): + if preds[i] == 0: + df.loc[idxs, "pred_label"] = "B" + else: + df.loc[idxs, "pred_label"] = df.loc[idxs, "label"].map({1: "A", 0: "B"}) + + # 7. Save detailed prediction output + df_out = df.drop(columns=["payload"], errors='ignore') + os.makedirs(os.path.dirname(output_path) if os.path.dirname(output_path) else ".", exist_ok=True) + df_out.to_csv(output_path, index=False) + print("Saved detailed prediction results ->", output_path) + + # 8. Update tracksheet + pred_labels = df["pred_label"].tolist() + save_preds(rounds, tracksheet, pred_labels, output_path, preds) + + +if __name__ == "__main__": + + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, default="config_spoof_CARLA.yaml") + args = parser.parse_args() + + cfg = yaml.safe_load(open(args.config)) + + if "evaluate" not in cfg: + raise ValueError("Config file must contain 'evaluate' section.") + + run(cfg["evaluate"]) diff --git a/CARLA_Entropy/scripts/networks/Inception_Resnet_V1.py b/CARLA_Entropy/scripts/networks/Inception_Resnet_V1.py new file mode 100644 index 0000000..c195402 --- /dev/null +++ b/CARLA_Entropy/scripts/networks/Inception_Resnet_V1.py @@ -0,0 +1,492 @@ +#!/usr/bin/env python3 +""" +Reduced Inception-ResNet V1 Model for Vehicle CAN Network Intrusion Detection + +This implementation creates a lightweight version of Inception-ResNet V1 architecture +specifically optimized for processing 29x29 binary CAN frame matrices. 
import os

# Hide every CUDA device; this is set before TensorFlow is imported below.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

import tensorflow as tf
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, AveragePooling2D,
                                     Concatenate, Add, Flatten, Dropout, Dense, Lambda)
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback


###################################################
# Custom Training Callback for Batch-Level Monitoring
###################################################
class BatchLossHistory(Callback):
    """Keras callback that records the training loss after every batch.

    After training, ``batch_losses`` holds ``(iteration, loss)`` tuples where
    ``iteration`` counts batches from 1 across all epochs.
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of training."""
        self.batch_losses = []  # (iteration, loss) tuples
        self.iterations = 0     # batches seen so far, across epochs

    def on_batch_end(self, batch, logs=None):
        """Append the loss of the batch that just finished.

        Args:
            batch: Index of the batch within the current epoch (unused).
            logs: Metric dict supplied by Keras.  ``None`` is tolerated and
                recorded as a ``None`` loss instead of raising.
        """
        self.iterations += 1
        loss = None if logs is None else logs.get('loss')
        self.batch_losses.append((self.iterations, loss))


###################################################
# Stem Block: Initial Feature Extraction
###################################################
def stem_block(inputs):
    """Initial feature extraction and first spatial reduction.

    For the default 29x29x1 input the shapes are:
        Conv2D(64, 3x3, valid)      29x29x1  -> 27x27x64
        Conv2D(64, 3x3, same)       27x27x64 -> 27x27x64
        MaxPooling2D(2x2, stride 2) 27x27x64 -> 13x13x64
        Conv2D(128, 1x1, same)      13x13x64 -> 13x13x128

    Args:
        inputs: 4-D image tensor (batch, height, width, channels).

    Returns:
        Feature tensor with 128 channels.
    """
    x = Conv2D(64, (3, 3), strides=1, padding='valid', activation='relu')(inputs)
    x = Conv2D(64, (3, 3), strides=1, padding='same', activation='relu')(x)
    x = MaxPooling2D((2, 2), strides=2, padding='valid')(x)
    # 1x1 convolution: widens the channel axis, leaves height/width unchanged.
    x = Conv2D(128, (1, 1), strides=1, padding='same', activation='relu')(x)
    return x


###################################################
# Inception-ResNet Block A: Multi-Scale Feature Extraction
###################################################
def inception_resnet_a_block(x, scale=0.1):
    """Inception-ResNet-A block: three parallel conv branches plus a residual add.

    Branches (all 'same' padding, ReLU):
        0: 1x1 (32)
        1: 1x1 (32) -> 3x3 (32)
        2: 1x1 (32) -> 3x3 (48) -> 3x3 (64)
    The concatenated 128 channels are projected by a linear 1x1 convolution
    to the input's channel count, multiplied by ``scale`` and added to the
    input, followed by ReLU.

    Args:
        x: 4-D feature tensor.
        scale: Multiplier applied to the residual branch before the add.

    Returns:
        Tensor with the same shape as ``x``.
    """
    branch_0 = Conv2D(32, (1, 1), padding='same', activation='relu')(x)

    branch_1 = Conv2D(32, (1, 1), padding='same', activation='relu')(x)
    branch_1 = Conv2D(32, (3, 3), padding='same', activation='relu')(branch_1)

    branch_2 = Conv2D(32, (1, 1), padding='same', activation='relu')(x)
    branch_2 = Conv2D(48, (3, 3), padding='same', activation='relu')(branch_2)
    branch_2 = Conv2D(64, (3, 3), padding='same', activation='relu')(branch_2)

    # 32 + 32 + 64 = 128 channels
    merged = Concatenate(axis=-1)([branch_0, branch_1, branch_2])

    # Linear projection back to the input channel count so Add() is shape-compatible.
    up = Conv2D(tf.keras.backend.int_shape(x)[-1], (1, 1), padding='same')(merged)

    # NOTE(review): a Lambda wrapping a Python closure may not round-trip
    # through model save/load in every Keras version -- confirm before
    # relying on reloading the saved .h5 file.
    up = Lambda(lambda s: s * scale)(up)

    x = Add()([x, up])
    x = tf.keras.layers.Activation('relu')(x)
    return x


###################################################
# Reduction Block A: Spatial Downsampling with Feature Expansion
###################################################
def reduction_a_block(x):
    """Reduction-A block: halve the spatial size and widen the channel axis.

    Branches (the stride-2 operations use 'valid' padding):
        0: MaxPooling2D(3x3, stride 2)                  keeps input channels
        1: Conv 3x3 stride 2 (160)
        2: Conv 1x1 (128) -> 3x3 (160) -> 3x3 stride 2 (160)
    For a 13x13x128 input the output is 6x6x448 (128 + 160 + 160).

    Args:
        x: 4-D feature tensor.

    Returns:
        Concatenation of the three branches along the channel axis.
    """
    branch_0 = MaxPooling2D((3, 3), strides=2, padding='valid')(x)

    branch_1 = Conv2D(160, (3, 3), strides=2, padding='valid', activation='relu')(x)

    branch_2 = Conv2D(128, (1, 1), padding='same', activation='relu')(x)
    branch_2 = Conv2D(160, (3, 3), strides=1, padding='same', activation='relu')(branch_2)
    branch_2 = Conv2D(160, (3, 3), strides=2, padding='valid', activation='relu')(branch_2)

    x = Concatenate(axis=-1)([branch_0, branch_1, branch_2])
    return x


###################################################
# Inception-ResNet Block B: High-Level Feature Processing
###################################################
def inception_resnet_b_block(x, scale=0.1):
    """Inception-ResNet-B block: asymmetric 1x7 / 7x1 branch plus a residual add.

    Branches (all 'same' padding, ReLU):
        0: 1x1 (192)
        1: 1x1 (128) -> 1x7 (160) -> 7x1 (192)
    The concatenated 384 channels are projected by a linear 1x1 convolution
    to the input's channel count, multiplied by ``scale`` and added to the
    input, followed by ReLU.

    Args:
        x: 4-D feature tensor.
        scale: Multiplier applied to the residual branch before the add.

    Returns:
        Tensor with the same shape as ``x``.
    """
    branch_0 = Conv2D(192, (1, 1), padding='same', activation='relu')(x)

    branch_1 = Conv2D(128, (1, 1), padding='same', activation='relu')(x)
    branch_1 = Conv2D(160, (1, 7), padding='same', activation='relu')(branch_1)
    branch_1 = Conv2D(192, (7, 1), padding='same', activation='relu')(branch_1)

    # 192 + 192 = 384 channels
    merged = Concatenate(axis=-1)([branch_0, branch_1])

    # Linear projection back to the input channel count so Add() is shape-compatible.
    up = Conv2D(tf.keras.backend.int_shape(x)[-1], (1, 1), padding='same')(merged)
    up = Lambda(lambda s: s * scale)(up)

    x = Add()([x, up])
    x = tf.keras.layers.Activation('relu')(x)
    return x
###################################################
# Reduction Block B: Final Spatial Downsampling
###################################################
def reduction_b_block(x):
    """Reduction-B block: second spatial reduction, doubling the channel count
    of a 448-channel input.

    Two stride-2, 'valid'-padded branches are concatenated on the channel axis:
        - MaxPooling2D(3x3), which keeps the input channels
        - Conv2D(448, 3x3) with ReLU
    For a 6x6x448 input the output is 2x2x896.

    Args:
        x: 4-D feature tensor.

    Returns:
        Channel-wise concatenation of the pooled and convolved branches.
    """
    pooled = MaxPooling2D((3, 3), strides=2, padding='valid')(x)
    convolved = Conv2D(448, (3, 3), strides=2, padding='valid', activation='relu')(x)
    return Concatenate(axis=-1)([pooled, convolved])


###################################################
# Main Model Architecture Builder
###################################################
def build_reduced_inception_resnet(input_shape=(29, 29, 1), num_classes=2, dropout_rate=0.2):
    """Assemble the reduced Inception-ResNet classifier.

    Stage order: stem -> Inception-ResNet-A -> Reduction-A ->
    Inception-ResNet-B -> Reduction-B -> 2x2 average pooling -> flatten ->
    dropout -> softmax dense layer.  With the default 29x29x1 input the
    feature map entering the pooling layer is 2x2x896, so the pooling
    collapses it to a single 896-dimensional vector.

    Args:
        input_shape: Shape of one input sample (height, width, channels).
        num_classes: Number of softmax outputs.
        dropout_rate: Dropout rate applied before the dense layer.

    Returns:
        An uncompiled Keras ``Model``; ``Inception_Resnet_V1.train`` compiles it.
    """
    inputs = Input(shape=input_shape)

    features = stem_block(inputs)
    features = inception_resnet_a_block(features, scale=0.1)
    features = reduction_a_block(features)
    features = inception_resnet_b_block(features, scale=0.1)
    features = reduction_b_block(features)

    # Classification head
    features = AveragePooling2D((2, 2), padding='valid')(features)
    features = Flatten()(features)
    features = Dropout(dropout_rate)(features)
    outputs = Dense(num_classes, activation='softmax')(features)

    return Model(inputs, outputs)


###################################################
# Model Wrapper Class for Training and Evaluation
###################################################
class Inception_Resnet_V1:
    """Thin wrapper that builds the reduced Inception-ResNet and trains it.

    Attributes set by ``__init__``: ``epochs``, ``batch_size`` and ``model``
    (built with ``build_reduced_inception_resnet()`` defaults).
    """

    def __init__(self, epochs=10, batch_size=32, load_weights=False):
        """Store the training hyperparameters and build the model.

        Args:
            epochs: Default number of training epochs.
            batch_size: Batch size passed to ``fit``.
            load_weights: Accepted but currently has no effect; no weights
                are loaded regardless of its value.
        """
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = build_reduced_inception_resnet()
        # Placeholder: weight loading is not implemented, e.g.
        #   self.model.load_weights('path_to_pretrained_weights.h5')

    def train(self, x_train, y_train, x_test, y_test, filename_prefix="", epochs_override=None):
        """Compile, fit and save the model, recording the loss of every batch.

        The model is compiled with Adam (learning rate 0.001) and sparse
        categorical cross-entropy, fitted on the training data, and saved
        to ``filename_prefix + 'final_model.h5'``.

        Args:
            x_train: Training inputs.
            y_train: Integer training labels.
            x_test: Not used by this method.
            y_test: Not used by this method.
                NOTE(review): no validation data is passed to ``fit`` --
                confirm whether validation on the test split was intended.
            filename_prefix: Prefix of the saved model file name.
            epochs_override: Epoch count to use instead of ``self.epochs``
                when not ``None``.

        Returns:
            tuple: ``(history, batch_losses)`` -- the Keras ``History``
            object and the ``(iteration, loss)`` list collected by
            ``BatchLossHistory``.
        """
        n_epochs = self.epochs if epochs_override is None else epochs_override

        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )

        loss_tracker = BatchLossHistory()
        history = self.model.fit(
            x_train,
            y_train,
            epochs=n_epochs,
            batch_size=self.batch_size,
            callbacks=[loss_tracker],
        )

        self.model.save(filename_prefix + 'final_model.h5')
        return history, loss_tracker.batch_losses

    def summary(self):
        """Call ``self.model.summary()`` and return its result."""
        return self.model.summary()
def update_labels(updated_track_file, label_file, updated_label_file):
    """Rewrite the image-level label file from packet-level predictions.

    Packets in ``updated_track_file`` are grouped by ``image_no`` (ascending)
    and paired, in order, with the lines of ``label_file``.  For every pair
    one line ``perturbed_image_<image_no>.png: <valid_flag>, <new_label>``
    is written to ``updated_label_file``: ``valid_flag`` is copied from the
    old label line and ``new_label`` is 1 when any packet of the image has
    ``pred_label`` ``A`` (case-insensitive), else 0.  Output stops when
    either the images or the label lines run out; an empty label file
    therefore yields an empty output file.

    Raises:
        ValueError: if a paired label line has no ``:`` separator or its
            valid flag is not an integer.
    """
    df = pd.read_csv(updated_track_file, dtype=str, low_memory=False)
    # The casts validate the tracksheet; image_no must be numeric so the
    # groups come out in numeric rather than lexicographic order.
    df["row_no"] = df["row_no"].astype(int)
    df["timestamp"] = df["timestamp"].astype(float)
    df["image_no"] = df["image_no"].astype(int)
    df["valid_flag"] = df["valid_flag"].astype(int)

    with open(label_file, 'r') as label_f, open(updated_label_file, 'w') as final_label_f:
        for raw_line, (image_no, group) in zip(label_f, df.groupby('image_no')):
            label_line = raw_line.strip()

            # Expected form "<image name>: <valid_flag>, <label>"; splitting
            # on the last colon tolerates colons inside the image name.
            _, sep, rest = label_line.rpartition(":")
            if not sep:
                raise ValueError(
                    f"Malformed label line (expected 'name: flag, label'): {label_line!r}"
                )
            valid_flag = int(rest.split(",")[0])

            is_attack = (group['pred_label'].astype(str).str.upper() == "A").any()
            new_label = 1 if is_attack else 0

            final_label_f.write(f"perturbed_image_{image_no}.png: {valid_flag}, {new_label}\n")


def run(params):
    """Update the label file described by ``params``.

    Required keys: ``tracksheet``, ``label_file``, ``updated_label_file``.
    """
    update_labels(params["tracksheet"], params["label_file"], params["updated_label_file"])
    print("updated label file")
"__main__": + +# cfg = yaml.safe_load(open("config_dos_OTIDS.yaml")) +# run(cfg["update"]) +# # run() + + +if __name__ == "__main__": + + import argparse + import yaml + + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, default="config_dos_CARLA.yaml") + args = parser.parse_args() + + cfg = yaml.safe_load(open(args.config)) + + # Ensure attack section exists + if "update" not in cfg: + raise ValueError("Config file must contain 'update' section.") + + run(cfg["update"]) \ No newline at end of file diff --git a/CARLA_Entropy/scripts/update_labels_spoof_CARLA.py b/CARLA_Entropy/scripts/update_labels_spoof_CARLA.py new file mode 100644 index 0000000..042fd78 --- /dev/null +++ b/CARLA_Entropy/scripts/update_labels_spoof_CARLA.py @@ -0,0 +1,96 @@ +import pandas as pd +import os +import yaml + +# def update_track(packet_level_data, prediction_file, updated_track_file): + +# with open(prediction_file, 'r') as prediction_f, open(packet_level_data, 'r') as packet_f, open(updated_track_file, 'w') as output_f : +# next(prediction_f) # Skip header line +# next(packet_f) # Skip header line +# #write header to output file +# output_f.write('row_no,timestamp,can_id,image_no,valid_flag,label' + '\n') +# for pred_line, packet_line in zip(prediction_f, packet_f): +# pred_parts = pred_line.strip().split(',') +# packet_parts = packet_line.strip().split(',') +# # print("Pred parts:", pred_parts) +# # print("Packet parts:", packet_parts) + +# # if(int(pred_parts[1],16) == int(packet_parts[2],16)): +# packet_parts = packet_parts[:-2] + ["1" if pred_parts[-1] == 'A' else "0"] +# updated_packet_line = ','.join(packet_parts) +# # print(updated_packet_line) +# output_f.write(updated_packet_line + '\n') +# # lines in packet_f > lines in prediction_f, so no need to handle extra lines in packet_f +# while(True): +# line = packet_f.readline() +# if not line: +# break +# part = line.strip().split(',') +# output_f.write(','.join(part[:-1])) +# output_f.write('\n') # 
def update_labels(updated_track_file, label_file, updated_label_file):
    """Write image-level labels derived from packet-level predictions.

    The tracksheet rows are grouped by ``image_no`` (ascending order) and the
    groups are paired, in order, with the non-blank lines of ``label_file``.
    For every pair one line is written to ``updated_label_file``::

        perturbed_image_<image_no>.png: <valid_flag>, <new_label>

    ``valid_flag`` is the first comma-separated value after the last ``:`` of
    the label line; ``new_label`` is 1 when any packet of the image has
    ``pred_label`` equal to ``"A"`` (case-insensitive), otherwise 0.
    Processing stops as soon as either the groups or the label lines run out.

    Args:
        updated_track_file: CSV tracksheet with at least the columns
            ``image_no`` and ``pred_label``.
        label_file: Text file with one ``<name>: <valid_flag>, <label>`` line
            per image.
        updated_label_file: Destination path; overwritten.

    Raises:
        KeyError: If ``image_no`` or ``pred_label`` is missing from the CSV.
        ValueError: If a label line has no integer valid flag.
    """
    df = pd.read_csv(updated_track_file)

    # Group on an integer key so images are visited in numeric order.  The
    # remaining columns do not influence the output and are left untouched.
    df["image_no"] = df["image_no"].astype(int)

    with open(label_file, "r") as label_f, \
            open(updated_label_file, "w") as final_label_f:
        # Skip blank lines (e.g. a trailing newline at end of file) instead of
        # failing on them; an entirely empty label file simply yields no output.
        stripped = (line.strip() for line in label_f)
        label_lines = (line for line in stripped if line)

        for (image_no, group), label_line in zip(df.groupby("image_no"), label_lines):
            # Split on the last ':' so a colon inside the image name is harmless.
            rest = label_line.rpartition(":")[2]
            valid_flag = int(rest.split(",")[0])

            is_attack = (group["pred_label"].astype(str).str.upper() == "A").any()
            new_label = 1 if is_attack else 0

            final_label_f.write(
                f"perturbed_image_{image_no}.png: {valid_flag}, {new_label}\n"
            )


def run(params):
    """Entry point used by the driver: regenerate the label file.

    Args:
        params: Mapping with the keys ``tracksheet``, ``label_file`` and
            ``updated_label_file`` (all file paths).

    Raises:
        KeyError: If one of the three keys is missing.
    """
    update_labels(
        params["tracksheet"],
        params["label_file"],
        params["updated_label_file"],
    )
    print("updated label file")


if __name__ == "__main__":

    import argparse
    import yaml

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default="config_spoof_CARLA.yaml")
    args = parser.parse_args()

    # Close the config file deterministically instead of leaking the handle.
    with open(args.config, "r") as config_f:
        cfg = yaml.safe_load(config_f)

    # Ensure the update section exists (an empty YAML file loads as None).
    if not cfg or "update" not in cfg:
        raise ValueError("Config file must contain 'update' section.")

    # NOTE(review): run() needs 'tracksheet', 'label_file' and
    # 'updated_label_file'; the default config's update section appears to
    # define only 'tracksheet_dir' -- confirm before relying on standalone use.
    run(cfg["update"])
index 0000000..a526ad1 --- /dev/null +++ b/MIRGU_Entropy/config_dos_mirgu.yaml @@ -0,0 +1,23 @@ +run_steps: + attack: True + decode: True + evaluate: True + update: True + + +attack: + original_test_dir: "dos_dataset/Mirgu_DoS_arbitrated_data/Surrogate/DoS_images_mirgu" + original_label_file: "dos_dataset/Mirgu_DoS_arbitrated_data/Surrogate/test_labels.txt" + original_tracksheet: "dos_dataset/Mirgu_DoS_arbitrated_data/Surrogate/test_track.csv" + surrogate_model: "Trained_models/densenet161_surrogate_Mirgu_DoS.pth" + output_dir: "perturbed_images_dos_mirgu" + +decode: + decoded_output_dir: "decoded_traffic_dos_mirgu" + +evaluate: + model_path: "Trained_models/Mirgu_DoS.h5" + prediction_output_dir: "prediction_output_dos_mirgu" + +update: + tracksheet_dir: "tracksheets_mirgu" diff --git a/MIRGU_Entropy/config_spoof_mirgu.yaml b/MIRGU_Entropy/config_spoof_mirgu.yaml new file mode 100755 index 0000000..308fa4f --- /dev/null +++ b/MIRGU_Entropy/config_spoof_mirgu.yaml @@ -0,0 +1,23 @@ +run_steps: + attack: True + decode: True + evaluate: True + update: True + + +attack: + original_test_dir: "spoof_dataset/Mirgu_break_spoof_arbitrated_data/Surrogate/spoof_images_mirgu" + original_label_file: "spoof_dataset/Mirgu_break_spoof_arbitrated_data/Surrogate/test_labels.txt" + original_tracksheet: "spoof_dataset/Mirgu_break_spoof_arbitrated_data/Surrogate/test_track.csv" + surrogate_model: "Trained_models/densenet161_surrogate_Mirgu_break_arbitrated_data_spoof_updated.pth" + output_dir: "perturbed_images_spoof_mirgu" + +decode: + decoded_output_dir: "decoded_traffic_spoof_mirgu" + +evaluate: + model_path: "Trained_models/Mirgu_break_spoof.h5" + prediction_output_dir: "prediction_output_spoof_mirgu" + +update: + tracksheet_dir: "tracksheets_mirgu" diff --git a/MIRGU_Entropy/driver_dos_mirgu.py b/MIRGU_Entropy/driver_dos_mirgu.py new file mode 100755 index 0000000..aaaaf03 --- /dev/null +++ b/MIRGU_Entropy/driver_dos_mirgu.py @@ -0,0 +1,175 @@ +import os + +import sys +import yaml 
# -------------------------------------------------
# Load YAML Config
# -------------------------------------------------
def load_config(path):
    """Parse the YAML file at ``path`` and return the loaded object."""
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)


def _packet_level_csv(config, round_num):
    """Return the path of the packet-level CSV the attack writes for a round."""
    return f'{config["attack"]["output_dir"]}/packet_level_data_{round_num}.csv'


def _traffic_file(config, round_num):
    """Return the path of the decoded traffic file for a round."""
    return f'{config["decode"]["decoded_output_dir"]}/traffic_{round_num}.txt'


def _run_attack_step(config, round_num):
    """Run the adversarial attack and save everything it prints to a stats file."""
    from scripts.adversarial_attack_dos_final_mirgu import run as run_attack

    attack_section = config["attack"]

    # Copy so the keys added below never leak back into the shared config.
    attack_cfg = attack_section.copy()
    attack_cfg["output_path"] = attack_section["output_dir"]
    attack_cfg["model_path"] = attack_section["surrogate_model"]
    attack_cfg["rounds"] = round_num

    if round_num == 0:
        # First round starts from the original dataset.
        attack_cfg["test_data_dir"] = attack_section["original_test_dir"]
        attack_cfg["packet_level_data"] = attack_section["original_tracksheet"]
        attack_cfg["test_label_file"] = attack_section["original_label_file"]
    else:
        # Later rounds start from the previous round's outputs.
        attack_cfg["test_data_dir"] = attack_section["output_dir"]
        attack_cfg["packet_level_data"] = (
            f'{config["update"]["tracksheet_dir"]}/dos_test_track_{round_num-1}.csv'
        )
        attack_cfg["test_label_file"] = (
            f'{attack_section["output_dir"]}/labels_{round_num}.txt'
        )

    attack_out = attack_cfg["output_path"]
    os.makedirs(attack_out, exist_ok=True)

    print("\n=== Step 1: Adversarial Attack ===")

    stats_file = os.path.join(attack_out, f"stats_round_{round_num}.txt")

    captured = StringIO()
    original_stdout = sys.stdout
    sys.stdout = captured
    try:
        run_attack(attack_cfg)
    finally:
        sys.stdout = original_stdout
        # Written inside ``finally`` so the captured output survives a failing
        # attack; it is usually the only clue to what went wrong.
        with open(stats_file, "w") as f:
            f.write(captured.getvalue())
        print(f"[INFO] Attack log saved to {stats_file}")


def _run_decode_step(config, round_num):
    """Decode the perturbed images of this round back into a traffic file."""
    from scripts.Traffic_decoder_dos_mirgu import run as run_decode

    decode_cfg = config["decode"].copy()
    decode_cfg["rounds"] = round_num
    decode_cfg["input_images"] = config["attack"]["output_dir"]
    decode_cfg["csv_file"] = _packet_level_csv(config, round_num)
    decode_cfg["output_file"] = _traffic_file(config, round_num)

    os.makedirs(config["decode"]["decoded_output_dir"], exist_ok=True)

    print("\n=== Step 2: Traffic Decoder ===")
    run_decode(decode_cfg)


def _run_evaluate_step(config, round_num):
    """Evaluate the decoded traffic of this round with the configured model."""
    from scripts.evaluate_dos_mirgu import run as run_eval

    # The copy already carries ``model_path`` from the evaluate section.
    eval_cfg = config["evaluate"].copy()
    eval_cfg["rounds"] = round_num
    eval_cfg["traffic_path"] = _traffic_file(config, round_num)
    eval_cfg["tracksheet"] = _packet_level_csv(config, round_num)
    eval_cfg["output_path"] = (
        f'{config["evaluate"]["prediction_output_dir"]}/prediction_output_{round_num}.csv'
    )

    os.makedirs(config["evaluate"]["prediction_output_dir"], exist_ok=True)

    print("\n=== Step 3: Evaluation ===")
    run_eval(eval_cfg)


def _run_update_step(config, round_num):
    """Produce the label file that the next round's attack will read."""
    from scripts.update_labels_dos_mirgu import run as run_update

    update_cfg = config["update"].copy()
    update_cfg["tracksheet"] = (
        f'{config["update"]["tracksheet_dir"]}/dos_test_track_{round_num}.csv'
    )
    # The original label file is always the template for the updated labels.
    update_cfg["label_file"] = config["attack"]["original_label_file"]
    update_cfg["updated_label_file"] = (
        f'{config["attack"]["output_dir"]}/labels_{round_num+1}.txt'
    )

    print("\n=== Step 4: Update Labels ===")
    run_update(update_cfg)


# -------------------------------------------------
# Main Pipeline
# -------------------------------------------------
def pipeline_run(config):
    """Run the enabled pipeline steps (attack, decode, evaluate, update) for one round.

    Args:
        config: Loaded configuration mapping.  Must contain ``round`` (int);
            ``run_steps`` maps step names to booleans and missing steps are
            treated as disabled.  Each enabled step reads its own section
            (``attack``, ``decode``, ``evaluate``, ``update``).

    Raises:
        KeyError: If ``round`` or a key needed by an enabled step is missing.
    """
    round_num = config["round"]
    steps = config.get("run_steps", {})

    print("\n==============================")
    print(f"Running Round in driver file {round_num}")
    print("==============================")

    # Step modules are imported lazily inside the helpers so a disabled step
    # never pulls in its dependencies.
    if steps.get("attack", False):
        _run_attack_step(config, round_num)

    if steps.get("decode", False):
        _run_decode_step(config, round_num)

    if steps.get("evaluate", False):
        _run_evaluate_step(config, round_num)

    if steps.get("update", False):
        _run_update_step(config, round_num)


# -------------------------------------------------
# MAIN
# -------------------------------------------------
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("--round", type=int, default=0)
    parser.add_argument("--config", type=str, default="config_dos_mirgu.yaml")
    args = parser.parse_args()

    cfg = load_config(args.config)
    cfg["round"] = args.round

    pipeline_run(cfg)
print(f"Running Round in driver file {round_num}") + print(f"==============================") + + # ========================================================== + # STEP 1: ATTACK + # ========================================================== + if steps.get("attack", False): + + from scripts.adversarial_attack_spoof_mirgu import run as run_attack + + round_num = config["round"] + + # Copy base attack config + attack_cfg = config["attack"].copy() + + # Required keys for attack script + attack_cfg["output_path"] = config["attack"]["output_dir"] + attack_cfg["model_path"] = config["attack"]["surrogate_model"] + attack_cfg["rounds"] = round_num + + # Round-dependent logic + if round_num == 0: + attack_cfg["test_data_dir"] = config["attack"]["original_test_dir"] + attack_cfg["packet_level_data"] = config["attack"]["original_tracksheet"] + attack_cfg["test_label_file"] = config["attack"]["original_label_file"] + else: + attack_cfg["test_data_dir"] = config["attack"]["output_dir"] + attack_cfg["packet_level_data"] = ( + f'{config["update"]["tracksheet_dir"]}/spoof_test_track_{round_num-1}.csv' + ) + attack_cfg["test_label_file"] = ( + f'{config["attack"]["output_dir"]}/labels_{round_num}.txt' + ) + + # --------------------------------------------------- + # Create output directory + # --------------------------------------------------- + attack_out = attack_cfg["output_path"] + os.makedirs(attack_out, exist_ok=True) + + print("\n=== Step 1: Adversarial Attack ===") + + # --------------------------------------------------- + # Capture stdout and save to stats file + # --------------------------------------------------- + stats_file = os.path.join(attack_out, f"stats_round_{round_num}.txt") + + old_stdout = sys.stdout + sys.stdout = mystream = StringIO() + + try: + run_attack(attack_cfg) + finally: + sys.stdout = old_stdout + + with open(stats_file, "w") as f: + f.write(mystream.getvalue()) + + print(f"[INFO] Attack log saved to {stats_file}") + + + + # 
========================================================== + # STEP 2: DECODE + # ========================================================== + if steps.get("decode", False): + + from scripts.Traffic_decoder_spoof_mirgu import run as run_decode + + decode_cfg = config["decode"].copy() + decode_cfg["rounds"] = round_num + decode_cfg["input_images"] = config["attack"]["output_dir"] + decode_cfg["csv_file"] = ( + f'{config["attack"]["output_dir"]}/packet_level_data_{round_num}.csv' + ) + decode_cfg["output_file"] = ( + f'{config["decode"]["decoded_output_dir"]}/traffic_{round_num}.txt' + ) + + os.makedirs(config["decode"]["decoded_output_dir"], exist_ok=True) + + print("\n=== Step 2: Traffic Decoder ===") + run_decode(decode_cfg) + + + # ========================================================== + # STEP 3: EVALUATION + # ========================================================== + if steps.get("evaluate", False): + + from scripts.evaluate_spoof_mirgu import run as run_eval + + eval_cfg = config["evaluate"].copy() + eval_cfg["rounds"] = round_num + eval_cfg["model_path"] = config["evaluate"]["model_path"] + eval_cfg["traffic_path"] = ( + f'{config["decode"]["decoded_output_dir"]}/traffic_{round_num}.txt' + ) + eval_cfg["tracksheet"] = ( + f'{config["attack"]["output_dir"]}/packet_level_data_{round_num}.csv' + ) + eval_cfg["output_path"] = ( + f'{config["evaluate"]["prediction_output_dir"]}/prediction_output_{round_num}.csv' + ) + + os.makedirs(config["evaluate"]["prediction_output_dir"], exist_ok=True) + + print("\n=== Step 3: Evaluation ===") + run_eval(eval_cfg) + + + # ========================================================== + # STEP 4: UPDATE + # ========================================================== + if steps.get("update", False): + + from scripts.update_labels_spoof_mirgu import run as run_update + + update_cfg = config["update"].copy() + + update_cfg["tracksheet"] = ( + f'{config["update"]["tracksheet_dir"]}/spoof_test_track_{round_num}.csv' + ) + + # 
Label logic + update_cfg["label_file"] = config["attack"]["original_label_file"] + + update_cfg["updated_label_file"] = ( + f'{config["attack"]["output_dir"]}/labels_{round_num+1}.txt' + ) + + print("\n=== Step 4: Update Labels ===") + run_update(update_cfg) + + + +# ------------------------------------------------- +# MAIN +# ------------------------------------------------- +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument("--round", type=int, default=0) + parser.add_argument("--config", type=str, default="config_spoof_mirgu.yaml") + args = parser.parse_args() + + cfg = load_config(args.config) + cfg["round"] = args.round + + pipeline_run(cfg) diff --git a/MIRGU_Entropy/networks/Inception_Resnet_V1.py b/MIRGU_Entropy/networks/Inception_Resnet_V1.py new file mode 100755 index 0000000..c195402 --- /dev/null +++ b/MIRGU_Entropy/networks/Inception_Resnet_V1.py @@ -0,0 +1,492 @@ +#!/usr/bin/env python3 +""" +Reduced Inception-ResNet V1 Model for Vehicle CAN Network Intrusion Detection + +This implementation creates a lightweight version of Inception-ResNet V1 architecture +specifically optimized for processing 29x29 binary CAN frame matrices. 
###################################################
# Custom Training Callback for Batch-Level Monitoring
###################################################
class BatchLossHistory(Callback):
    """Keras callback that records the reported loss after every training batch.

    After training, ``batch_losses`` holds one ``(iteration, loss)`` tuple per
    batch, where ``iteration`` counts batches from 1 across all epochs.

    NOTE(review): Keras documents the ``logs`` passed to batch-end hooks as
    metric results aggregated up to that batch, so the recorded value is
    presumably a running average rather than the raw per-batch loss --
    confirm against the installed Keras version.
    """

    def on_train_begin(self, logs=None):
        """Reset the history at the start of training.

        Args:
            logs: Training logs dictionary (unused but required by Keras).
        """
        self.batch_losses = []  # (iteration, loss) tuples, in training order
        self.iterations = 0     # batches seen so far, across epochs

    def on_batch_end(self, batch, logs=None):
        """Append the loss reported for the batch that just finished.

        Args:
            batch: Index of the batch within the current epoch (unused).
            logs: Metric dictionary supplied by Keras.  ``None`` is tolerated
                and recorded as a ``None`` loss.
        """
        self.iterations += 1
        # ``logs`` defaults to None in the signature, so guard before .get().
        loss = (logs or {}).get('loss')
        self.batch_losses.append((self.iterations, loss))
###################################################
# Inception-ResNet Block A: Multi-Scale Feature Extraction
###################################################
def inception_resnet_a_block(x, scale=0.1):
    """Apply one Inception-ResNet-A unit to ``x``.

    Three parallel convolution branches are concatenated, projected back to
    the input's channel count with a linear 1x1 convolution, multiplied by
    ``scale`` and added to ``x``; a ReLU follows the addition.

    Branches (all 'same' padding, ReLU):
        - 1x1 (32 filters)
        - 1x1 (32) -> 3x3 (32)
        - 1x1 (32) -> 3x3 (48) -> 3x3 (64)

    Args:
        x: Input feature tensor (channels last).
        scale: Multiplier applied to the residual branch before the addition.

    Returns:
        Tensor with the same spatial size and channel count as ``x``.
    """
    # The residual projection must emit as many channels as the input has.
    in_channels = tf.keras.backend.int_shape(x)[-1]

    def relu_conv(filters, kernel_size, tensor):
        # 'same'-padded ReLU convolution shared by every branch.
        return Conv2D(filters, kernel_size, padding='same', activation='relu')(tensor)

    # Point-wise branch.
    pointwise = relu_conv(32, (1, 1), x)

    # Single 3x3 branch, preceded by a 1x1 bottleneck.
    local = relu_conv(32, (3, 3), relu_conv(32, (1, 1), x))

    # Stacked 3x3 branch; two 3x3 convolutions cover a 5x5 receptive field.
    wide = relu_conv(32, (1, 1), x)
    wide = relu_conv(48, (3, 3), wide)
    wide = relu_conv(64, (3, 3), wide)

    # 32 + 32 + 64 = 128 channels after concatenation.
    concatenated = Concatenate(axis=-1)([pointwise, local, wide])

    # Linear projection back to the input depth, then damp the residual.
    residual = Conv2D(in_channels, (1, 1), padding='same')(concatenated)
    residual = Lambda(lambda t: t * scale)(residual)

    summed = Add()([x, residual])
    return tf.keras.layers.Activation('relu')(summed)
###################################################
# Inception-ResNet Block B: High-Level Feature Processing
###################################################
def inception_resnet_b_block(x, scale=0.1):
    """Apply one Inception-ResNet-B unit to ``x``.

    Two parallel branches are concatenated, projected back to the input's
    channel count with a linear 1x1 convolution, multiplied by ``scale`` and
    added to ``x``; a ReLU follows the addition.

    Branches (all 'same' padding, ReLU):
        - 1x1 (192 filters)
        - 1x1 (128) -> 1x7 (160) -> 7x1 (192)

    Args:
        x: Input feature tensor (channels last).
        scale: Multiplier applied to the residual branch before the addition.

    Returns:
        Tensor with the same spatial size and channel count as ``x``.
    """
    # The residual projection must emit as many channels as the input has.
    in_channels = tf.keras.backend.int_shape(x)[-1]

    def relu_conv(filters, kernel_size, tensor):
        # 'same'-padded ReLU convolution shared by both branches.
        return Conv2D(filters, kernel_size, padding='same', activation='relu')(tensor)

    # Point-wise branch.
    pointwise = relu_conv(192, (1, 1), x)

    # Asymmetric branch: a 1x7 followed by a 7x1 kernel in place of one 7x7.
    asymmetric = relu_conv(128, (1, 1), x)
    asymmetric = relu_conv(160, (1, 7), asymmetric)
    asymmetric = relu_conv(192, (7, 1), asymmetric)

    # 192 + 192 = 384 channels after concatenation.
    concatenated = Concatenate(axis=-1)([pointwise, asymmetric])

    # Linear projection back to the input depth, then damp the residual.
    residual = Conv2D(in_channels, (1, 1), padding='same')(concatenated)
    residual = Lambda(lambda t: t * scale)(residual)

    summed = Add()([x, residual])
    return tf.keras.layers.Activation('relu')(summed)
+################################################### +# Reduction Block B: Final Spatial Downsampling +################################################### +def reduction_b_block(x): + """ + Reduction-B block for final spatial downsampling before global pooling. + + This block performs the final spatial reduction while dramatically increasing + channel depth. It prepares features for global pooling by creating a very + high-dimensional but spatially compact representation. + + Architecture branches: + - Branch 0: Max pooling → preserves dominant features + - Branch 1: Direct 3x3 conv with stride=2 → learned aggressive feature extraction + + Args: + x: Input tensor of shape (batch_size, 6, 6, 448) + + Returns: + Tensor of shape (batch_size, 2, 2, 896) + """ + # Branch 0: Max pooling preserves strongest activations + # 6x6x448 → 2x2x448 + branch_0 = MaxPooling2D((3, 3), strides=2, padding='valid')(x) + + # Branch 1: Aggressive feature extraction with large channel expansion + # 6x6x448 → 2x2x448 (maintains input channel depth) + # High channel count captures complex high-level patterns + branch_1 = Conv2D(448, (3, 3), strides=2, padding='valid', activation='relu')(x) + + # Concatenate branches for maximum feature preservation + # Total channels: 448 + 448 = 896 + x = Concatenate(axis=-1)([branch_0, branch_1]) + + return x + +################################################### +# Main Model Architecture Builder +################################################### +def build_reduced_inception_resnet(input_shape=(29, 29, 1), num_classes=2, dropout_rate=0.2): + """ + Build the complete reduced Inception-ResNet model for CAN intrusion detection. + + This function assembles all components into a complete neural network optimized + for binary classification of CAN network traffic (normal vs attack). + + Architecture Summary: + 1. Stem Block: 29x29x1 → 13x13x128 (initial feature extraction + reduction) + 2. 
Inception-ResNet-A: 13x13x128 → 13x13x128 (multi-scale feature extraction) + 3. Reduction-A: 13x13x128 → 6x6x448 (spatial reduction + channel expansion) + 4. Inception-ResNet-B: 6x6x448 → 6x6x448 (high-level asymmetric features) + 5. Reduction-B: 6x6x448 → 2x2x896 (final spatial reduction) + 6. Global Average Pooling: 2x2x896 → 1x1x896 (spatial aggregation) + 7. Classification Head: 896 → 2 (binary classification) + + Args: + input_shape: Shape of input CAN frames (default: 29x29x1) + num_classes: Number of output classes (default: 2 for binary classification) + dropout_rate: Dropout rate for regularization (default: 0.2) + + Returns: + Compiled Keras Model ready for training + """ + # Define input layer for 29x29 binary CAN frame matrices + inputs = Input(shape=input_shape) + + # Stage 1: Initial feature extraction and spatial reduction + # 29x29x1 → 13x13x128 + x = stem_block(inputs) + + # Stage 2: Multi-scale feature extraction with residual connections + # 13x13x128 → 13x13x128 (maintains spatial dimensions) + x = inception_resnet_a_block(x, scale=0.1) + + # Stage 3: First major spatial reduction with channel expansion + # 13x13x128 → 6x6x448 + x = reduction_a_block(x) + + # Stage 4: High-level feature extraction with asymmetric convolutions + # 6x6x448 → 6x6x448 (maintains spatial dimensions) + x = inception_resnet_b_block(x, scale=0.1) + + # Stage 5: Final spatial reduction with maximum channel expansion + # 6x6x448 → 2x2x896 + x = reduction_b_block(x) + + # Stage 6: Global spatial aggregation + # 2x2x896 → 1x1x896 (eliminates spatial dimensions entirely) + x = AveragePooling2D((2, 2), padding='valid')(x) + + # Stage 7: Flatten for dense layer processing + # 1x1x896 → 896-dimensional feature vector + x = Flatten()(x) + + # Stage 8: Regularization to prevent overfitting + # Randomly sets 20% of features to zero during training + x = Dropout(dropout_rate)(x) + + # Stage 9: Final classification layer + # 896 → 2 classes with softmax activation for probability 
###################################################
# Model Wrapper Class for Training and Evaluation
###################################################
class Inception_Resnet_V1:
    """
    Training wrapper around the reduced Inception-ResNet model.

    Responsibilities:
    - build the network via build_reduced_inception_resnet() with its defaults
    - compile and fit it while a BatchLossHistory callback records per-batch loss
    - save the fitted model to '<filename_prefix>final_model.h5'
    - expose the Keras model summary
    """

    def __init__(self, epochs=10, batch_size=32, load_weights=False):
        """
        Build the model and remember the training hyperparameters.

        Args:
            epochs: Default number of training epochs (default: 10)
            batch_size: Batch size handed to fit() (default: 32)
            load_weights: Accepted for API compatibility. Weight loading is
                still a placeholder, so passing True currently loads nothing.
        """
        self.epochs = epochs
        self.batch_size = batch_size

        # Architecture comes from the module-level builder (default arguments).
        self.model = build_reduced_inception_resnet()

        if load_weights:
            # Placeholder: no weights are loaded yet.
            # Example: self.model.load_weights('path_to_pretrained_weights.h5')
            pass

    def train(self, x_train, y_train, x_test, y_test, filename_prefix="", epochs_override=None):
        """
        Compile, fit and save the model, recording the loss of every batch.

        Args:
            x_train: Training inputs
            y_train: Integer class labels (the loss is sparse categorical
                crossentropy)
            x_test: Held-out inputs, evaluated after every epoch
            y_test: Held-out labels
            filename_prefix: Prepended to 'final_model.h5' when saving
            epochs_override: Epoch count for this call; None keeps self.epochs

        Returns:
            tuple: (history, batch_losses)
                - history: object returned by Keras fit(); it also carries
                  validation metrics when x_test / y_test are given
                - batch_losses: the `batch_losses` attribute of the
                  BatchLossHistory callback used for this run
        """
        epochs_to_run = self.epochs if epochs_override is None else epochs_override

        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

        batch_callback = BatchLossHistory()

        # The held-out split used to be accepted and then ignored. Pass it to
        # fit() so each epoch reports validation loss/accuracy. Callers that
        # pass None for either part keep the old training-only behaviour.
        validation_data = None
        if x_test is not None and y_test is not None:
            validation_data = (x_test, y_test)

        history = self.model.fit(
            x_train, y_train,
            epochs=epochs_to_run,
            batch_size=self.batch_size,
            validation_data=validation_data,
            callbacks=[batch_callback]  # Capture per-batch metrics
        )

        # One file per experiment: the prefix distinguishes the saved models.
        self.model.save(filename_prefix + 'final_model.h5')

        return history, batch_callback.batch_losses

    def summary(self):
        """
        Print the Keras layer-by-layer summary of the wrapped model.

        Returns:
            Whatever `self.model.summary()` returns (Keras prints the table
            itself; the call normally returns None).
        """
        return self.model.summary()
def save_to_txt(dataset, traffic_file, packet_level_data, rounds):
    """
    Write decoded CAN frames to a comma-separated traffic file with A/B labels.

    Frames are paired positionally with the data rows of the tracksheet: the
    i-th decoded frame takes its timestamp and label inputs from the i-th
    row. Writing stops at the first blank tracksheet line, or when either
    side runs out of entries.

    Args:
        dataset: List of dicts with "can_id", "dlc" and "data" (list of hex
            byte strings), e.g. the output of process_image.
        traffic_file: Output path; overwritten.
        packet_level_data: Tracksheet CSV. Needs the columns timestamp,
            can_id, original_label and operation_label, plus pred_label
            when rounds != 0.
        rounds: 0 labels from original_label; any other value labels from
            pred_label.

    Raises:
        KeyError: a required column is missing from the CSV header (this
            now includes pred_label for rounds != 0 and an empty CSV file).
    """
    # Operation labels that keep an "A" base label as "A".
    recognised_operations = ("I", "M", "Pi", "Pm", "None")

    def convert_label(org_label, oop_label):
        # "A" only when the base label is "A" and the operation label is
        # one of the recognised values; every other combination is "B".
        if org_label.strip() == "A" and oop_label.strip() in recognised_operations:
            return "A"
        return "B"

    required_cols = ["timestamp", "can_id", "original_label", "operation_label"]
    label_col = "original_label"
    if rounds != 0:
        # Later rounds label from the previous prediction. Validate the
        # column up front instead of failing on the first data row.
        label_col = "pred_label"
        required_cols.append(label_col)

    written = 0
    with open(traffic_file, 'w') as file, open(packet_level_data, 'r') as csv_file:

        file.write("timestamp,can_id,dlc,d0,d1,d2,d3,d4,d5,d6,d7,label\n")

        # An empty file yields an empty header and fails the column check
        # below, rather than escaping as a bare StopIteration.
        header = next(csv_file, "").strip().split(",")

        # Lookup table: column name -> position in a row
        col_index = {name: idx for idx, name in enumerate(header)}

        for col in required_cols:
            if col not in col_index:
                raise KeyError(f"Column '{col}' not found in CSV header: {header}")

        for data, raw_line in zip(dataset, csv_file):
            line = raw_line.strip()
            if not line:
                break  # blank line marks the end of usable rows

            extra_data = line.split(",")
            timestamp = float(extra_data[col_index["timestamp"]])
            final_label = convert_label(
                extra_data[col_index[label_col]],
                extra_data[col_index["operation_label"]],
            )

            data_bytes_str = ",".join(data["data"])

            file.write(
                f"{timestamp:.6f},{data['can_id']},{data['dlc']},{data_bytes_str},{final_label}\n"
            )
            written += 1

    if written < len(dataset):
        # Same outcome as before (output is truncated), but no longer silent.
        print(
            f"Warning: only {written} of {len(dataset)} decoded frames had a "
            f"tracksheet row in {packet_level_data}"
        )
def run(params):
    """
    Decode every perturbed image of a folder and write the traffic file.

    Args:
        params: Mapping with the keys
            "rounds"       - current round number, forwarded to save_to_txt
            "input_images" - folder holding the PNG images to decode
            "csv_file"     - packet-level tracksheet paired with the frames
            "output_file"  - destination of the decoded traffic file

    Raises:
        KeyError: one of the keys above is missing from params.
    """
    rounds = params["rounds"]
    input_images = params["input_images"]
    packet_level_data = params["csv_file"]
    traffic_file = params["output_file"]

    all_data = process_multiple_images(input_images)
    print("Decoded")
    save_to_txt(all_data, traffic_file, packet_level_data, rounds)
    # Report the file that was actually written. The old message printed a
    # fixed directory name even when the configured output path differed.
    print(f"Saved {traffic_file}")
= np.array(image) + +# label_matrix = np.zeros((128, 128), dtype=np.uint8) +# for rgb, value in PIXEL_COLOR_MAP.items(): +# mask = np.all(pixels == rgb, axis=-1) +# label_matrix[mask] = value + +# data_array = label_matrix.tolist() +# dataset = [] + +# for row in data_array: +# if row[0] == 0: # Frame row +# # print("inside row") +# n_row = row[::-1] +# last_1_index = n_row.index(1) +# last_1 = len(row) - 1 - last_1_index +# binary_string = "".join(map(str, row[:last_1 + 1])) +# # print("binary tsrning", binary_string) +# # CAN ID (bits 1–11) +# can_id = hex(int(binary_string[1:12], 2))[2:].zfill(4) +# # print("canid", can_id) +# # DLC (bits 15–18) +# dlc = int(binary_string[15:19], 2) +# # print("dlc", dlc) +# # Correct CAN data extraction (fixed) +# start_bit = 19 +# end_bit = 19 + dlc * 8 +# data_bits = binary_string[start_bit:end_bit] + +# data_bytes = [ +# hex(int(data_bits[i:i + 8], 2))[2:].zfill(2) +# for i in range(0, len(data_bits), 8) +# ] + +# dataset.append({ +# "can_id": can_id, +# "dlc": dlc, +# "data": data_bytes +# }) +# # print("dataset", dataset) + +# return dataset + + + +# def save_to_txt(dataset, file_path, data_csv_file): + +# def convert_label(raw_label, pkt_label): +# raw_label = raw_label.strip() + +# # map I/M → A +# if raw_label in ["I", "M"]: +# return "A" + +# # raw_label "None" + pkt_label == 1 → A +# if raw_label == "None" and pkt_label == "1": +# return "A" + +# # everything else → B +# return "B" + +# with open(file_path, 'w') as file, open(data_csv_file, 'r') as csv_file: + +# file.write("timestamp,can_id,dlc,d0,d1,d2,d3,d4,d5,d6,d7,label\n") +# # Read header ONCE +# header = next(csv_file).strip().split(",") + +# # Create lookup table: column_name → index +# col_index = {name: idx for idx, name in enumerate(header)} + +# # Validate required columns exist +# required_cols = ["timestamp", "can_id", "label", "perturbation_type"] +# for col in required_cols: +# if col not in col_index: +# raise KeyError(f"Column '{col}' not found in 
CSV header: {header}") + +# # Now read each subsequent row +# for data in dataset: + +# line = csv_file.readline().strip() +# if not line: +# break # no more rows → stop + +# extra_data = line.split(",") + +# # Use column names instead of hardcoded [-3], [-2], etc. +# timestamp = float(extra_data[col_index["timestamp"]]) +# pkt_label = extra_data[col_index["label"]] # old 'label' +# raw_label = extra_data[col_index["perturbation_type"]] # raw attack label (I/M/None) + +# final_label = convert_label(raw_label, pkt_label) + +# data_bytes_str = ",".join(data["data"]) + +# file.write( +# f"{timestamp:.6f},{data['can_id']},{data['dlc']},{data_bytes_str},{final_label}\n" +# ) + + +# def process_multiple_images(input_images, output_file, csv_file): + +# if input_images == "dos_k12": +# image_folder = r"perturbed_images" +# elif input_images == "test80": +# image_folder = r"test80/perturbed" +# else: +# print("Invalid input. Please provide a valid filetype.") +# return + + +# image_paths = [os.path.join(image_folder, f) +# for f in os.listdir(image_folder) +# if f.endswith(".png")] + +# image_paths.sort(key=lambda x: int(x.split('_')[-1].split('.')[0])) + +# all_data = [] + +# for image_path in image_paths: +# dataset = process_image(image_path) +# all_data.extend(dataset) + +# save_to_txt(all_data, output_file, csv_file) + + + +# def run(params): + +# # input_images = "gear_k12" +# input_images = params["input_images"] +# csv_file = params["csv_file"] +# output_file = params["output_file"] +# # if len(sys.argv) != 2: +# # print("Usage: python file_name.py ") +# # sys.exit(1) + +# # input_images = sys.argv[1] + +# # output_file = f"./decoded_traffic/traffic_{rounds}.txt" +# # csv_file = "./blackbox_dos_k_12_nfd/packet_level_data_fixed.csv" + +# process_multiple_images(input_images, output_file, csv_file) +# print("Decoded") + + +# if __name__ == "__main__": +# # Allow standalone execution +# cfg = yaml.safe_load(open("config_dos.yaml")) +# run(cfg["decode"]) \ No newline 
def process_image(image_path):
    """
    Decode the CAN frames stored row by row in a colour-coded image.

    Each pixel is translated through PIXEL_COLOR_MAP; pixels of any other
    colour stay 0. A row whose first pixel maps to 0 is read as one frame.
    The row is cut after its last 1 bit and then sliced by position:
    bits 1-11 are the CAN ID, bits 15-18 the DLC, and the DLC * 8 bits
    starting at bit 19 are the payload.

    Args:
        image_path: Path of the image to decode.

    Returns:
        list of dicts, one per decoded row, with
            "can_id": 4-character lowercase hex string
            "dlc":    int
            "data":   list of 2-character hex byte strings

    Raises:
        ValueError: a frame row is too short to hold the DLC field, or it
            contains a non-binary value inside a sliced field.
    """
    # Close the file handle promptly and normalise the pixel layout. RGBA,
    # palette or greyscale files cannot be compared against the RGB triples.
    with Image.open(image_path) as image:
        pixels = np.array(image.convert("RGB"))

    # Size the label grid from the image instead of assuming 128x128.
    label_matrix = np.zeros(pixels.shape[:2], dtype=np.uint8)
    for rgb, value in PIXEL_COLOR_MAP.items():
        mask = np.all(pixels == rgb, axis=-1)
        # The map stores digits as strings; convert explicitly.
        label_matrix[mask] = int(value)

    dataset = []

    for row in label_matrix.tolist():
        if row[0] != 0:
            continue  # not a frame row

        if 1 not in row:
            # A row with no 1 bit used to abort the whole decode via
            # list.index(); it carries no frame, so skip it.
            # NOTE(review): frames are later paired with tracksheet rows by
            # position - confirm such rows never stand for a real packet.
            continue

        last_1 = len(row) - 1 - row[::-1].index(1)
        binary_string = "".join(map(str, row[:last_1 + 1]))

        if len(binary_string) < 19:
            raise ValueError(
                f"Frame row in {image_path} is too short to decode "
                f"({len(binary_string)} bits)"
            )

        # CAN ID (bits 1-11)
        can_id = format(int(binary_string[1:12], 2), "x").zfill(4)
        # DLC (bits 15-18)
        dlc = int(binary_string[15:19], 2)

        # Payload: DLC bytes starting right after the DLC field
        start_bit = 19
        data_bits = binary_string[start_bit:start_bit + dlc * 8]
        data_bytes = [
            format(int(data_bits[i:i + 8], 2), "x").zfill(2)
            for i in range(0, len(data_bits), 8)
        ]

        dataset.append({
            "can_id": can_id,
            "dlc": dlc,
            "data": data_bytes
        })

    return dataset
def process_multiple_images(image_folder):
    """
    Decode all PNG images of a folder in numeric file-name order.

    Files are ordered by the integer between the last underscore of the
    file name and its first following dot (e.g. 'perturbed_image_12.png'
    sorts as 12). Each file is then decoded with process_image.

    Args:
        image_folder: Directory containing the '.png' images.

    Returns:
        One flat list with the decoded frames of all images, in that order.
        Empty when the folder holds no '.png' file.

    Raises:
        ValueError: a '.png' file name does not end in '_<number>'.
    """
    def image_number(path):
        # Parse the file name only. The old key parsed the whole path, so an
        # underscore or dot in a directory name could end up in the number.
        return int(os.path.basename(path).split('_')[-1].split('.')[0])

    image_paths = sorted(
        (
            os.path.join(image_folder, file_name)
            for file_name in os.listdir(image_folder)
            if file_name.endswith(".png")
        ),
        key=image_number,
    )

    all_data = []
    for image_path in image_paths:
        all_data.extend(process_image(image_path))

    return all_data
# Inception-ResNet Model
class InceptionStem(nn.Module):
    """
    Stem of the Inception-ResNet variant: 3-channel input to 128 feature maps.

    The stack is five convolutions with one 3x3 / stride-2 max-pool after
    the second. No activation layers sit between the convolutions. Only the
    second convolution ('valid' 3x3) and the pool change the spatial size.
    """

    def __init__(self):
        super().__init__()
        # Layer order and the attribute name `stem` fix the checkpoint
        # parameter names (stem.0, stem.1, stem.3, ...).
        self.stem = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding='same'),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding='valid'),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            nn.Conv2d(32, 64, kernel_size=1, stride=1, padding='valid'),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding='same'),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
        )

    def forward(self, x):
        """Run the stem stack on a (batch, 3, H, W) tensor."""
        return self.stem(x)
class ReductionA(nn.Module):
    """
    Reduction-A: three stride-2 branches joined on the channel axis.

    branch0 is a 3x3 'valid' convolution to 192 channels. branch1 is a
    1x1 -> 3x3 -> 3x3 (stride 2, 'valid') convolution stack ending in 128
    channels. branch2 is a 3x3 max-pool that keeps the input channels.
    The output therefore has 192 + 128 + in_channels channels.
    """

    def __init__(self, in_channels = 128):
        super().__init__()
        self.branch0 = nn.Conv2d(in_channels, 192, kernel_size=3, stride=2, padding='valid')
        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channels, 96, kernel_size=1, stride=1, padding='same'),
            nn.Conv2d(96, 96, kernel_size=3, stride=1, padding='same'),
            nn.Conv2d(96, 128, kernel_size=3, stride=2, padding='valid'),
        )
        self.branch2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

    def forward(self, x):
        """Apply every branch to x and concatenate the results (dim=1)."""
        branches = (self.branch0, self.branch1, self.branch2)
        return torch.cat([branch(x) for branch in branches], dim=1)
class ReductionB(nn.Module):
    """
    Reduction-B: four branches over a 448-channel input, joined on dim=1.

    Every branch ends in a 3x3 stride-1 operation without padding, so the
    height and width each shrink by 2. Channel counts are 192 + 128 + 128
    from the convolution branches plus the 448 input channels kept by the
    max-pool, giving 896 in total.
    """

    def __init__(self):
        super().__init__()
        # Construction order is kept: it determines the parameter names.
        self.branch0 = nn.Sequential(
            nn.Conv2d(448, 128, kernel_size=1, stride=1, padding='same'),
            nn.Conv2d(128, 192, kernel_size=3, stride=1, padding='valid'),
        )
        self.branch1 = nn.Sequential(
            nn.Conv2d(448, 128, kernel_size=1, stride=1, padding='same'),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='valid'),
        )
        self.branch2 = nn.Sequential(
            nn.Conv2d(448, 128, kernel_size=1, stride=1, padding='same'),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='valid'),
        )
        self.branch3 = nn.MaxPool2d(kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Apply the four branches to x and concatenate them channel-wise."""
        outputs = [
            self.branch0(x),
            self.branch1(x),
            self.branch2(x),
            self.branch3(x),
        ]
        return torch.cat(outputs, dim=1)
def load_labels(label_file):
    """
    Load image labels from the label file.

    Every non-blank line has the form '<filename>: <v1>, <v2>, ...'; single
    and double quotes anywhere in the line are dropped. The label of a file
    is the LAST comma-separated value, converted to int.

    Args:
        label_file: Path of the text file to read.

    Returns:
        dict mapping file name -> int label, in file order. A file name
        that occurs twice keeps its last label.

    Raises:
        ValueError: a line has no ': ' separator or its last value is not
            an integer.
    """
    labels = {}
    with open(label_file, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            cleaned = line.strip().replace("'", "").replace('"', '')
            if not cleaned:
                # Blank lines (e.g. a trailing empty line) used to crash the
                # tuple unpacking below; they carry no label, so skip them.
                continue

            # Split on the last ': ' so the label part is always the tail.
            filename, separator, label_str = cleaned.rpartition(': ')
            if not separator:
                raise ValueError(
                    f"{label_file}:{line_number}: expected '<filename>: <label>', "
                    f"got {cleaned!r}"
                )

            # Only the last comma-separated value is the class label.
            label = int(label_str.strip().split(',')[-1].strip())

            labels[filename.strip()] = label
    return labels
def calculate_crc(data):
    """
    Fold a bit sequence into a 15-bit checksum using polynomial 0x4599.

    Args:
        data: Iterable of bits; each item goes through int() and only its
            lowest bit is used (a '0'/'1' string or a list of ints works).

    Returns:
        int in the range 0..0x7FFF; 0 for empty or all-zero input.

    NOTE(review): every input bit is followed by 15 shift steps that test
    bit 15 of an unmasked register. This looks different from the usual
    one-shift-per-bit CAN CRC-15 update - confirm against the code that
    generated the training images before changing the values.
    """
    polynomial = 0x4599
    register = 0x0000

    for symbol in data:
        # Mix the incoming bit into bit 14 of the register.
        register ^= (int(symbol) & 0x01) << 14

        for _ in range(15):
            register <<= 1
            # Bit 16 after the shift is the bit 15 the step started with.
            if register & 0x10000:
                register ^= polynomial

        # Keep only the low 15 bits before the next input bit.
        register &= 0x7FFF

    return register
def generate_mask(perturbed_data, modification_queue, injection_queue,prev_mod_queue, prev_inj_queue,rounds, I, M, Pi, Pm):
    """
    Build a 0/1 perturbation mask over the ID and data bits of chosen rows.

    Up to I, M, Pi and Pm entries are popped from the left of the injection,
    modification, previous-injection and previous-modification queues, in
    that order. The queues are consumed in place. Each entry is a
    (score, row) pair and only the row is used. For every selected row the
    mask is set to 1.0 in all samples and channels over columns 1-11 (ID)
    and 19-82 (data).

    Args:
        perturbed_data: 4-D tensor (batch, channels, height, width); only
            its shape is used.
        modification_queue, injection_queue, prev_mod_queue, prev_inj_queue:
            deques of (score, row) pairs.
        rounds: Unused by this function.
        I, M, Pi, Pm: Maximum number of rows taken from the injection,
            modification, previous-injection and previous-modification
            queues respectively.

    Returns:
        (mask, injection_rows, modification_rows, prev_modification_rows,
        prev_injection_rows). The mask is float32 with the shape of
        perturbed_data.
    """
    # Column layout of one row: 1 SOF bit, 11 ID bits, 7 in-between bits,
    # then 64 data bits.
    sof_len, id_bits, mid_bits, data_bits = 1, 11, 7, 64
    id_cols = slice(sof_len, sof_len + id_bits)
    data_first = sof_len + id_bits + mid_bits
    data_cols = slice(data_first, data_first + data_bits)

    # The unpacking doubles as a check that the input is 4-D.
    batch_size, channels, height, width = perturbed_data.shape

    mask = torch.zeros_like(perturbed_data, dtype=torch.float32)

    def take(queue, budget):
        # Pop at most `budget` entries from the left and keep their rows.
        picked = []
        remaining = min(budget, len(queue))
        while remaining > 0:
            _, row = queue.popleft()
            picked.append(row)
            remaining -= 1
        return picked

    # Pop order matters if a caller hands the same deque in twice.
    injection_rows = take(injection_queue, I)
    modification_rows = take(modification_queue, M)
    prev_injection_rows = take(prev_inj_queue, Pi)
    prev_modification_rows = take(prev_mod_queue, Pm)

    selected = injection_rows + modification_rows + prev_modification_rows + prev_injection_rows

    for b in range(batch_size):
        for row in selected:
            mask[b, :, row, id_cols] = 1.0
            mask[b, :, row, data_cols] = 1.0

    return mask, injection_rows, modification_rows, prev_modification_rows, prev_injection_rows
+ - Flips pixels based on sign of gradient: + If black ([0,0,0]) and sign_grad > 0 → flip to white ([1,1,1]) + If white ([1,1,1]) and sign_grad < 0 → flip to black ([0,0,0]) + - Works for ID bits and data bits separately with different top-k percentages. + """ + + perturbed_image = image.clone() # Start from original image + B, C, H, W = image.shape + ID_LEN = 11 + MID_LEN = 7 + DATA_LEN = 64 + id_start = 1 + id_end = id_start + ID_LEN + data_start = 1 + ID_LEN + MID_LEN + data_end = data_start + DATA_LEN + count_bit_flip_1 = 0 + count_bit_flip_0 = 0 + + for b in range(B): + rows = mask[b, 0].nonzero(as_tuple=True)[0] # Only use first channel for mask + rows = torch.unique(rows) + rows = torch.sort(rows, descending=True).values # Sort descending + + for row in rows: + # --- ID bits --- + id_pixels = perturbed_image[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID Pixels:", id_pixels) + id_grads = data_grad[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID gradient:", id_grads) + id_signs = sign_data_grad[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID Signs:", id_signs) + + # Collapse gradients to single value per bit (sum over channels) + id_scores = torch.sum(torch.abs(id_grads), dim=0) + # print("ID Scores: ", id_scores) + num_id_top = max(1, int(1.0 * ID_LEN)) + id_top_idx = torch.topk(id_scores, num_id_top).indices + # print("Top Index:", id_top_idx) + count_bit_flip = 0 + # print("ID before flipping: ", id_pixels.clone()) + for idx in id_top_idx: + # print("Index:", idx) + pixel = id_pixels[:, idx] # [R, G, B] + # print("Pixel:", pixel) + grad_sign = torch.sum(id_signs[:, idx]).item() # Combine channels' signs + grad_sign = (id_signs[0, idx] + id_signs[1, idx] + id_signs[2, idx]).item() + # print("Grad Sign:", grad_sign) + if grad_sign > 0: # Black → White + id_pixels[:, idx] = 1.0 + count_bit_flip += 1 + elif grad_sign < 0: # White → Black + id_pixels[:, idx] = 0.0 + count_bit_flip += 1 + + # print("Number of bitflip in 
def gradient_perturbation(image, perturbed_image, mask, existing_hex_ids, packet_level_data, image_no, injection_rows, modification_rows, prev_modification_rows, prev_injection_rows, rounds):
    """
    Turn the bit-flipped rows into well-formed frames and update the tracksheet.

    For every masked row (channel 0 of ``mask`` non-zero), processed from the
    bottom of the image upwards:

    1. the 11 ID bits are decoded from ``perturbed_image`` and projected onto
       the closest ID in ``existing_hex_ids`` (Hamming distance, first match
       wins on ties);
    2. the 64 data bits are decoded as they are;
    3. the 7 middle bits are read back from the image for modified /
       previously touched rows, or set to ``"0001000"`` for new injections;
    4. SOF + ID + middle + data + 15-bit CRC + fixed trailer are written back
       into the row and the remaining columns are painted green;
    5. ``packet_level_data`` is updated: a new row is inserted for an
       injection, otherwise ``can_id`` / ``operation_label`` of the matching
       packet are overwritten (``"M"``, ``"Pm"`` or ``"Pi"``).

    The packet that corresponds to an image row is located by counting the
    rows above it whose first pixel is black in ``image``. This relies on
    ``packet_level_data`` having a default 0..n-1 index.

    Args:
        image: original image tensor, used only to locate packet rows.
        perturbed_image: tensor to rewrite in place.
        mask: perturbation mask; only channel 0 is inspected.
        existing_hex_ids: hexadecimal ID strings; must be non-empty.
        packet_level_data: pandas DataFrame tracksheet. Modified rows are
            written in place; injections produce a new DataFrame.
        image_no: value of the ``image_no`` column identifying this image.
        injection_rows, modification_rows, prev_modification_rows,
        prev_injection_rows: row indices selected by ``generate_mask``.
        rounds: when non-zero, inserted rows also get ``pred_label = "A"``.

    Returns:
        ``(perturbed_image, packet_level_data)``.

    Raises:
        ValueError: a masked row belongs to none of the four row lists.
        IndexError: an injection row has no packet row above it, or the
            tracksheet has no rows for ``image_no``.
    """
    ID_LEN = 11
    MID_LEN = 7   # RTR + IDE + reserved bit + DLC
    DATA_LEN = 64
    CRC_LEN = 15
    FRAME_ENDING = '1011111111111'  # CRC delimiter, ACK, EoF, IFS
    id_start = 1
    data_start = id_start + ID_LEN + MID_LEN

    existing_int_ids = [int(h, 16) for h in existing_hex_ids]

    # Membership sets, in the same priority order the flags are assigned.
    flagged_rows = (
        ("injection", {int(r) for r in injection_rows}),
        ("modification", {int(r) for r in modification_rows}),
        ("prev_mod", {int(r) for r in prev_modification_rows}),
        ("prev_inj", {int(r) for r in prev_injection_rows}),
    )
    operation_labels = {"modification": "M", "prev_mod": "Pm", "prev_inj": "Pi"}

    def classify(row_idx):
        for name, members in flagged_rows:
            if row_idx in members:
                return name
        # Previously this fell through with `flag` unbound (first row) or
        # silently reused the previous row's flag.
        raise ValueError(f"Masked row {row_idx} is not in any perturbation row list")

    def decode_bits(b, row_idx, start, stop):
        # Majority vote over channels between exact 1.0 and exact 0.0 values;
        # ties decode as '1'.
        bits = []
        for col in range(start, stop):
            pix = perturbed_image[b, :, row_idx, col]
            ones = (pix == 1.0).sum().item()
            zeros = (pix == 0.0).sum().item()
            bits.append('1' if ones >= zeros else '0')
        return ''.join(bits)

    for b in range(image.shape[0]):
        total_rows = torch.unique(mask[b, 0].nonzero(as_tuple=True)[0])
        # Bottom-up, so inserting a tracksheet row never shifts the position
        # of packets belonging to rows still to be processed.
        total_rows = torch.sort(total_rows, descending=True).values

        for row in total_rows:
            row_idx = row.item()
            flag = classify(row_idx)

            # Rows above this one whose first pixel is black in all three
            # channels, nearest first.
            packets_before = [
                r for r in range(row_idx - 1, -1, -1)
                if image[b, 0, r, 0].item() == 0.0
                and image[b, 1, r, 0].item() == 0.0
                and image[b, 2, r, 0].item() == 0.0
            ]
            target_index = len(packets_before) - 1
            image_packets = packet_level_data[packet_level_data["image_no"] == image_no]

            if flag == 'injection':
                if not packets_before:
                    raise IndexError(
                        f"Injection row {row_idx} of image {image_no} has no preceding packet row"
                    )
                start_row = packets_before[0]
                red_pixel_count = (
                    (perturbed_image[b, 0, start_row:row_idx, :] == 1.0)
                    & (perturbed_image[b, 1, start_row:row_idx, :] == 0.0)
                    & (perturbed_image[b, 2, start_row:row_idx, :] == 0.0)
                ).sum().item()
                timestamp = image_packets.iloc[target_index]["timestamp"]
                # 128 columns per row and 2 microseconds per column; red
                # pixels between the two rows are subtracted.
                new_timestamp = timestamp + (row_idx - start_row) * 128 * 0.000002 - red_pixel_count * 0.000002

            # --- ID: decode, then project onto the nearest known ID ---
            gen_int = int(decode_bits(b, row_idx, id_start, id_start + ID_LEN), 2)
            best_int = min(existing_int_ids, key=lambda eid: bin(eid ^ gen_int).count('1'))
            # NOTE(review): this drops leading zeros ('130' vs '0130' as in
            # existing_hex_ids) — confirm the tracksheet's can_id format.
            new_id = format(best_int, 'X')
            proj_bits = bin(best_int)[2:].zfill(ID_LEN)

            # --- Data bits are kept as decoded ---
            data_bits = decode_bits(b, row_idx, data_start, data_start + DATA_LEN)

            # --- Middle bits ---
            if flag in ('modification', 'prev_inj', 'prev_mod'):
                mid_bits = ''.join(
                    str(int((perturbed_image[b, :, row_idx, col] > 0.0).any().item()))
                    for col in range(id_start + ID_LEN, id_start + ID_LEN + MID_LEN)
                )
            else:
                mid_bits = "0001000"

            # --- Rebuild the frame and write it from column 0 ---
            frame_start = '0' + proj_bits + mid_bits + data_bits
            crc_val = calculate_crc(frame_start)
            crc_bits = bin(crc_val)[2:].zfill(CRC_LEN)
            frame = frame_start + crc_bits + FRAME_ENDING
            for col, bit in enumerate(frame):
                perturbed_image[b, :, row_idx, col] = 1.0 if bit == '1' else 0.0

            # Everything after the frame is green (R=0, G=1, B=0).
            perturbed_image[b, 0, row_idx, len(frame):] = 0.0
            perturbed_image[b, 1, row_idx, len(frame):] = 1.0
            perturbed_image[b, 2, row_idx, len(frame):] = 0.0

            # --- Tracksheet update ---
            start_index = packet_level_data.index[packet_level_data["image_no"] == image_no][0]
            packet_pos = start_index + target_index + 1
            if flag == 'injection':
                new_row = {
                    "row_no": [row_idx],
                    "timestamp": [new_timestamp],
                    "can_id": [new_id],
                    "image_no": [image_no],
                    "valid_flag": [1],
                    "original_label": ["A"],
                    "operation_label": ["I"],
                }
                if rounds != 0:
                    new_row["pred_label"] = ["A"]
                packet_level_data = pd.concat(
                    [
                        packet_level_data.iloc[:packet_pos],
                        pd.DataFrame(new_row),
                        packet_level_data.iloc[packet_pos:],
                    ],
                    ignore_index=True,
                )
            else:
                packet_level_data.loc[packet_pos, ["can_id", "operation_label"]] = [
                    new_id,
                    operation_labels[flag],
                ]

    return perturbed_image, packet_level_data
def apply_inj_mod(data_grad, image, existing_hex_ids, packet_level_data, n_image, modification_queue, injection_queue, prev_mod_queue, prev_inj_queue, rounds, I, M, Pi, Pm):
    """
    Run one mask -> bit-flip -> frame-repair pass over ``image``.

    ``generate_mask`` picks the rows (consuming the queues in place),
    ``bit_flip_attack_rgb`` flips their ID/data bits according to the sign of
    ``data_grad``, and ``gradient_perturbation`` rewrites the rows as complete
    frames while updating ``packet_level_data``.

    Returns:
        ``(perturbed_image, packet_level_data, modification_queue,
        injection_queue)``.
    """
    grad_sign = data_grad.sign()

    selection = generate_mask(
        image, modification_queue, injection_queue,
        prev_mod_queue, prev_inj_queue, rounds, I, M, Pi, Pm,
    )
    mask, inj_rows, mod_rows, prev_mod_rows, prev_inj_rows = selection

    flipped = bit_flip_attack_rgb(image, mask, data_grad, grad_sign)

    flipped, packet_level_data = gradient_perturbation(
        image, flipped, mask, existing_hex_ids, packet_level_data, n_image,
        inj_rows, mod_rows, prev_mod_rows, prev_inj_rows, rounds,
    )

    return flipped, packet_level_data, modification_queue, injection_queue
def perform_perturbation(model, data_grad, perturbed_data, existing_hex_ids, packet_level_data, n_image, modification_queue, injection_queue, prev_mod_queue, prev_inj_queue, rounds, I, M, Pi, Pm):
    """
    Apply one perturbation pass and classify the result with ``model``.

    Returns:
        ``(final_pred, perturbed_data, packet_level_data)`` where
        ``final_pred`` holds the arg-max class index along dimension 1 of the
        model output (``keepdim=True``).
    """
    perturbed_data, packet_level_data, _, _ = apply_inj_mod(
        data_grad, perturbed_data, existing_hex_ids, packet_level_data, n_image,
        modification_queue, injection_queue, prev_mod_queue, prev_inj_queue,
        rounds, I, M, Pi, Pm,
    )

    # Inference only: no autograd graph is needed for the final prediction.
    with torch.no_grad():
        output = model(perturbed_data)
    final_pred = output.max(1, keepdim=True)[1]

    return final_pred, perturbed_data, packet_level_data


def find_max_prev_inj(image, image_no, packet_level_data, rounds):
    """
    List image rows of packets injected in an earlier round that are still detected.

    A packet qualifies when it belongs to ``image_no``, its ``original_label``
    is ``"A"``, its ``operation_label`` is ``"I"`` or ``"Pi"`` and its
    ``pred_label`` is ``"A"`` (all compared case-insensitively). In round 0
    nothing has been injected yet, so the result is always empty.

    Args:
        image: 4-D tensor; its third dimension bounds the valid row numbers.
        image_no: value to match in the ``image_no`` column.
        packet_level_data: pandas DataFrame tracksheet.
        rounds: current attack round.

    Returns:
        ``row_no`` values (ints) of qualifying packets that lie inside the image.

    Raises:
        KeyError: a column needed for this round is missing.
    """
    required = ["image_no", "original_label", "row_no"]
    if rounds != 0:
        required += ["operation_label", "pred_label"]
    missing = [col for col in required if col not in packet_level_data.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    if rounds == 0:
        return []

    subset = packet_level_data.loc[packet_level_data["image_no"] == image_no]

    def upper(col):
        return subset[col].astype(str).str.upper()

    keep = (
        (upper("original_label") == "A")
        & upper("operation_label").isin(["I", "PI"])
        & (upper("pred_label") == "A")
    )

    n_rows = image.shape[2]
    return [r for r in subset.loc[keep, "row_no"].astype(int).tolist() if 0 <= r < n_rows]
def find_max_prev_mod(image, image_no, packet_level_data, rounds):
    """
    List image rows of packets modified in an earlier round that are still detected.

    A packet qualifies when it belongs to ``image_no``, its ``original_label``
    is ``"A"``, its ``operation_label`` is ``"M"`` or ``"Pm"`` and its
    ``pred_label`` is ``"A"`` (all compared case-insensitively). In round 0
    nothing has been modified yet, so the result is always empty.

    Args:
        image: 4-D tensor; its third dimension bounds the valid row numbers.
        image_no: value to match in the ``image_no`` column.
        packet_level_data: pandas DataFrame tracksheet.
        rounds: current attack round.

    Returns:
        ``row_no`` values (ints) of qualifying packets that lie inside the image.

    Raises:
        KeyError: a column needed for this round is missing.
    """
    required = ["image_no", "original_label", "row_no"]
    if rounds != 0:
        required += ["operation_label", "pred_label"]
    missing = [col for col in required if col not in packet_level_data.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    if rounds == 0:
        return []

    subset = packet_level_data.loc[packet_level_data["image_no"] == image_no]

    def upper(col):
        return subset[col].astype(str).str.upper()

    keep = (
        (upper("original_label") == "A")
        & upper("operation_label").isin(["M", "PM"])
        & (upper("pred_label") == "A")
    )

    n_rows = image.shape[2]
    return [r for r in subset.loc[keep, "row_no"].astype(int).tolist() if 0 <= r < n_rows]
def find_max_modification(image, image_no, packet_level_data, rounds):
    """
    List image rows of attack packets that have not been perturbed yet.

    Round 0: every packet of ``image_no`` whose ``original_label`` is ``"A"``.
    Later rounds: additionally the ``operation_label`` must be missing (NaN)
    or the string ``"None"``, and ``pred_label`` must be ``"A"``. String
    comparisons are case-insensitive.

    Args:
        image: 4-D tensor; its third dimension bounds the valid row numbers.
        image_no: value to match in the ``image_no`` column.
        packet_level_data: pandas DataFrame tracksheet.
        rounds: current attack round.

    Returns:
        ``row_no`` values (ints) of qualifying packets that lie inside the image.

    Raises:
        KeyError: a column needed for this round is missing.
    """
    required = ["image_no", "original_label", "row_no"]
    if rounds != 0:
        required += ["operation_label", "pred_label"]
    missing = [col for col in required if col not in packet_level_data.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    subset = packet_level_data.loc[packet_level_data["image_no"] == image_no]

    def upper(col):
        return subset[col].astype(str).str.upper()

    keep = upper("original_label") == "A"
    if rounds != 0:
        untouched = subset["operation_label"].isna() | (upper("operation_label") == "NONE")
        keep = keep & untouched & (upper("pred_label") == "A")

    n_rows = image.shape[2]
    return [r for r in subset.loc[keep, "row_no"].astype(int).tolist() if 0 <= r < n_rows]
def find_max_injection(image):
    """
    Return the rows that are entirely green in at least one batch item.

    A pixel counts as green when channel 0 == 0, channel 1 == 1 and
    channel 2 == 0. Such rows are the candidate slots for injecting a frame.

    Args:
        image: tensor indexed as [batch, channel, row, column] with at least
            three channels.

    Returns:
        Ascending list of row indices (Python ints).
    """
    green_pixels = (image[:, 0] == 0) & (image[:, 1] == 1) & (image[:, 2] == 0)
    full_green_rows = green_pixels.all(dim=2).any(dim=0)
    return full_green_rows.nonzero(as_tuple=True)[0].tolist()


def build_queues(image, image_no, data_grad, packet_level_data, rounds, verbose=True):
    """
    Rank the candidate rows of one image by gradient energy.

    Four candidate sets are collected — unperturbed attack packets
    (``find_max_modification``), previously modified packets
    (``find_max_prev_mod``), previously injected packets
    (``find_max_prev_inj``) and free all-green rows (``find_max_injection``).
    Each row is scored with the sum of squared gradients over its ID-bit and
    data-bit columns (all batch items and channels), and each set is sorted by
    descending score; ties keep their original relative order.

    The score is now computed once per image row instead of allocating a
    full-size mask for every candidate. The summation order therefore differs
    from the previous implementation, so scores may differ in the last
    floating-point digits.

    Args:
        image: tensor indexed as [batch, channel, row, column].
        image_no: image identifier used to look up packets.
        data_grad: loss gradient with the same shape as ``image``.
        packet_level_data: pandas DataFrame tracksheet.
        rounds: current attack round.
        verbose: print the size of every queue when true.

    Returns:
        ``(modification_queue, injection_queue, prev_mod_queue,
        prev_inj_queue)`` as deques of ``(score, row)`` tuples.
    """
    sof_len, id_len, mid_len, data_len = 1, 11, 7, 64
    id_start = sof_len
    id_end = id_start + id_len
    data_start = id_end + mid_len
    data_end = data_start + data_len

    # --- Select candidate rows ---
    modification_rows = find_max_modification(image, image_no, packet_level_data, rounds)
    prev_mod_rows = find_max_prev_mod(image, image_no, packet_level_data, rounds)
    prev_inj_rows = find_max_prev_inj(image, image_no, packet_level_data, rounds)
    injection_rows = find_max_injection(image)

    # Squared gradients are used so that large-magnitude entries dominate.
    id_energy = (data_grad[..., id_start:id_end] ** 2).sum(dim=(0, 1, 3))
    data_energy = (data_grad[..., data_start:data_end] ** 2).sum(dim=(0, 1, 3))
    row_energy = (id_energy + data_energy).tolist()

    def ranked(rows):
        scored = [(row_energy[r], r) for r in rows]
        scored.sort(key=lambda item: item[0], reverse=True)
        return deque(scored)

    modification_queue = ranked(modification_rows)
    injection_queue = ranked(injection_rows)
    prev_mod_queue = ranked(prev_mod_rows)
    prev_inj_queue = ranked(prev_inj_rows)

    if verbose:
        print(f"[INFO] modification_queue size: {len(modification_queue)}")
        print(f"[INFO] injection_queue size: {len(injection_queue)}")
        print(f"[INFO] prev_modification_queue size: {len(prev_mod_queue)}")
        print(f"[INFO] preV_injection_queue size: {len(prev_inj_queue)}")

    return modification_queue, injection_queue, prev_mod_queue, prev_inj_queue
def evaluation_metrics(all_preds, all_labels, folder, filename):
    """
    Report binary-classification metrics and save the confusion-matrix plot.

    Labels follow the convention used by the callers in this file:
    0 = benign, 1 = attack. The plot is written to ``folder/filename``
    (the folder is created if needed).

    Args:
        all_preds: predicted labels (array-like of 0/1).
        all_labels: ground-truth labels (array-like of 0/1).
        folder: output directory for the plot.
        filename: plot file name.

    Returns:
        ``(tnr, mdr, oa_asr, accuracy, precision, recall, f1)`` where ``tnr``
        is TN / (TN + FP), ``mdr`` is TP / (TP + FN) and ``oa_asr`` is the
        fraction of attack samples predicted benign. Each ratio is 0.0 when
        its denominator is zero.
    """
    # Accept plain lists as well as arrays; the element-wise comparisons
    # below need ndarray semantics.
    all_preds = np.asarray(all_preds)
    all_labels = np.asarray(all_labels)

    print("Number of predictions:", len(all_preds))
    print("Unique predictions:", np.unique(all_preds, return_counts=True))
    print("Unique labels:", np.unique(all_labels, return_counts=True))

    # Pin the label set so the matrix is always 2x2, even when only one
    # class occurs in this run.
    cm = confusion_matrix(all_labels, all_preds, labels=[0, 1])
    print("Confusion Matrix:\n", cm)

    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
    disp.plot(cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')

    os.makedirs(folder, exist_ok=True)
    plt.savefig(os.path.join(folder, filename), dpi=300)
    plt.close()

    true_negatives = cm[0, 0]
    false_positives = cm[0, 1]
    false_negatives = cm[1, 0]
    true_positives = cm[1, 1]

    benign_total = true_negatives + false_positives
    attack_total = true_positives + false_negatives
    tnr = true_negatives / benign_total if benign_total > 0 else 0.0
    mdr = true_positives / attack_total if attack_total > 0 else 0.0

    IDS_accu = accuracy_score(all_labels, all_preds)
    IDS_prec = precision_score(all_labels, all_preds, zero_division=0)
    IDS_recall = recall_score(all_labels, all_preds, zero_division=0)
    IDS_F1 = f1_score(all_labels, all_preds, zero_division=0)

    # Attack samples (label 1) that were predicted benign (0).
    misclassified_attack_packets = int(((all_labels == 1) & (all_preds == 0)).sum())
    total_attack_packets = int((all_labels == 1).sum())
    oa_asr = (
        misclassified_attack_packets / total_attack_packets
        if total_attack_packets > 0
        else 0.0
    )

    return tnr, mdr, oa_asr, IDS_accu, IDS_prec, IDS_recall, IDS_F1
def Attack_procedure(model, device, test_loader, output_path, existing_hex_ids, start_image_number, packet_level_data, rounds):
    """
    Perturb every attack image of ``test_loader`` and collect the predictions.

    Images whose label is 1 are attacked: the loss gradient with respect to
    the input is computed, candidate rows are ranked (``build_queues``), one
    perturbation pass is applied (``perform_perturbation``) and the perturbed
    image is saved. All other images are classified and saved unchanged.

    Budgets per attacked image, with ``n`` the number of tracksheet packets of
    that image whose ``original_label`` is ``"A"``:
    round 0 -> ``M = n``; later rounds -> ``Pm = ceil(0.5 * n)``;
    ``I`` and ``Pi`` are always 0.

    The loader is expected to yield one image per batch (``target.item()`` is
    used). The model is switched to eval mode before the first forward pass;
    previously one extra forward pass per image ran before ``model.eval()``
    was called.

    Args:
        model: classifier; ``F.nll_loss`` is applied to its output.
        device: torch device the batches are moved to.
        test_loader: iterable of ``(data, target)`` batches.
        output_path: directory passed to ``saving_image``.
        existing_hex_ids: known IDs used when projecting perturbed IDs.
        start_image_number: image number of the first batch.
        packet_level_data: pandas DataFrame tracksheet.
        rounds: current attack round.

    Returns:
        ``(all_preds, all_labels, packet_level_data)`` with the first two as
        1-D numpy arrays.
    """
    all_preds = []
    all_labels = []
    n_image = start_image_number

    model.eval()

    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        current_target = target[0] if target.dim() > 0 else target

        injection_count = 0
        modification_count = 0
        prev_mod_count = 0
        prev_inj_count = 0

        if current_target == 1:
            print("\nImage no:", n_image, "(Attack image)")

            # Gradient of the loss with respect to the input image.
            data.requires_grad = True
            initial_output = model(data)
            loss = F.nll_loss(initial_output, target)
            model.zero_grad(set_to_none=True)
            loss.backward()
            data_grad = data.grad.data
            model.zero_grad(set_to_none=True)  # clean up

            n_attack_current = (
                (packet_level_data["image_no"] == n_image)
                & (packet_level_data["original_label"] == "A")
            ).sum()

            I = 0
            Pi = 0
            if rounds == 0:
                print("n in image no ", n_attack_current, n_image)
                M = n_attack_current
                Pm = 0
                print("I, M, Pi, Pm for round 0", I, M, Pi, Pm)
            else:
                M = 0
                Pm = math.ceil(0.5 * n_attack_current)
                if rounds == 1:
                    print("I, M, Pi, Pm for round 1", I, M, Pi, Pm)
                else:
                    print("I, M, Pi, Pm for round>=2", I, M, Pi, Pm)

            modification_queue, injection_queue, prev_mod_queue, prev_inj_queue = build_queues(
                data, n_image, data_grad, packet_level_data, rounds
            )
            num_inj = len(injection_queue)
            num_mod = len(modification_queue)
            num_prev_mod = len(prev_mod_queue)
            num_prev_inj = len(prev_inj_queue)

            # Detached copy: the perturbation is not differentiated through.
            perturbed_data = data.clone().detach().to(device)

            final_pred, perturbed_data, packet_level_data = perform_perturbation(
                model, data_grad, perturbed_data, existing_hex_ids, packet_level_data, n_image,
                modification_queue, injection_queue, prev_mod_queue, prev_inj_queue,
                rounds, I, M, Pi, Pm,
            )

            # The queues are consumed in place, so the shrinkage is the
            # number of rows actually used.
            injection_count = num_inj - len(injection_queue)
            modification_count = num_mod - len(modification_queue)
            prev_mod_count = num_prev_mod - len(prev_mod_queue)
            prev_inj_count = num_prev_inj - len(prev_inj_queue)

            saving_image(perturbed_data, n_image, output_path)
        else:
            with torch.no_grad():
                initial_output = model(data)
            final_pred = initial_output.max(1, keepdim=True)[1]

            print(f"Image {n_image}: Benign Image (Skipping Perturbation)")
            saving_image(data, n_image, output_path)

        print(f"Final perturbations: Injection={injection_count}, Modification={modification_count}, Prev_inj={prev_inj_count}, Prev_mod={prev_mod_count} \n")
        print(f"Image {n_image}, Truth Labels {target.item()}, Final Pred {final_pred.cpu().numpy()}")

        all_preds.append(final_pred.item())
        all_labels.append(target.item())
        n_image += 1

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    return all_preds, all_labels, packet_level_data
def run(params):
    """
    Execute one round of the DoS adversarial attack.

    Loads the tracksheet CSV, the test images and the surrogate model, runs
    ``Attack_procedure``, prints the evaluation metrics and writes the updated
    tracksheet to ``<output_path>/packet_level_data_<rounds>.csv``.

    Args:
        params: mapping with the keys ``test_data_dir``, ``test_label_file``,
            ``output_path``, ``rounds``, ``packet_level_data`` (path of the
            tracksheet CSV) and ``model_path``.
    """
    test_dataset_dir = params["test_data_dir"]
    test_label_file = params["test_label_file"]
    output_path = params["output_path"]
    rounds = params["rounds"]
    tracksheet_path = params["packet_level_data"]
    model_path = params["model_path"]

    os.makedirs(output_path, exist_ok=True)
    folder = os.path.join("CF_Results", output_path)
    filename = "perturbed_dos.png"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    existing_hex_ids = ['007F', '0130', '0140', '0153', '0160', '0164', '0220', '0251', '02B0', '0340', '0371', '0372',
                        '0381', '0386', '0387', '0389', '038D', '0394', '0410', '0412', '0420', '0421', '0436', '0470', '047F',
                        '0485', '0490', '0495', '04A2', '04C9', '04F1', '0500', '0507', '050A', '050B', '050C', '050E', '0520', '052A', '0541', '0544',
                        '054B', '0553', '0559', '057F', '0592', '0593', '0595', '0596', '05B0', '05BE', '05C1', '05E3', '05FF']

    # Read everything as strings so IDs keep their textual form, then cast
    # the numeric columns explicitly.
    packet_level_data = pd.read_csv(tracksheet_path, dtype=str, low_memory=False)
    packet_level_data.columns = packet_level_data.columns.str.strip()
    packet_level_data = packet_level_data.fillna("None")

    packet_level_data["row_no"] = packet_level_data["row_no"].astype(int)
    packet_level_data["timestamp"] = packet_level_data["timestamp"].astype(float)
    packet_level_data["image_no"] = packet_level_data["image_no"].astype(int)
    packet_level_data["valid_flag"] = packet_level_data["valid_flag"].astype(int)

    if rounds == 0:
        print("in round 0")
        # The original tracksheet carries a numeric `label` column; from here
        # on the pipeline works with `original_label` ("B"/"A").
        packet_level_data = packet_level_data.rename(columns={"label": "original_label"})
        # Values were read as strings, hence "0"/"1" rather than 0/1.
        packet_level_data["original_label"] = packet_level_data["original_label"].map({"0": "B", "1": "A"})
        unmapped = int(packet_level_data["original_label"].isna().sum())
        if unmapped:
            # Anything other than "0"/"1" becomes NaN and would silently drop
            # out of the attack-row selection.
            print(f"[WARN] {unmapped} packets have a label other than '0'/'1'; their original_label is NaN")
        packet_level_data["operation_label"] = "None"

    # Load dataset
    _, test_loader, start_image_number = load_dataset(test_dataset_dir, test_label_file, device, is_train=False)
    print("loaded test dataset")

    # Load the model
    model = load_model(model_path)

    st = time.time()
    print("Start time:", st)
    preds, labels, packet_level_data = Attack_procedure(
        model, device, test_loader, output_path, existing_hex_ids,
        start_image_number, packet_level_data, rounds,
    )
    et = time.time()
    print("End time:", et)

    tnr, mdr, oa_asr, IDS_accu, IDS_prec, IDS_recall, IDS_F1 = evaluation_metrics(preds, labels, folder, filename)
    print("----------------IDS Perormance Metric----------------")
    print(f'Accuracy: {IDS_accu:.4f}')
    print(f'Precision: {IDS_prec:.4f}')
    print(f'Recall: {IDS_recall:.4f}')
    print(f'F1 Score: {IDS_F1:.4f}')
    print("----------------Adversarial attack Perormance Metric----------------")
    print("TNR:", tnr)
    print("Malcious Detection Rate:", mdr)
    print("Attack Success Rate:", oa_asr)
    print("Execution Time:", et - st)

    # Fixed six-decimal timestamps; the integer columns are re-cast because
    # rows inserted during the attack may have changed their dtype.
    packet_level_data["timestamp"] = packet_level_data["timestamp"].map(lambda x: f"{x:.6f}")
    for col in ("row_no", "image_no", "valid_flag"):
        if col in packet_level_data.columns:
            packet_level_data[col] = packet_level_data[col].astype(int)

    packet_level_data.to_csv(os.path.join(output_path, f"packet_level_data_{rounds}.csv"), index=False)
packet_level_data[c] = packet_level_data[c].astype(int) + + packet_level_data.to_csv(os.path.join(output_path, f"packet_level_data_{rounds}.csv"), index=False) + + + + +# if __name__ == "__main__": +# # Allow standalone execution +# cfg = yaml.safe_load(open("config_dos_OTIDS.yaml")) +# run(cfg["attack"]) + + +if __name__ == "__main__": + + import argparse + import yaml + + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, default="config_dos_mirgu.yaml") + args = parser.parse_args() + + cfg = yaml.safe_load(open(args.config)) + + # Ensure attack section exists + if "attack" not in cfg: + raise ValueError("Config file must contain 'attack' section.") + + run(cfg["attack"]) diff --git a/MIRGU_Entropy/scripts/adversarial_attack_spoof_mirgu.py b/MIRGU_Entropy/scripts/adversarial_attack_spoof_mirgu.py new file mode 100755 index 0000000..dcbdadc --- /dev/null +++ b/MIRGU_Entropy/scripts/adversarial_attack_spoof_mirgu.py @@ -0,0 +1,1624 @@ +""" + Description: delete attack packets with the logic that deletes all packets except D packets just after a benign packet with same id. + Also, there is no data in the surrogate images. + #changes: 06/02/2026 (no data) + # 1. changed the compute_grad_row_for_spoof(), no need to take data for grad, only ID + # 2. bit_flip_attack_rgb() only flip ID, nothing for data + # 3. gradient_perturbation(), no need to calcualte crc, crc = '0'*15 + + To run for deletion, + 1. uncomment not_D + 2. pass not_D instead of D in build_queues() + 3. comment build_deletion_queue() and uncomment keep_D_logic() + 4. assign D = num_del after build_queue() + for rest of the rounds, undo these steps. 
+""" +import os +# os.environ["CUDA_VISIBLE_DEVICES"] = "1" # MUST COME FIRST +import math +import bisect +import random +import pandas as pd +import torch +import torch.nn as nn +import torch.nn.functional as F + +import yaml +import time +import torch.optim as optim +from torch.utils.data import DataLoader, TensorDataset +from torchvision import datasets, transforms, models +import numpy as np +import matplotlib.pyplot as plt +from PIL import Image +import torchvision.utils as vutils +from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay +from torchvision.utils import save_image +from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score +from collections import deque + +# Inception-ResNet Model +class InceptionStem(nn.Module): + def __init__(self): + super(InceptionStem, self).__init__() + self.stem = nn.Sequential( + nn.Conv2d(in_channels = 3, out_channels = 32, stride = 1, kernel_size = 3, padding = 'same'), + nn.Conv2d(in_channels = 32, out_channels = 32, stride = 1, kernel_size = 3, padding = 'valid'), + nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 0), + nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 1, stride = 1, padding = 'valid'), + nn.Conv2d(in_channels = 64, out_channels = 128, kernel_size = 3, stride = 1, padding = 'same'), + nn.Conv2d(in_channels = 128, out_channels = 128, kernel_size = 3, stride = 1, padding = 'same') + ) + + def forward(self, x): + stem_out = self.stem(x) + return stem_out + +class InceptionResNetABlock(nn.Module): + def __init__(self, in_channels = 128, scale=0.17): + super(InceptionResNetABlock, self).__init__() + self.scale = scale + self.branch0 = nn.Conv2d(in_channels, 32, kernel_size=1, stride=1, padding='same') + self.branch1 = nn.Sequential( + nn.Conv2d(in_channels, 32, kernel_size=1, stride=1, padding='same'), + nn.Conv2d(32, 32, kernel_size=3, stride=1, padding='same') + ) + self.branch2 = nn.Sequential( + nn.Conv2d(in_channels, 32, kernel_size=1, stride=1, 
class ReductionA(nn.Module):
    """Down-sampling block with three parallel stride-2 branches.

    The branch outputs are concatenated along the channel axis, giving
    ``192 + 128 + in_channels`` output channels.
    """

    def __init__(self, in_channels = 128):
        super().__init__()
        # Branch 0: a single strided 3x3 convolution.
        self.branch0 = nn.Conv2d(in_channels, 192, kernel_size=3, stride=2, padding='valid')
        # Branch 1: 1x1 -> 3x3 -> strided 3x3 convolutions.
        branch1_layers = [
            nn.Conv2d(in_channels, 96, kernel_size=1, stride=1, padding='same'),
            nn.Conv2d(96, 96, kernel_size=3, stride=1, padding='same'),
            nn.Conv2d(96, 128, kernel_size=3, stride=2, padding='valid'),
        ]
        self.branch1 = nn.Sequential(*branch1_layers)
        # Branch 2: parameter-free strided max-pool of the input.
        self.branch2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

    def forward(self, x):
        """Apply the three branches to ``x`` and concatenate on dim 1."""
        outputs = (self.branch0(x), self.branch1(x), self.branch2(x))
        return torch.cat(outputs, dim=1)
class ReductionB(nn.Module):
    """Block with three convolutional branches and a max-pool branch.

    All four branches use stride 1 and end in an un-padded 3x3 operation,
    so height and width each shrink by 2. The branch outputs are
    concatenated along the channel axis, giving
    ``192 + 128 + 128 + in_channels`` output channels (896 for the default).

    Args:
        in_channels: Number of channels of the input feature map. Defaults
            to 448, the value that was previously hard-coded, so existing
            callers and checkpoints are unaffected.
    """

    def __init__(self, in_channels = 448):
        super(ReductionB, self).__init__()
        # Branch 0: 1x1 bottleneck followed by an un-padded 3x3 convolution.
        self.branch0 = nn.Sequential(
            nn.Conv2d(in_channels = in_channels, out_channels = 128, kernel_size = 1, stride = 1, padding = 'same'),
            nn.Conv2d(in_channels = 128, out_channels = 192, kernel_size = 3, stride = 1, padding = 'valid')
        )
        # Branch 1: same layout as branch 0 with 128 output channels.
        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channels = in_channels, out_channels = 128, kernel_size = 1, stride = 1, padding = 'same'),
            nn.Conv2d(in_channels = 128, out_channels = 128, kernel_size = 3, stride = 1, padding = 'valid')
        )
        # Branch 2: one extra padded 3x3 convolution before the un-padded one.
        self.branch2 = nn.Sequential(
            nn.Conv2d(in_channels = in_channels, out_channels = 128, kernel_size = 1, stride = 1, padding = 'same'),
            nn.Conv2d(in_channels = 128, out_channels = 128, kernel_size = 3, stride = 1, padding = 'same'),
            nn.Conv2d(in_channels = 128, out_channels = 128, kernel_size = 3, stride = 1, padding = 'valid')
        )

        # Branch 3: parameter-free max-pool that passes the input channels through.
        self.branch3 = nn.MaxPool2d(kernel_size = 3, stride = 1, padding = 0)

    def forward(self, x):
        """Apply the four branches to ``x`` and concatenate on dim 1."""
        branch0 = self.branch0(x)
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        return torch.cat([branch0, branch1, branch2, branch3], dim = 1)
def load_labels(label_file):
    """Load image labels from the label file.

    Each non-blank line is expected to look like
    ``<filename>: <value>[, <value> ...]``. Single and double quotes are
    removed from the line before parsing, and the last comma-separated value
    is converted to ``int`` and used as the label. Blank lines are skipped.

    Args:
        label_file: Path of the text file to read.

    Returns:
        dict mapping each filename (str) to its integer label. If a filename
        occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If a non-blank line has no ``": "`` separator or its
            label is not an integer. The message names the file and line.
    """
    labels = {}
    with open(label_file, 'r') as file:
        for line_no, raw_line in enumerate(file, start=1):
            line = raw_line.strip().replace("'", "").replace('"', '')
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue

            # Split on the LAST ": " so a filename that itself contains
            # ": " does not break parsing; the label part never contains it.
            filename, separator, label_str = line.rpartition(': ')
            if not separator:
                raise ValueError(
                    f"{label_file}:{line_no}: expected '<filename>: <label>', "
                    f"got {raw_line!r}"
                )

            # The label is the last comma-separated value on the line.
            last_value = label_str.strip().split(',')[-1].strip()
            try:
                label = int(last_value)
            except ValueError as err:
                raise ValueError(
                    f"{label_file}:{line_no}: label {last_value!r} is not an integer"
                ) from err

            labels[filename.strip()] = label
    return labels
def calculate_crc(data):
    """
    Calculate the CRC-15 checksum used by CAN for a sequence of bits.

    Parameters: generator polynomial 0x4599, initial value 0, bits fed
    most-significant first, no final XOR.

    The previous implementation shifted the register 15 times per input bit
    and tested bit 15 of a register that was not yet masked to 15 bits, so
    its result was not a CRC-15. This version performs exactly one shift per
    input bit.

    Args:
        data: Iterable of bits; each item may be ``'0'``/``'1'`` or ``0``/``1``.

    Returns:
        int in ``[0, 0x7FFF]``: the 15-bit checksum (0 for empty input).
    """
    # CRC-15 polynomial
    poly = 0x4599
    crc = 0x0000

    for bit in data:
        # Feedback is the incoming bit XOR the register's top bit (bit 14).
        feedback = (int(bit) & 0x01) ^ ((crc >> 14) & 0x01)
        # Shift left by one, keeping the register at 15 bits.
        crc = (crc << 1) & 0x7FFF
        if feedback:
            crc ^= poly

    return crc
def generate_mask(perturbed_data, modification_queue, injection_queue,prev_mod_queue, prev_inj_queue, deletion_queue, rounds, I, M, Pi, Pm, D):
    """
    Build a binary row mask by consuming budgeted entries from five queues.

    Up to ``D``, ``I``, ``M``, ``Pi`` and ``Pm`` entries are popped, in that
    order, from the left of the deletion, injection, modification,
    previously-injected and previously-modified queues. Every entry is a
    ``(score, row)`` pair of which only ``row`` is used. The queues are
    mutated in place: entries consumed here are gone for later calls.

    For each selected row, columns ``0:128`` are set to 1.0 in every channel
    of every image in the batch. The whole row is masked (not just the ID
    and data bits) because a deleted packet has to be blanked completely.

    Args:
        perturbed_data: 4-D image tensor; the mask has the same shape.
        modification_queue, injection_queue, prev_mod_queue, prev_inj_queue,
        deletion_queue: Queues supporting ``len()`` and ``popleft()``.
        rounds: Not used by this function; kept for interface compatibility.
        I, M, Pi, Pm, D: Maximum number of entries to take from the
            injection, modification, previously-injected,
            previously-modified and deletion queue respectively.

    Returns:
        tuple ``(mask, injection_rows, modification_rows,
        prev_modification_rows, prev_injection_rows, del_rows)`` where
        ``mask`` is a float32 tensor and the rest are lists of row indices.
    """

    def pop_k(queue, k):
        """Pop at most ``k`` entries from the left of ``queue``; return their rows."""
        return [queue.popleft()[1] for _ in range(min(k, len(queue)))]

    # 1. Select rows according to budgets
    print("D in generate_mask", D)
    del_rows = pop_k(deletion_queue, D)
    injection_rows = pop_k(injection_queue, I)
    modification_rows = pop_k(modification_queue, M)
    prev_injection_rows = pop_k(prev_inj_queue, Pi)
    prev_modification_rows = pop_k(prev_mod_queue, Pm)

    # 2. Mark every selected row in one indexed assignment for the whole batch
    mask = torch.zeros_like(perturbed_data, dtype=torch.float32)
    selected_rows = (
        injection_rows
        + modification_rows
        + prev_modification_rows
        + prev_injection_rows
        + del_rows
    )
    if selected_rows:
        row_index = torch.as_tensor(
            [int(r) for r in selected_rows], dtype=torch.long, device=mask.device
        )
        mask[:, :, row_index, 0:128] = 1.0  # because deletion will require entire row to be deleted

    return mask, injection_rows, modification_rows, prev_modification_rows, prev_injection_rows, del_rows
bit_flip_attack_rgb(image, mask, data_grad, sign_data_grad, injection_rows, del_rows): + """ + Bit-flip attack for RGB CAN images. + - Flips pixels based on sign of gradient: + If black ([0,0,0]) and sign_grad > 0 → flip to white ([1,1,1]) + If white ([1,1,1]) and sign_grad < 0 → flip to black ([0,0,0]) + - Works for ID bits and data bits separately with different top-k percentages. + """ + + perturbed_image = image.clone() # Start from original image + B, C, H, W = image.shape + ID_LEN = 11 + MID_LEN = 7 + DATA_LEN = 64 + id_start = 1 + id_end = id_start + ID_LEN + data_start = 1 + ID_LEN + MID_LEN + data_end = data_start + DATA_LEN + count_bit_flip_1 = 0 + count_bit_flip_0 = 0 + print("injection_rows", injection_rows) + print("del rows", del_rows) + for b in range(B): + rows = mask[b, 0].nonzero(as_tuple=True)[0] # Only use first channel for mask + rows = torch.unique(rows) + rows = torch.sort(rows, descending=True).values # Sort descending + + for row in rows: + if row in del_rows: + perturbed_image[b,0,row,:] = 0.0 + perturbed_image[b,1,row,:] = 1.0 + perturbed_image[b,2,row,:] = 0.0 + else: + # --- ID bits --- + id_pixels = perturbed_image[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID Pixels:", id_pixels) + id_grads = data_grad[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID gradient:", id_grads) + id_signs = sign_data_grad[b, :, row, id_start:id_end] # Shape [3, ID_LEN] + # print("ID Signs:", id_signs) + + # Collapse gradients to single value per bit (sum over channels) + id_scores = torch.sum(torch.abs(id_grads), dim=0) + # print("ID Scores: ", id_scores) + num_id_top = max(1, int(1.0 * ID_LEN)) + id_top_idx = torch.topk(id_scores, num_id_top).indices + # print("Top Index:", id_top_idx) + count_bit_flip = 0 + # print("ID before flipping: ", id_pixels.clone()) + for idx in id_top_idx: + # print("Index:", idx) + pixel = id_pixels[:, idx] # [R, G, B] + # print("Pixel:", pixel) + grad_sign = torch.sum(id_signs[:, idx]).item() # 
Combine channels' signs + grad_sign = (id_signs[0, idx] + id_signs[1, idx] + id_signs[2, idx]).item() + # print("Grad Sign:", grad_sign) + if grad_sign > 0: # Black → White + id_pixels[:, idx] = 1.0 + count_bit_flip += 1 + elif grad_sign < 0: # White → Black + id_pixels[:, idx] = 0.0 + count_bit_flip += 1 + + # print("Number of bitflip in ID: ", count_bit_flip) + # print("ID after flipping: ", id_pixels.clone()) + + ''' + # --- Data bits --- + + data_pixels = perturbed_image[b, :, row, data_start:data_end] # [3, DATA_LEN] + data_grads = data_grad[b, :, row, data_start:data_end] + data_signs = sign_data_grad[b, :, row, data_start:data_end] + + data_scores = torch.sum(torch.abs(data_grads), dim=0) + num_data_top = max(1, int(1.0 * DATA_LEN)) + data_top_idx = torch.topk(data_scores, num_data_top).indices + + # print("data before flipping: ", data_pixels.clone()) + for idx in data_top_idx: + pixel = data_pixels[:, idx] + # grad_sign = torch.sum(data_signs[:, idx]).item() + grad_sign = (data_signs[0, idx] + data_signs[1, idx] + data_signs[2, idx]).item() + if grad_sign > 0: + data_pixels[:, idx] = 1.0 + count_bit_flip_1 += 1 + elif grad_sign < 0: + data_pixels[:, idx] = 0.0 + count_bit_flip_0 += 1 + + # print("data after flipping: ", data_pixels.clone()) + ''' + # Assign modified bits back + perturbed_image[b, :, row, id_start:id_end] = id_pixels + # perturbed_image[b, :, row, data_start:data_end] = data_pixels + + # print("Number of bitflips_1 in Data: ", count_bit_flip_1) + # print("Numberof bitflips_0 in Data,",count_bit_flip_0) + perturbed_image = torch.clamp(perturbed_image, 0, 1) + + return perturbed_image + +def gradient_perturbation(image, perturbed_image,mask,existing_hex_ids, packet_level_data, image_no, injection_rows, modification_rows, prev_modification_rows, prev_injection_rows, del_rows,rounds): + ID_LEN = 11 + MID_LEN = 7 + # mid_bits = '0001000' + injection_set = set(injection_rows) + modification_set = set(modification_rows) + deletion_set = 
set(del_rows) + prev_inj_set = set(prev_injection_rows) + prev_mod_set = set(prev_modification_rows) + original_packets = len(packet_level_data[packet_level_data["image_no"] == image_no]) + # Precompute existing IDs as integers + existing_int_ids = [int(h, 16) for h in existing_hex_ids] + # print(image.shape, mask.shape, perturbed_image.shape) + for row in del_rows: + packet_level_data = packet_level_data[ + ~( + (packet_level_data["image_no"] == image_no) & + (packet_level_data["row_no"] == row) + ) + ].reset_index(drop=True) + + for b in range(image.shape[0]): + totalRows = mask[b, 0].nonzero(as_tuple=True)[0] + totalRows = torch.unique(totalRows) + # totalRows = torch.sort(totalRows).values + totalRows = torch.sort(totalRows, descending=True).values # Sort descending + + # print(rows, flag) + for row in totalRows: + row = row.item() + + if row in injection_set: + flag = "injection" + elif row in modification_set: + flag = "modification" + elif row in prev_mod_set: + flag = "prev_mod" + elif row in prev_inj_set: + flag = "prev_inj" + else: + # print("Skipping row", row) + continue + + + injection_row = row #earlier it was row.item() + i = injection_row - 1 + packets_before_injection = [] + # print("Injection Row: ", injection_row) + + # Traverse upward until first pixel in the row is black + while i >= 0: + first_pixel = image[b, 0, i, 0].item() # First pixel in row i, channel 0 + second_pixel = image[b, 1, i, 0].item() # Second pixel in row i, channel 1 + third_pixel = image[b, 2, i, 0].item() # Third pixel in row i, channel 2 + # print(first_pixel, second_pixel, third_pixel) + if first_pixel == 0.0 and second_pixel == 0.0 and third_pixel == 0.0: + packets_before_injection.append(i) + i -= 1 + + image_packets = packet_level_data[packet_level_data["image_no"] == image_no].reset_index(drop=True) + # print("Image packets before injection:\n", image_packets) + if packets_before_injection: + target_index = len(packets_before_injection) - 1 + # 
print("target_idex",target_index) + else: + target_index = 0 + + # print("Target index for injection:", target_index, flag, injection_row,len(image_packets)) + + if flag == 'injection': + + packet_time = 128 * 0.000002 + + if packets_before_injection: + # CASE A: packets exist before injection row + start_row = packets_before_injection[0] + end_row = injection_row + + red_pixel_count = 0 + for row_idx in range(start_row, end_row): + red_pixels_mask = ( + (perturbed_image[b, 0, row_idx, :] == 1.0) & + (perturbed_image[b, 1, row_idx, :] == 0.0) & + (perturbed_image[b, 2, row_idx, :] == 0.0) + ) + red_pixel_count += red_pixels_mask.sum().item() + + safe_index = min(len(packets_before_injection) - 1, + len(image_packets) - 1) + + base_timestamp = image_packets.iloc[safe_index]["timestamp"] + offset = ((injection_row - start_row) * packet_time + - red_pixel_count * 0.000002) + + new_timestamp = base_timestamp + offset + + else: + # CASE B: inject BEFORE first packet (same image) + first_packet = image_packets.iloc[0] + first_row = first_packet["row_no"] + first_ts = first_packet["timestamp"] + + delta_rows = first_row - injection_row + new_timestamp = first_ts - delta_rows * packet_time + + # --- 1. Decode ID bits from pixels --- + decoded_bits = '' + for col in range(1, 1 + ID_LEN): + pix = perturbed_image[b, :, row, col] + # print(pix) + # dot1 = torch.dot(pix, torch.tensor([1.0, 1.0, 1.0], device=image.device)) + # dot0 = torch.dot(pix, torch.tensor([0.0, 0.0, 0.0], device=image.device)) + # decoded_bits += '1' if dot1 >= dot0 else '0' + ones = (pix == 1.0).sum().item() # count channels equal to 1 + zeros = (pix == 0.0).sum().item() # count channels equal to 0 + bit = '1' if ones >= zeros else '0' + decoded_bits += bit + # print("decoded ID bits",decoded_bits) + + # --- 2. 
Project to nearest existing ID via Hamming distance --- + gen_int = int(decoded_bits, 2) + def hamming_dist(a, b, bitlen=ID_LEN): + return bin(a ^ b).count('1') + + best_int = min(existing_int_ids, + key=lambda eid: hamming_dist(eid, gen_int, bitlen=ID_LEN)) + + new_id = format(best_int, 'X') + + # print(packet_level_data.to_string()) + # Convert back to a bitstring of length ID_len + proj_bits = bin(best_int)[2:].zfill(ID_LEN) + + # --- 3. Overwrite ID-region in perturbed_image with projected bits --- + for idx, bit in enumerate(proj_bits, start=1): + val = 1.0 if bit == '1' else 0.0 + perturbed_image[b, :, row, idx] = val + + + # --- 4. Decode data bits --- + data_bits = '' + start = 1 + ID_LEN + MID_LEN + for col in range(start, start + 64): + pix = perturbed_image[b, :, row, col] + ones = (pix == 1.0).sum().item() # count channels equal to 1 + zeros = (pix == 0.0).sum().item() # count channels equal to 0 + bit = '1' if ones >= zeros else '0' + data_bits += bit + # print("decoded data bits",data_bits) + + # print("Before Perturbed Row",perturbed_image[b, :, row, :]) + if flag in ['modification', 'prev_inj', 'prev_mod']: + mid_bits = '' + # 7 represents middle bits (RTR + IDE + Reserved bit + DLC) + for col in range(1 + ID_LEN, 1 + ID_LEN + 7): + # print("Columns:", col) + pix = perturbed_image[b, :, row, col] + # print("Pixel:", pix) + bit = int((pix > 0.0).any().item()) + mid_bits += str(bit) + else: + mid_bits = "0001000" + + # print("Middle Bits: ", mid_bits) + + # print("Middle Perturbed Row",perturbed_image[b, :, row, 12:19]) + + # --- 5. 
Build full frame bits, CRC, stuff, and write back --- + frame_start = ('0' + proj_bits + mid_bits + data_bits) + # crc_val = calculate_crc(frame_start) + # crc_bits = bin(crc_val)[2:].zfill(15) + crc_bits = '0'*15 + uptill_crc = frame_start + crc_bits + # stuffed = stuff_bits(frame_start + crc_bits) + + # Write stuffed bits + for i, bit in enumerate(uptill_crc): + val = 1.0 if bit == '1' else 0.0 + perturbed_image[b, :, row, i] = val + + # Ending part (CRC delimiters, ACK, EoF, IFS) + ending = '1011111111111' + offset = len(uptill_crc) + for i, bit in enumerate(ending): + val = 1.0 if bit == '1' else 0.0 + perturbed_image[b, :, row, offset + i] = val + + # Mark rest as green + for i in range(offset + len(ending), perturbed_image.shape[-1]): + perturbed_image[b, 0, row, i] = 0.0 + perturbed_image[b, 1, row, i] = 1.0 + perturbed_image[b, 2, row, i] = 0.0 + + # print("Final Pedequerturbed Row",perturbed_image[b, :, row, :]) + # print(packet_level_data.to_string()) + + # UPDATE PACKET-LEVEL DATA + if flag == 'injection': + start_index = packet_level_data.index[packet_level_data["image_no"] == image_no][0] + df_part_1 = packet_level_data.iloc[:start_index+target_index+1] + df_part_2 = packet_level_data.iloc[start_index+target_index+1:] + if rounds == 0: + packet_level_data = pd.concat([df_part_1, pd.DataFrame({ "row_no": [injection_row],"timestamp": [new_timestamp], "can_id": [new_id], "image_no": [image_no],"valid_flag": [1], "original_label": "A", "operation_label": "I"}), df_part_2], ignore_index=True) + else: + packet_level_data = pd.concat([df_part_1, pd.DataFrame({ "row_no": [injection_row],"timestamp": [new_timestamp], "can_id": [new_id], "image_no": [image_no],"valid_flag": [1], "original_label": "A", "operation_label": "I","pred_label": "A"}), df_part_2], ignore_index=True) + + elif flag == 'modification': + # print(packet_level_data[packet_level_data["image_no"] == image_no]) + start_index = packet_level_data.index[packet_level_data["image_no"] == image_no][0] 
def apply_inj_mod(data_grad, image, existing_hex_ids, packet_level_data, n_image, modification_queue, injection_queue, prev_mod_queue, prev_inj_queue,deletion_queue,rounds,I,M,Pi,Pm,D):
    """Run one perturbation pass: mask selection, bit flipping, re-encoding.

    The steps are, in order: take the element-wise sign of ``data_grad``;
    call ``generate_mask`` to pick rows from the queues; call
    ``bit_flip_attack_rgb`` with the mask and gradients; call
    ``gradient_perturbation`` to finalise the image and update the packet
    table.

    Returns:
        tuple ``(perturbed_image, packet_level_data, modification_queue,
        injection_queue, deletion_queue)``. The three queues are the same
        objects that were passed in.
    """
    grad_sign = data_grad.sign()

    selection = generate_mask(image, modification_queue, injection_queue, prev_mod_queue, prev_inj_queue, deletion_queue,rounds,I,M,Pi,Pm,D)
    mask, inj_rows, mod_rows, prev_mod_rows, prev_inj_rows, removed_rows = selection

    flipped = bit_flip_attack_rgb(image, mask, data_grad, grad_sign, inj_rows, removed_rows)

    result = gradient_perturbation(image, flipped,mask,existing_hex_ids, packet_level_data, n_image, inj_rows, mod_rows, prev_mod_rows, prev_inj_rows,removed_rows,rounds)
    final_image, updated_packets = result

    return final_image, updated_packets, modification_queue, injection_queue, deletion_queue
def find_max_prev_inj(image, image_no, packet_level_data,rounds):
    """
    Return the rows of previously injected packets that are still candidates.

    For ``rounds == 0`` the result is always empty. Otherwise a packet of
    image ``image_no`` qualifies when, compared case-insensitively as
    strings, its ``original_label`` is "A", its ``operation_label`` is "I"
    or "PI", and its ``pred_label`` is "A". Row numbers outside
    ``[0, image.shape[2])`` are dropped.

    Raises:
        KeyError: If ``original_label`` or ``image_no`` is not a column.
    """
    available = packet_level_data.columns
    if not ('original_label' in available and 'image_no' in available):
        raise KeyError("Missing required columns.")

    # Packets that belong to the requested image.
    packets = packet_level_data.loc[packet_level_data["image_no"] == image_no]

    def upper(column):
        # Upper-cased string view of one column of the image's packets.
        return packets[column].astype(str).str.upper()

    if rounds == 0:
        # Nothing has been injected before the first round; keep the columns.
        packets = packets.iloc[0:0]
    else:
        keep = (
            (upper("original_label") == "A")
            & upper("operation_label").isin(["I", "PI"])
            & (upper("pred_label") == "A")
        )
        packets = packets[keep]

    row_numbers = packets["row_no"].astype(int).tolist()

    # Discard rows that fall outside the image height.
    _, _, height, _ = image.shape
    return [r for r in row_numbers if 0 <= r < height]
def find_max_modification(image, image_no, packet_level_data,rounds):
    """
    Return the rows of attack packets that are candidates for modification.

    Only packets of image ``image_no`` whose ``original_label`` is "A"
    (case-insensitive string comparison) are considered. For any round other
    than 0, the packet must additionally have no operation recorded
    (``operation_label`` is missing or the string "NONE") and a
    ``pred_label`` of "A". Row numbers outside ``[0, image.shape[2])`` are
    dropped.

    Raises:
        KeyError: If ``original_label`` or ``image_no`` is not a column.
    """
    available = packet_level_data.columns
    if not ('original_label' in available and 'image_no' in available):
        raise KeyError("Missing required columns.")

    # Packets that belong to the requested image.
    packets = packet_level_data.loc[packet_level_data["image_no"] == image_no]

    def upper(column):
        # Upper-cased string view of one column of the image's packets.
        return packets[column].astype(str).str.upper()

    keep = upper("original_label") == "A"
    if rounds != 0:
        # Later rounds: only packets that were not operated on yet and that
        # are still predicted as attack.
        untouched = packets["operation_label"].isna() | (upper("operation_label") == "NONE")
        keep = keep & untouched & (upper("pred_label") == "A")
    packets = packets[keep]

    row_numbers = packets["row_no"].astype(int).tolist()

    # Discard rows that fall outside the image height.
    _, _, height, _ = image.shape
    return [r for r in row_numbers if 0 <= r < height]
def build_deletion_queue(packet_level_data, image_no, del_rows,D, benign_can_id="43F",rng_seed=0):
    """
    Build deletion queue based on anchor-based backward distance logic.

    An *anchor* is a packet of image ``image_no`` whose ``can_id`` equals
    ``benign_can_id`` (compared as upper-cased strings) and whose
    ``original_label`` is "B". Each candidate row that lies strictly above
    some anchor (smaller row number) is scored by its distance to the
    nearest anchor below it; the ``D`` rows with the smallest distance are
    returned, ties broken by the smaller row number.

    If the image has no anchor at all, ``min(D, len(del_rows))`` rows are
    drawn at random instead, using a private generator seeded with
    ``rng_seed + image_no`` so the choice is reproducible per image and the
    module-level ``random`` state is left untouched.

    Parameters
    ----------
    packet_level_data : pd.DataFrame
        Must contain columns: image_no, row_no, can_id, original_label
    image_no : int
        Current image index
    del_rows : list[int]
        Candidate attack packet row indices
    D : int
        Deletion budget
    benign_can_id : str
        Anchor CAN ID (default: '43F'). Previously this argument was
        ignored and '43F' was always used.
    rng_seed : int
        Base seed for the no-anchor random fallback.

    Returns
    -------
    deletion_queue : list[tuple[None, int]]
        ``(None, row)`` pairs of packets to delete (ordered). Empty when
        ``D <= 0``, ``del_rows`` is empty, or no candidate precedes an anchor.
    """

    if D <= 0 or not del_rows:
        return []

    # --- 1) Get benign anchor rows for this image ---
    anchor_id = str(benign_can_id).upper()
    is_anchor = (
        (packet_level_data["image_no"] == image_no) &
        (packet_level_data["can_id"].astype(str).str.upper() == anchor_id) &
        (packet_level_data["original_label"].astype(str).str.upper() == "B")
    )
    anchors = sorted(packet_level_data.loc[is_anchor, "row_no"].astype(int).tolist())
    print("anchors", anchors)

    # --- 2) Fallback: no anchors → random deletion ---
    if not anchors:
        # A local generator gives the same draw as seeding the global one,
        # without disturbing other users of the ``random`` module.
        rng = random.Random(int(rng_seed + image_no))  # deterministic per image
        population = list(del_rows)
        return [(None, r) for r in rng.sample(population, min(D, len(population)))]

    # --- 3) Distance from each candidate to the nearest anchor below it ---
    nearest = []  # (distance, packet_row)
    for r in set(del_rows):
        # Index of the first anchor with a row number strictly greater than r.
        pos = bisect.bisect_right(anchors, r)
        if pos < len(anchors):
            nearest.append((anchors[pos] - r, r))

    # --- 4) Closest first, then take the first D rows ---
    nearest.sort()
    return [(None, r) for _, r in nearest[:D]]
strategy: + Keep exactly D attack packets such that the remaining packets + best preserve benign periodicity. + """ + + if not del_rows or D <= 0: + return [(None, r) for r in del_rows] + + # Extract timestamps for attack packets + rows = sorted(del_rows) + ts = ( + packet_level_data + .set_index("row_no") + .loc[rows, "timestamp"] + .to_dict() + ) + + # Work on a mutable list of remaining rows + remaining = rows.copy() + deletion_queue = [] + + # Helper: compute periodicity cost after removing index i + def removal_cost(rem, i): + if i == 0 or i == len(rem) - 1: + return float("inf") # avoid deleting endpoints early + t_prev = ts[rem[i - 1]] + t_next = ts[rem[i + 1]] + gap = t_next - t_prev + return abs(gap - benign_median_period) + + # Iteratively delete until D remain + while len(remaining) > D: + costs = [ + (removal_cost(remaining, i), i) + for i in range(len(remaining)) + ] + + # choose packet whose removal best preserves periodicity + _, idx = min(costs, key=lambda x: x[0]) + removed_row = remaining.pop(idx) + deletion_queue.append((None, removed_row)) + + return deletion_queue + + +def keep_D_logic(packet_level_data,image_no,del_rows,D,benign_can_id="160", rng_seed=0): + """ + D semantics: + - D == 0 : delete ALL attack packets + - D == 1 : keep exactly one attack packet immediately AFTER each anchor + """ + + # No attack packets at all + if not del_rows: + return [] + + attack_rows = sorted(del_rows) + # tset = packet_level_data[ + # packet_level_data["image_no"] == image_no + # ][["row_no", "can_id", "original_label"]] + + # print("test set", tset) + # --- 1) Find anchor rows --- + anchors = ( + packet_level_data[ + (packet_level_data["image_no"] == image_no) & + (packet_level_data["can_id"].astype(str).str.upper() == "160") & + (packet_level_data["original_label"].astype(str).str.upper() == "B") + ]["row_no"] + .astype(int) + .tolist() + ) + + anchors = sorted(anchors) #IMP step + print("anchors", anchors) + + if not anchors: + return 
delete_no_anchor_preserve_periodicity( + packet_level_data, + image_no, + del_rows, + D, + benign_median_period=0.009909868240356445 + ) + + + # --- Case A: D == 0 → delete all attack packets --- + if D == 0: + return [(None, r) for r in attack_rows] + + # --- Case B: D == 1 --- + # If no anchors → delete all attack packets + if D == 1 and not anchors: + return [(None, r) for r in attack_rows] + + # --- Find packets to KEEP --- + keep_rows = set() + + if D >= 1: + for a in anchors: + after_attacks = [r for r in attack_rows if r > a] + keep_rows.update(after_attacks[:D]) + + # for a in anchors: + # # attack packets strictly AFTER anchor + # after_attacks = [r for r in attack_rows if r > a] + # if after_attacks: + # # keep the closest one after anchor + # keep_rows.add(after_attacks[0]) + + # --- Delete everything else --- + deletion_queue = [ + (None, r) for r in attack_rows if r not in keep_rows + ] + # print("inside keep_D_logic", deletion_queue) + return deletion_queue + +# def keep_D_logic(packet_level_data,image_no,del_rows,D,benign_can_id="043F"): +# """ +# D semantics: +# - D == 0 : delete ALL attack packets +# - D == 1 : keep exactly one attack packet immediately AFTER each anchor +# """ + +# # No attack packets at all +# if not del_rows: +# return [] + +# attack_rows = sorted(del_rows) +# # tset = packet_level_data[ +# # packet_level_data["image_no"] == image_no +# # ][["row_no", "can_id", "original_label"]] + +# # print("test set", tset) +# # --- 1) Find anchor rows --- +# anchors = ( +# packet_level_data[ +# (packet_level_data["image_no"] == image_no) & +# (packet_level_data["can_id"].astype(str).str.upper() == "43F") & +# (packet_level_data["original_label"].astype(str).str.upper() == "B") +# ]["row_no"] +# .astype(int) +# .tolist() +# ) + +# anchors = sorted(anchors) #IMP step +# print("anchors", anchors) + +# # --- Case A: D == 0 → delete all attack packets --- +# if D == 0: +# return [(None, r) for r in attack_rows] + +# # --- Case B: D == 1 --- +# # 
def find_max_injection(image):
    """
    Return the rows that are completely green in at least one batch item.

    A pixel is "green" when its first three channels are exactly
    (R=0, G=1, B=0). A row qualifies when every pixel of that row is green
    for at least one image of the batch.

    Parameters
    ----------
    image : torch.Tensor
        4-D tensor indexed as (batch, channel, row, col) with at least
        three channels.

    Returns
    -------
    injection_rows : list[int]
        Qualifying row indices in ascending order.
    """
    # Unpacking doubles as a rank check: anything but a 4-D tensor fails here.
    _, _, _, _ = image.shape

    red_channel = image[:, 0, :, :]    # shape (batch, row, col)
    green_channel = image[:, 1, :, :]
    blue_channel = image[:, 2, :, :]

    green_mask = (red_channel == 0) & (green_channel == 1) & (blue_channel == 0)

    # all over columns -> (batch, row); any over the batch -> (row,)
    full_green_rows = green_mask.all(dim=2).any(dim=0)
    return torch.nonzero(full_green_rows).flatten().tolist()
+ """ + sof_len, id_mask_length, mid_bits_length = 1, 11, 7 + batch_size, _, n_rows, n_cols = image.shape + + # --- Precompute safe column indices --- + id_start = sof_len + id_end = sof_len + id_mask_length + data_start = id_end + mid_bits_length + data_end = data_start + 64 + + # --- select candiidate rows via label match --- + modification_rows = find_max_modification(image,image_no,packet_level_data,rounds) + # print("modification_rows ",modification_rows) + prev_mod_rows = find_max_prev_mod(image,image_no,packet_level_data,rounds) + # print("previously modified rows",prev_mod_rows ) + prev_inj_rows= find_max_prev_inj(image, image_no, packet_level_data,rounds) + # print("previously injected rows",prev_inj_rows ) + injection_rows = find_max_injection(image) + del_rows = find_max_del(image,image_no,packet_level_data,rounds) + # print("total del rows", del_rows) + + #How strong are the gradients in the ID + data bit region of this row? + def compute_grad_for_row_gear(row): + mask = torch.zeros_like(data_grad) + if id_start < id_end: + mask[:, :, row, id_start:id_end] = 1 + # if data_start < data_end: + # mask[:, :, row, data_start:data_end] = 1 + return float(torch.sum((data_grad * mask) ** 2).item()) #using squared sum because we are more interested in the higher abd values. 
def evaluation_metrics(all_preds, all_labels, folder, filename):
    """
    Compute IDS and attack metrics and save the confusion-matrix plot.

    Labels are binary: 0 = benign, 1 = attack. The confusion matrix is always
    built for both classes, so the metrics stay defined even when the data
    contains a single class. Values other than 0/1 are ignored by the matrix.

    Parameters
    ----------
    all_preds : array-like of int
        Predicted labels.
    all_labels : array-like of int
        Ground-truth labels.
    folder : str
        Directory for the confusion-matrix image (created if missing).
    filename : str
        File name of the image inside ``folder``.

    Returns
    -------
    tuple
        ``(tnr, mdr, oa_asr, IDS_accu, IDS_prec, IDS_recall, IDS_F1)`` where
        ``tnr`` is TN / (TN + FP), ``mdr`` is TP / (TP + FN) and ``oa_asr`` is
        the share of attack samples predicted as benign. Each ratio is 0.0
        when its denominator is zero.
    """
    all_preds = np.asarray(all_preds)
    all_labels = np.asarray(all_labels)

    # Print debug information
    print("Number of predictions:", len(all_preds))
    print("Unique predictions:", np.unique(all_preds, return_counts=True))
    print("Unique labels:", np.unique(all_labels, return_counts=True))

    # Pin the label set: without it a single-class run yields a 1x1 matrix
    # and the cell lookups below would raise IndexError.
    cm = confusion_matrix(all_labels, all_preds, labels=[0, 1])
    print("Confusion Matrix:\n", cm)

    # Display and save the confusion matrix
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
    disp.plot(cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')

    os.makedirs(folder, exist_ok=True)
    output_path = os.path.join(folder, filename)
    plt.savefig(output_path, dpi=300)
    plt.close()

    true_negatives = cm[0, 0]
    false_positives = cm[0, 1]
    false_negatives = cm[1, 0]
    true_positives = cm[1, 1]

    # Calculate metrics with safe division
    tnr = true_negatives / (true_negatives + false_positives) if (true_negatives + false_positives) > 0 else 0.0
    mdr = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0.0
    IDS_accu = accuracy_score(all_labels, all_preds)
    IDS_prec = precision_score(all_labels, all_preds, zero_division=0)
    IDS_recall = recall_score(all_labels, all_preds, zero_division=0)
    IDS_F1 = f1_score(all_labels, all_preds, zero_division=0)

    # Attack samples that evaded detection (all_labels == 1 and all_preds == 0)
    misclassified_attack_packets = int(((all_labels == 1) & (all_preds == 0)).sum())

    # Total number of original attack samples (all_labels == 1)
    total_attack_packets = int((all_labels == 1).sum())

    # No attack samples means there is nothing to evade: report 0.0 instead
    # of dividing by zero.
    oa_asr = misclassified_attack_packets / total_attack_packets if total_attack_packets > 0 else 0.0

    return tnr, mdr, oa_asr, IDS_accu, IDS_prec, IDS_recall, IDS_F1
f"perturbation_summary_{rounds}.csv") + # csv_file = open(summary_path, "w") + # csv_file.write("image_name, target_label, injection_count, modification_count, final_prediction_label, model_feedback\n") + + + # rgb_pattern = [(0.0, 0.0, 0.0) if bit == '0' else (1.0, 1.0, 1.0) for bit in bit_pattern] + + for data, target in test_loader: + # print(f"Current target shape: {target.shape}, value: {target}") + data, target = data.to(device), target.to(device) + + # If target is a 1D tensor, no need for item() + current_target = target[0] if target.dim() > 0 else target + # feedback = 0 + + # Initialize predictions for benign images (target=0) + initial_output = model(data) + # feedback += 1 + final_pred = initial_output.max(1, keepdim=True)[1] + # Initialize perturbation counts + injection_count = 0 + modification_count = 0 + del_count = 0 + prev_mod_count = 0 + prev_inj_count = 0 + # Perform perturbation for predicted attack images + if current_target == 1: + print("\nImage no:", n_image, "(Attack image)") + + data.requires_grad = True + model.eval() + + initial_output = model(data) + loss = F.nll_loss(initial_output, target) + model.zero_grad(set_to_none=True) + loss.backward() + data_grad = data.grad.data + model.zero_grad(set_to_none=True) # clean up + data_denorm = data + + + + # continue_perturbation = True + if rounds == 0: + n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A")).sum() + # print("n in image no ", n_image) + I = 0 + # M = math.ceil(0.75 * len(modification_queue)) + M = 0 + Pm = 0 + Pi = 0 + # D = 0 + # Deletion budget (generalized rule) + if n_attack_current < 3: + D = 0 + else: + D = n_attack_current // 2 + + # not_D = 2 + print("I, M, Pi, Pm and D for round 0", I,M,Pi,Pm,D) + + elif rounds == 1: + n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A")).sum() + M = 0 + Pm = 0 + Pi = 0 + D = 0 #how many to shift is decided based on 
modified packets in prev round. + if n_attack_current <= 2: + I = 1 + else: + I = 1 + + print("I, M, Pi, Pm and S for round 1", I,M,Pi,Pm,D) + elif rounds == 2: + n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A") & (packet_level_data["operation_label"] != "I")).sum() + I = 0 + # M = math.ceil(0.5*n_attack_current) #these many were shifted in prev round and selected for mmodification in this round. + Pi = 2 + Pm = 0 + D = 0 + if n_attack_current < 3: + M = 0 + else: + M = 1 + + print("I, M, Pi, Pm for round 2", I,M,Pi,Pm) + elif rounds >= 3: + n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A")).sum() + I = 0 + M = 0 + Pi = 5 + Pm = 5 + D = 0 + print("I, M, Pi, Pm for round>=2", I,M,Pi,Pm,D) + # # elif rounds == 4: + # n_attack_current = ((packet_level_data["image_no"] == n_image) & (packet_level_data["original_label"] == "A")).sum() + # I = 0 + # M = math.ceil(0.25*n_attack_current) + # Pm = 0 + # Pi = 0 + # D = 0 + # print("I, M, Pi, Pm for round>=2", I,M,Pi,Pm,D) + + + + modification_queue, injection_queue, prev_mod_queue, prev_inj_queue, deletion_queue = build_queues(data_denorm, n_image, data_grad,packet_level_data,rounds,D) + num_inj = len(injection_queue) + num_mod = len(modification_queue) + num_prev_mod = len(prev_mod_queue) + num_prev_inj = len(prev_inj_queue) + num_del = len(deletion_queue) + # D = num_del + perturbed_data = data_denorm.clone().detach().to(device) + perturbed_data.requires_grad = True + + model.eval() + + final_pred, data_denorm, packet_level_data, = perform_perturbation(model,data_grad, perturbed_data, existing_hex_ids, packet_level_data, n_image, modification_queue, injection_queue, prev_mod_queue, prev_inj_queue,deletion_queue,rounds,I,M,Pi,Pm,D) + + injection_count = num_inj - len(injection_queue) + modification_count = num_mod - len(modification_queue) + prev_mod_count = num_prev_mod - len(prev_mod_queue) + 
prev_inj_count = num_prev_inj - len(prev_inj_queue) + del_count = num_del - len(deletion_queue) + + saving_image(data_denorm, n_image,output_path) + else: + # data.requires_grad = True + model.eval() + with torch.no_grad(): + initial_output = model(data) + final_pred = initial_output.max(1, keepdim=True)[1] + + print(f"Image {n_image}: Benign Image (Skipping Perturbation)") + saving_image(data, n_image,output_path) + + print(f"Final perturbations: Injection={injection_count}, Modification={modification_count}, Prev_inj={prev_inj_count}, Prev_mod={prev_mod_count}, Del_count={del_count} \n") + print(f"Image {n_image}, Truth Labels {target.item()}, Final Pred {final_pred.cpu().numpy()}") + + # all_preds.extend(final_pred.cpu().numpy()) + # all_labels.extend(target.cpu().numpy()) + all_preds.append(final_pred.item()) + all_labels.append(target.item()) + + # image_name = f"image_{n_image}.png" + # target_label = target.item() + # final_label = final_pred.item() + + # csv_file.write(f"{image_name}, {target_label}, {injection_count}, {modification_count}, {final_label}, {feedback}\n") + n_image += 1 + + + all_preds = np.array(all_preds) + all_labels = np.array(all_labels) + # csv_file.close() + + # return all_preds.squeeze(), all_labels, packet_level_data + return all_preds, all_labels, packet_level_data + + +def run(params): + + test_dataset_dir = params["test_data_dir"] + # os.makedirs(test_dataset_dir, exist_ok=True) + # print(test_dataset_dir) + test_label_file = params["test_label_file"] + output_path = params["output_path"] + rounds = params["rounds"] + packet_level_data = params["packet_level_data"] + model_path = params["model_path"] + + + os.makedirs(output_path, exist_ok=True) + folder = os.path.join("CF_Results", output_path) + # filename = f"{output_path}.png" + filename = f"perturbed_spoof_no_data.png" + model_type = "densenet161" + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + # existing_hex_ids = ['0130', '0002', '0131', '0140', 
'018f', + # '02c0', '0370', '0316', '0153', '043f', '0260', + # '02a0', '0350', '0440', '0329', '0545', '0430', + # '01f1', '04b1', '04f0', '05f0', '00a0', '00a1', + # '0690', '05a0', '05a2'] + + existing_hex_ids = ['007F', '0130', '0140', '0153', '0164', '0220', '0251', '02B0', '0340', '0371', '0372', + '0381', '0386', '0387', '0389', '038D', '0394', '0410', '0412', '0420', '0421', '0436', '0470', '047F', + '0485', '0490', '0495', '04A2', '04C9', '04F1', '0500', '0507', '050A', '050B', '050C', '050E', '0520', '052A', '0541', '0544', + '054B', '0553', '0559', '057F', '0592', '0593', '0595', '0596', '05B0', '05BE', '05C1', '05E3', '05FF'] + + + + + # Clean up all column names: strip spaces, remove BOMs + + # Read CSV + packet_level_data = pd.read_csv(packet_level_data, dtype=str, low_memory=False) + + # Strip column names FIRST before anything else + packet_level_data.columns = packet_level_data.columns.str.strip() + + # Fill NaN values + packet_level_data = packet_level_data.fillna("None") + + # Type casting + packet_level_data["row_no"] = packet_level_data["row_no"].astype(int) + packet_level_data["timestamp"] = packet_level_data["timestamp"].astype(float) + packet_level_data["image_no"] = packet_level_data["image_no"].astype(int) + packet_level_data["valid_flag"] = packet_level_data["valid_flag"].astype(int) + + # Round 0 label setup + if rounds == 0: + print("in round 0") + # 1. Rename the column + packet_level_data = packet_level_data.rename(columns={"label": "original_label"}) + + # 2. Map integer-string values (CSV read as str, so map "0"/"1" not 0/1) + packet_level_data["original_label"] = packet_level_data["original_label"].map({"0": "B", "1": "A"}) + + # 3. Initialize operation label + packet_level_data["operation_label"] = "None" + +# Load dataset ... 
+ #Load dataset + image_datasets, test_loader, start_image_number = load_dataset(test_dataset_dir,test_label_file,device,is_train=False) + print("loaded test dataset") + + #load the model + model = load_model(model_path) + + # bit_pattern = "0000000000000001000" # for matching the packets/rows to modify + + + # List of max_perturbations to iterate over + st = time.time() + print("Start time:", st) + # Call the attack procedure + preds, labels, packet_level_data = Attack_procedure(model, device, test_loader,output_path,existing_hex_ids, start_image_number, packet_level_data,rounds) + et = time.time() + print("End time:", et) + # print("Labels:", labels) + # print("Predictions:", preds) + + tnr, mdr, oa_asr, IDS_accu, IDS_prec, IDS_recall,IDS_F1 = evaluation_metrics(preds, labels,folder,filename) + print("----------------IDS Perormance Metric----------------") + print(f'Accuracy: {IDS_accu:.4f}') + print(f'Precision: {IDS_prec:.4f}') + print(f'Recall: {IDS_recall:.4f}') + print(f'F1 Score: {IDS_F1:.4f}') + print("----------------Adversarial attack Perormance Metric----------------") + print("TNR:", tnr) + print("Malcious Detection Rate:", mdr) + print("Attack Success Rate:", oa_asr) + print("Execution Time:", et-st) + + # Force timestamp precision ONLY + packet_level_data["timestamp"] = packet_level_data["timestamp"].map(lambda x: f"{x:.6f}") + int_cols = ["row_no", "image_no", "valid_flag"] + for c in int_cols: + if c in packet_level_data.columns: + packet_level_data[c] = packet_level_data[c].astype(int) + + packet_level_data.to_csv(os.path.join(output_path, f"packet_level_data_{rounds}.csv"), index=False) + + + + + +if __name__ == "__main__": + + import argparse + import yaml + + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, default="config_spoof_mirgu.yaml") + args = parser.parse_args() + + cfg = yaml.safe_load(open(args.config)) + + # Ensure attack section exists + if "attack" not in cfg: + raise ValueError("Config file must 
# ---------------------------------------------------------
# Helper: Safe Hex/Int Parser
# ---------------------------------------------------------
def parse_hex(x):
    """
    Parse a CAN field into an int, returning 0 for anything unparsable.

    Missing values and blank strings give 0. Text containing a dot is read as
    a decimal float and truncated; everything else is read as hexadecimal
    (so the integer ``16`` is read as hex "16" == 22).
    """
    try:
        if pd.isna(x):
            return 0
        s = str(x).strip()
        if not s:
            return 0
        if '.' in s:
            return int(float(s))
        return int(s, 16)
    except (ValueError, TypeError, OverflowError):
        # Malformed text, non-scalar input, or inf/nan: treat as 0 but let
        # KeyboardInterrupt / SystemExit propagate.
        return 0


# ---------------------------------------------------------
# Preprocessing
# ---------------------------------------------------------
def preprocess_dataframe(df):
    """
    Normalize a raw CAN traffic frame for entropy evaluation.

    Produces the columns ``Timestamp`` (float), ``can_id`` / ``dlc`` /
    ``d0``..``d7`` (int), ``payload`` (list of the eight data bytes) and
    ``label`` (0 = benign, 1 = attack), sorted by timestamp with a fresh
    index. The input frame is left untouched.

    Parameters
    ----------
    df : pd.DataFrame
        Raw traffic. The timestamp column is found by name, else the first
        column is used; the ID column is ``ID``/``can_id``, else the second
        column. Missing payload columns and a missing label default to 0.

    Returns
    -------
    pd.DataFrame

    Raises
    ------
    KeyError
        If neither a ``DLC`` nor a ``dlc`` column is present.
    """
    print(f"   -> Raw data shape: {df.shape}")

    # Work on a copy so stripping the header does not rename the caller's columns.
    df = df.copy()
    df.columns = df.columns.str.strip()

    # 1. Standardize Timestamp
    ts_col = next((c for c in ["Timestamp", "timestamp", "Time", "TimeStamp", "time"]
                   if c in df.columns), None)
    if ts_col is None:
        ts_col = df.columns[0]

    df = df.rename(columns={ts_col: "Timestamp"})
    df["Timestamp"] = pd.to_numeric(df["Timestamp"], errors="coerce")
    df = df.dropna(subset=["Timestamp"])

    # 2. Standardize ID
    if "ID" in df.columns and "can_id" not in df.columns:
        df = df.rename(columns={"ID": "can_id"})
    if "can_id" not in df.columns:
        df = df.rename(columns={df.columns[1]: "can_id"})
    df["can_id"] = df["can_id"].apply(parse_hex)

    # 3. Standardize DLC
    if "DLC" in df.columns and "dlc" not in df.columns:
        df = df.rename(columns={"DLC": "dlc"})
    df["dlc"] = pd.to_numeric(df["dlc"], errors="coerce").fillna(0).astype(int)

    # 4. Standardize Payload
    payload_cols = ["d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"]
    for c in payload_cols:
        if c not in df.columns:
            df[c] = 0
    df[payload_cols] = df[payload_cols].fillna(0)
    for c in payload_cols:
        df[c] = df[c].apply(parse_hex)

    df["payload"] = df[payload_cols].values.tolist()

    # 5. Standardize Label
    if "label" not in df.columns:
        df["label"] = 0

    # "0.0"/"1.0" cover label columns that pandas parsed as float (which
    # happens as soon as one value is missing); without them every attack
    # row would silently fall through to benign.
    label_map = {
        "B": 0, "R": 0, "0": 0, "0.0": 0, "BENIGN": 0, "NAN": 0, "NONE": 0,
        "T": 1, "A": 1, "1": 1, "1.0": 1, "ATTACK": 1
    }
    df["label"] = (
        df["label"].astype(str).str.strip().str.upper()
        .map(label_map).fillna(0).astype(int)
    )

    # Stable sort: packets sharing a timestamp keep their capture order,
    # which matters because rows are later matched to the tracksheet by
    # position.
    df = df.sort_values("Timestamp", kind="mergesort").reset_index(drop=True)
    return df


# ---------------------------------------------------------
# Windowing
# ---------------------------------------------------------
def split_into_windows(df, window_size):
    """
    Split traffic into consecutive, non-overlapping time windows.

    Windows are ``[t, t + window_size)`` starting at the smallest timestamp;
    windows without packets are skipped.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``Timestamp`` and ``label`` columns.
    window_size : float
        Window length in timestamp units; must be positive.

    Returns
    -------
    windows : list[pd.DataFrame]
        Packets of each non-empty window.
    labels : np.ndarray
        1 when the window holds at least one packet with ``label == 1``, else 0.
    indices : list[pd.Index]
        Index labels (into ``df``) of each window's packets.

    Raises
    ------
    ValueError
        If ``window_size`` is not positive (the scan would never advance).
    """
    if df.empty:
        return [], np.array([]), []
    if window_size <= 0:
        raise ValueError("window_size must be positive")

    timestamps = df["Timestamp"]
    start, end = timestamps.min(), timestamps.max()

    # On time-sorted data each window is a contiguous slice found by binary
    # search; otherwise fall back to a full boolean scan per window.
    is_sorted = timestamps.is_monotonic_increasing
    ts_values = timestamps.to_numpy()

    windows, labels, indices = [], [], []
    t = start
    while t <= end:
        upper = t + window_size
        if is_sorted:
            lo = np.searchsorted(ts_values, t, side="left")
            hi = np.searchsorted(ts_values, upper, side="left")
            w = df.iloc[lo:hi]
        else:
            w = df[(timestamps >= t) & (timestamps < upper)]
        if not w.empty:
            windows.append(w)
            labels.append(int((w["label"] == 1).any()))
            indices.append(w.index)
        t += window_size
    return windows, np.array(labels), indices


# ---------------------------------------------------------
# Entropy Calculation
# ---------------------------------------------------------
def calculate_entropy(windows):
    """
    Compute the Shannon entropy (in bits) of each window.

    Every payload byte contributes one symbol
    ``(can_id, dlc, byte_position, byte_value)``; the entropy is taken over
    the symbol frequencies of the window. A window without symbols gets 0.0.

    Parameters
    ----------
    windows : list[pd.DataFrame]
        Frames with ``can_id``, ``dlc`` and ``payload`` columns.

    Returns
    -------
    np.ndarray
        One entropy value per window.
    """
    from collections import Counter

    ent = []
    for w in windows:
        counts = Counter(
            (can_id, dlc, position, value)
            for can_id, dlc, payload in zip(w["can_id"], w["dlc"], w["payload"])
            for position, value in enumerate(payload)
        )
        if not counts:
            ent.append(0.0)
            continue
        c = np.fromiter(counts.values(), dtype=float)
        p = c / c.sum()
        ent.append(-np.sum(p * np.log2(p)))
    return np.array(ent)
+ + For entropy: we write packet-level output + update tracksheet. + """ + + # --- Write detailed prediction output CSV --- + output_rows = [] + # We don't have 29-packet frames here; instead we have + # window_indices mapping. This function is called with + # (df, window_indices, preds) — see run() below. + # We handle it inside run() and call this for tracksheet update only. + + # --- Update tracksheet (same logic as evaluate_dos_mirgu.py) --- + print(f"-> Updating tracksheet: {tracksheet}") + try: + df = pd.read_csv(tracksheet, dtype=str, low_memory=False) + except FileNotFoundError: + print(f"[ERROR] Tracksheet {tracksheet} not found.") + return + + df.columns = df.columns.str.strip() + df = df.fillna("None") + + df["row_no"] = df["row_no"].astype(int) + df["timestamp"] = df["timestamp"].astype(float) + df["image_no"] = df["image_no"].astype(int) + df["valid_flag"] = df["valid_flag"].astype(int) + + # pred_labels passed in via traffic_rows argument (reused param) + pred_labels = traffic_rows # list of "A"/"B" strings + + n_df = len(df) + n_pred = len(pred_labels) + + # Handle mismatch safely + if n_pred < n_df: + print(f"[WARN] pred_labels shorter than packet CSV: {n_pred} vs {n_df}. " + f"Filling remaining using operation_label.") + for i in range(n_pred, n_df): + op = str(df.iloc[i]["operation_label"]).strip().upper() + if op == "NONE": + pred_labels.append("B") + else: + pred_labels.append("A") + elif n_pred > n_df: + print(f"[WARN] pred_labels longer than packet CSV: {n_pred} vs {n_df}. 
" + f"Truncating extra predictions.") + pred_labels = pred_labels[:n_df] + + assert len(pred_labels) == n_df + + df["pred_label"] = pred_labels + + # Format timestamp + df["timestamp"] = df["timestamp"].map(lambda x: f"{x:.6f}") + + # Enforce integer columns + int_cols = ["row_no", "image_no", "valid_flag"] + for c in int_cols: + if c in df.columns: + df[c] = df[c].astype(int) + + tracksheet_dir = "tracksheets_mirgu" + os.makedirs(tracksheet_dir, exist_ok=True) + + new_tracksheet = os.path.join(tracksheet_dir, f"dos_test_track_{pass_num}.csv") + df.to_csv(new_tracksheet, index=False) + + print(f"Saved updated packet-level CSV -> {new_tracksheet} " + f"(rows={n_df}, preds={len(pred_labels)})") + + +# --------------------------------------------------------- +# Confusion Matrix Plot +# --------------------------------------------------------- +def plot_confusion(cm, pass_num, y_test, preds): + plt.imshow(cm, cmap='Blues') + plt.title("Confusion Matrix - DOS (Entropy)") + plt.colorbar() + ticks = ["Benign", "Attack"] + plt.xticks(range(2), ticks) + plt.yticks(range(2), ticks) + + for i, j in itertools.product(range(2), range(2)): + plt.text(j, i, f"{cm[i,j]}", + ha="center", + color="white" if cm[i,j] > np.max(cm)/2 else "black") + + plt.ylabel("True") + plt.xlabel("Predicted") + plt.tight_layout() + + os.makedirs("./CF_target", exist_ok=True) + plt.savefig("./CF_target/mirgu_dos_entropy_cf_pass_{}.png".format(pass_num)) + plt.close() + + # Extract confusion matrix elements + TN, FP, FN, TP = cm.ravel() + + # Metrics + accuracy = accuracy_score(y_test, preds) + precision = precision_score(y_test, preds, pos_label=1, zero_division=0) + rec = recall_score(y_test, preds, pos_label=1, zero_division=0) + f1 = f1_score(y_test, preds, pos_label=1, zero_division=0) + tpr = TP / (TP + FN) if (TP + FN) > 0 else 0 + tnr = TN / (TN + FP) if (TN + FP) > 0 else 0 + fpr = FP / (FP + TN) if (FP + TN) > 0 else 0 + fnr = FN / (TP + FN) if (TP + FN) > 0 else 0 + balanced_acc = 
balanced_accuracy_score(y_test, preds) + try: + auc = roc_auc_score(y_test, preds) + except: + auc = 0.0 + + print("\n--------------- PERFORMANCE METRICS ----------------") + print("Accuracy:", accuracy) + print("Precision:", precision) + print("Recall / TPR:", rec) + print("True Negative Rate (TNR):", tnr) + print("False Positive Rate (FPR):", fpr) + print("False Negative Rate (FNR):", fnr) + print("F1 Score:", f1) + print("Balanced Accuracy:", balanced_acc) + print("ROC AUC:", auc) + print("---------------------------------------------------\n") + + print("Confusion Matrix (Raw Values):") + print(cm) + print(f"TP={TP}, TN={TN}, FP={FP}, FN={FN}") + + +# --------------------------------------------------------- +# MAIN +# --------------------------------------------------------- +def run(params): + + rounds = params["rounds"] + traffic_path = params["traffic_path"] + tracksheet = params["tracksheet"] + output_path = params["output_path"] + + print(f"\nDataset: MIRGU DoS (Entropy) | Mean: {TRAIN_MEAN} | Std: {TRAIN_STD} " + f"| K: {K} | Window: {WINDOW}") + print(f"Thresholds: Lower={LOWER:.4f}, Upper={UPPER:.4f}") + + # 1. 
Load & Preprocess + print(f"\n--- Loading Data: {traffic_path} ---") + if not os.path.exists(traffic_path): + print(f"CRITICAL ERROR: File {traffic_path} not found.") + return + + try: + # Try reading with header first + df = pd.read_csv(traffic_path, on_bad_lines='skip', low_memory=False) + df.columns = df.columns.str.strip() + + # If first column looks like a header string, it has a header already + # If not, re-read without header + if df.columns[0] not in ["Timestamp", "timestamp", "Time", "time"]: + col_names = ["Timestamp", "can_id", "dlc", + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "label"] + df = pd.read_csv(traffic_path, delimiter=',', header=None, + names=col_names, on_bad_lines='skip', low_memory=False) + df.columns = df.columns.str.strip() + + # Safety: if first row is actually a header string + if str(df.iloc[0]["Timestamp"]).lower() in ["timestamp", "time"]: + df = df.iloc[1:].reset_index(drop=True) + + except Exception as e: + print(f"Error reading CSV: {e}") + return + + try: + df = preprocess_dataframe(df) + except KeyError as e: + print(f"Preprocessing Error: {e}") + return + + if df.empty: + print("Error: DataFrame is empty.") + return + + # 2. Windowing + print("\n--- Splitting into Time Windows ---") + windows, y_test, window_indices = split_into_windows(df, WINDOW) + + print("\nWINDOW DISTRIBUTION") + print("-----------------------------------") + print(f"Total Windows: {len(y_test)}") + print(f"Benign: {(y_test == 0).sum()}") + print(f"Attack: {(y_test == 1).sum()}") + print("-----------------------------------\n") + + if not windows: + print("Error: No windows created.") + return + + # 3. Calculate Entropy + print("--- Calculating Entropy ---") + ent = calculate_entropy(windows) + + # 4. Prediction (Outlier Detection) + print(f"Applying Thresholds: Lower={LOWER:.4f}, Upper={UPPER:.4f}") + preds = ((ent < LOWER) | (ent > UPPER)).astype(int) + + # 5. 
Evaluate & Plot + cm = confusion_matrix(y_test, preds) + plot_confusion(cm, rounds, y_test, preds) + + print(f"\nSaved confusion matrix: mirgu_dos_entropy_cf_pass_{rounds}.png\n") + + # 6. Map window predictions back to packet-level pred_labels + df["pred_label"] = "B" + for i, idxs in enumerate(window_indices): + if preds[i] == 0: + df.loc[idxs, "pred_label"] = "B" + else: + df.loc[idxs, "pred_label"] = df.loc[idxs, "label"].map({1: "A", 0: "B"}) + + # 7. Save detailed prediction output + df_out = df.drop(columns=["payload"], errors='ignore') + os.makedirs(os.path.dirname(output_path) if os.path.dirname(output_path) else ".", exist_ok=True) + df_out.to_csv(output_path, index=False) + print("Saved detailed prediction results ->", output_path) + + # 8. Update tracksheet (same logic as evaluate_dos_mirgu.py) + pred_labels = df["pred_label"].tolist() + save_preds(rounds, tracksheet, pred_labels, output_path, preds) + + +if __name__ == "__main__": + + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, default="config_dos_mirgu.yaml") + args = parser.parse_args() + + cfg = yaml.safe_load(open(args.config)) + + if "evaluate" not in cfg: + raise ValueError("Config file must contain 'evaluate' section.") + + run(cfg["evaluate"]) diff --git a/MIRGU_Entropy/scripts/evaluate_spoof_mirgu.py b/MIRGU_Entropy/scripts/evaluate_spoof_mirgu.py new file mode 100644 index 0000000..9e54c48 --- /dev/null +++ b/MIRGU_Entropy/scripts/evaluate_spoof_mirgu.py @@ -0,0 +1,382 @@ +#!/usr/bin/env python3 +""" +MIRGU Spoof Entropy Evaluation Script +Description: Detects attacks based on Shannon Entropy of CAN payloads. +Replaces the Inception-ResNet target model with entropy-based detection. 
+Follows the exact same pipeline structure as evaluate_spoof_mirgu.py +""" +import os +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" + +import numpy as np +import csv +import pandas as pd +import yaml +from sklearn.metrics import confusion_matrix, classification_report +import matplotlib.pyplot as plt +import itertools +from sklearn.metrics import roc_auc_score, balanced_accuracy_score, recall_score +from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score + + +# --------------------------------------------------------- +# MIRGU Spoof ENTROPY CONSTANTS +# (Computed from benign-only training data: +# spoof_dataset/Mirgu_break_spoof_arbitrated_data/Target/train.csv) +# --------------------------------------------------------- +TRAIN_MEAN = 8.1876 +TRAIN_STD = 0.1162 +K = 3.5 +WINDOW = 0.0376 +LOWER = TRAIN_MEAN - K * TRAIN_STD # 7.7809 +UPPER = TRAIN_MEAN + K * TRAIN_STD # 8.5943 + + +# --------------------------------------------------------- +# Helper: Safe Hex/Int Parser +# --------------------------------------------------------- +def parse_hex(x): + try: + if pd.isna(x) or str(x).strip() == "": + return 0 + s = str(x).strip() + if '.' in s: + return int(float(s)) + return int(s, 16) + except: + return 0 + + +# --------------------------------------------------------- +# Preprocessing +# --------------------------------------------------------- +def preprocess_dataframe(df): + print(f" -> Raw data shape: {df.shape}") + + df.columns = df.columns.str.strip() + + # 1. Standardize Timestamp + ts_col = next((c for c in ["Timestamp", "timestamp", "Time", "TimeStamp", "time"] + if c in df.columns), None) + if ts_col is None: + ts_col = df.columns[0] + + df = df.rename(columns={ts_col: "Timestamp"}) + df["Timestamp"] = pd.to_numeric(df["Timestamp"], errors="coerce") + df.dropna(subset=["Timestamp"], inplace=True) + + # 2. 
Standardize ID + if "ID" in df.columns and "can_id" not in df.columns: + df = df.rename(columns={"ID": "can_id"}) + if "can_id" not in df.columns: + df.rename(columns={df.columns[1]: "can_id"}, inplace=True) + df["can_id"] = df["can_id"].apply(parse_hex) + + # 3. Standardize DLC + if "DLC" in df.columns and "dlc" not in df.columns: + df = df.rename(columns={"DLC": "dlc"}) + df["dlc"] = pd.to_numeric(df["dlc"], errors="coerce").fillna(0).astype(int) + + # 4. Standardize Payload + payload_cols = ["d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"] + for c in payload_cols: + if c not in df.columns: + df[c] = 0 + df[payload_cols] = df[payload_cols].fillna(0) + for c in payload_cols: + df[c] = df[c].apply(parse_hex) + + df["payload"] = df[payload_cols].values.tolist() + + # 5. Standardize Label + if "label" not in df.columns: + df["label"] = 0 + + df["label"] = df["label"].astype(str).str.upper().map({ + "B": 0, "R": 0, "0": 0, "BENIGN": 0, "NAN": 0, "NONE": 0, + "T": 1, "A": 1, "1": 1, "ATTACK": 1, "SPOOF": 1 + }).fillna(0).astype(int) + + df.sort_values("Timestamp", inplace=True) + df.reset_index(drop=True, inplace=True) + return df + + +# --------------------------------------------------------- +# Windowing +# --------------------------------------------------------- +def split_into_windows(df, window_size): + if df.empty: + return [], np.array([]), [] + start, end = df["Timestamp"].min(), df["Timestamp"].max() + windows, labels, indices = [], [], [] + t = start + while t <= end: + w = df[(df["Timestamp"] >= t) & (df["Timestamp"] < t + window_size)] + if not w.empty: + windows.append(w) + labels.append(int((w["label"] == 1).any())) + indices.append(w.index) + t += window_size + return windows, np.array(labels), indices + + +# --------------------------------------------------------- +# Entropy Calculation +# --------------------------------------------------------- +def calculate_entropy(windows): + ent = [] + for w in windows: + symbols = [] + for _, r in w.iterrows(): 
+ for i, v in enumerate(r["payload"]): + symbols.append((r["can_id"], r["dlc"], i, v)) + if not symbols: + ent.append(0.0) + continue + _, c = np.unique(symbols, axis=0, return_counts=True) + p = c / c.sum() + ent.append(-np.sum(p * np.log2(p))) + return np.array(ent) + + +# --------------------------------------------------------- +# Save Predictions & Update Tracksheet +# (Same logic as evaluate_spoof_mirgu.py save_preds) +# --------------------------------------------------------- +def save_preds(pass_num, tracksheet, pred_labels_list, output_path, preds): + """ + pred_labels_list: list of "A"/"B" strings for each packet + Updates the tracksheet CSV with pred_label column. + """ + + print(f"-> Updating tracksheet: {tracksheet}") + try: + df = pd.read_csv(tracksheet, dtype=str, low_memory=False) + except FileNotFoundError: + print(f"[ERROR] Tracksheet {tracksheet} not found.") + return + + df.columns = df.columns.str.strip() + df = df.fillna("None") + + df["row_no"] = df["row_no"].astype(int) + df["timestamp"] = df["timestamp"].astype(float) + df["image_no"] = df["image_no"].astype(int) + df["valid_flag"] = df["valid_flag"].astype(int) + + pred_labels = pred_labels_list + + n_df = len(df) + n_pred = len(pred_labels) + + # Handle mismatch safely + if n_pred < n_df: + print(f"[WARN] pred_labels shorter than packet CSV: " + f"{n_pred} vs {n_df}. Filling remaining using operation_label.") + for i in range(n_pred, n_df): + op = str(df.iloc[i]["operation_label"]).strip().upper() + if op == "NONE": + pred_labels.append("B") + else: + pred_labels.append("A") + elif n_pred > n_df: + print(f"[WARN] pred_labels longer than packet CSV: " + f"{n_pred} vs {n_df}. 
Truncating extra predictions.") + pred_labels = pred_labels[:n_df] + + assert len(pred_labels) == n_df + + df["pred_label"] = pred_labels + + # Format timestamp + df["timestamp"] = df["timestamp"].map(lambda x: f"{x:.6f}") + + # Enforce integer columns + int_cols = ["row_no", "image_no", "valid_flag"] + for c in int_cols: + if c in df.columns: + df[c] = df[c].astype(int) + + tracksheet_dir = "tracksheets_mirgu" + os.makedirs(tracksheet_dir, exist_ok=True) + + new_tracksheet = os.path.join(tracksheet_dir, f"spoof_test_track_{pass_num}.csv") + df.to_csv(new_tracksheet, index=False) + + print(f"Saved updated packet-level CSV -> {new_tracksheet} " + f"(rows={n_df}, preds={len(pred_labels)})") + + +# --------------------------------------------------------- +# Confusion Matrix Plot +# --------------------------------------------------------- +def plot_confusion(cm, pass_num, y_test, preds): + plt.imshow(cm, cmap='Blues') + plt.title("Confusion Matrix - Spoof (Entropy)") + plt.colorbar() + ticks = ["Benign", "Attack"] + plt.xticks(range(2), ticks) + plt.yticks(range(2), ticks) + + for i, j in itertools.product(range(2), range(2)): + plt.text(j, i, f"{cm[i,j]}", + ha="center", + color="white" if cm[i,j] > np.max(cm)/2 else "black") + + plt.ylabel("True") + plt.xlabel("Predicted") + plt.tight_layout() + + os.makedirs("./CF_target", exist_ok=True) + plt.savefig("./CF_target/mirgu_spoof_entropy_pass_{}.png".format(pass_num)) + plt.close() + + # Extract confusion matrix elements + TN, FP, FN, TP = cm.ravel() + + # Metrics + accuracy = accuracy_score(y_test, preds) + precision = precision_score(y_test, preds, pos_label=1, zero_division=0) + rec = recall_score(y_test, preds, pos_label=1, zero_division=0) + f1 = f1_score(y_test, preds, pos_label=1, zero_division=0) + tpr = TP / (TP + FN) if (TP + FN) > 0 else 0 + tnr = TN / (TN + FP) if (TN + FP) > 0 else 0 + fpr = FP / (FP + TN) if (FP + TN) > 0 else 0 + fnr = FN / (TP + FN) if (TP + FN) > 0 else 0 + balanced_acc = 
balanced_accuracy_score(y_test, preds) + try: + auc = roc_auc_score(y_test, preds) + except: + auc = 0.0 + + print("\n--------------- PERFORMANCE METRICS ----------------") + print("Accuracy:", accuracy) + print("Precision:", precision) + print("Recall / TPR:", rec) + print("True Negative Rate (TNR):", tnr) + print("False Positive Rate (FPR):", fpr) + print("False Negative Rate (FNR):", fnr) + print("F1 Score:", f1) + print("Balanced Accuracy:", balanced_acc) + print("ROC AUC:", auc) + print("---------------------------------------------------\n") + + print("Confusion Matrix (Raw Values):") + print(cm) + print(f"TP={TP}, TN={TN}, FP={FP}, FN={FN}") + + +# --------------------------------------------------------- +# MAIN +# --------------------------------------------------------- +def run(params): + + rounds = params["rounds"] + traffic_path = params["traffic_path"] + tracksheet = params["tracksheet"] + output_path = params["output_path"] + + print(f"\nDataset: MIRGU Spoof (Entropy) | Mean: {TRAIN_MEAN} | Std: {TRAIN_STD} " + f"| K: {K} | Window: {WINDOW}") + print(f"Thresholds: Lower={LOWER:.4f}, Upper={UPPER:.4f}") + + # 1. 
Load & Preprocess + print(f"\n--- Loading Data: {traffic_path} ---") + if not os.path.exists(traffic_path): + print(f"CRITICAL ERROR: File {traffic_path} not found.") + return + + try: + # Try reading with header first + df = pd.read_csv(traffic_path, on_bad_lines='skip', low_memory=False) + df.columns = df.columns.str.strip() + + # If first column doesn't look like a timestamp header, re-read headerless + if df.columns[0] not in ["Timestamp", "timestamp", "Time", "time"]: + col_names = ["Timestamp", "can_id", "dlc", + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "label"] + df = pd.read_csv(traffic_path, delimiter=',', header=None, + names=col_names, on_bad_lines='skip', low_memory=False) + df.columns = df.columns.str.strip() + + # Safety: if first row is actually a header string + if str(df.iloc[0]["Timestamp"]).lower() in ["timestamp", "time"]: + df = df.iloc[1:].reset_index(drop=True) + + except Exception as e: + print(f"Error reading CSV: {e}") + return + + try: + df = preprocess_dataframe(df) + except KeyError as e: + print(f"Preprocessing Error: {e}") + return + + if df.empty: + print("Error: DataFrame is empty.") + return + + # 2. Windowing + print("\n--- Splitting into Time Windows ---") + windows, y_test, window_indices = split_into_windows(df, WINDOW) + + print("\nWINDOW DISTRIBUTION") + print("-----------------------------------") + print(f"Total Windows: {len(y_test)}") + print(f"Benign: {(y_test == 0).sum()}") + print(f"Attack: {(y_test == 1).sum()}") + print("-----------------------------------\n") + + if not windows: + print("Error: No windows created.") + return + + # 3. Calculate Entropy + print("--- Calculating Entropy ---") + ent = calculate_entropy(windows) + + # 4. Prediction (Outlier Detection) + print(f"Applying Thresholds: Lower={LOWER:.4f}, Upper={UPPER:.4f}") + preds = ((ent < LOWER) | (ent > UPPER)).astype(int) + + # 5. 
Evaluate & Plot + cm = confusion_matrix(y_test, preds) + plot_confusion(cm, rounds, y_test, preds) + + print(f"\nSaved confusion matrix: mirgu_spoof_entropy_pass_{rounds}.png\n") + + # 6. Map window predictions back to packet-level pred_labels + df["pred_label"] = "B" + for i, idxs in enumerate(window_indices): + if preds[i] == 0: + df.loc[idxs, "pred_label"] = "B" + else: + df.loc[idxs, "pred_label"] = df.loc[idxs, "label"].map({1: "A", 0: "B"}) + + # 7. Save detailed prediction output + df_out = df.drop(columns=["payload"], errors='ignore') + os.makedirs(os.path.dirname(output_path) if os.path.dirname(output_path) else ".", exist_ok=True) + df_out.to_csv(output_path, index=False) + print("Saved detailed prediction results ->", output_path) + + # 8. Update tracksheet (same logic as evaluate_spoof_mirgu.py) + pred_labels = df["pred_label"].tolist() + save_preds(rounds, tracksheet, pred_labels, output_path, preds) + + +if __name__ == "__main__": + + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, default="config_spoof_mirgu.yaml") + args = parser.parse_args() + + cfg = yaml.safe_load(open(args.config)) + + if "evaluate" not in cfg: + raise ValueError("Config file must contain 'evaluate' section.") + + run(cfg["evaluate"]) diff --git a/MIRGU_Entropy/scripts/networks/Inception_Resnet_V1.py b/MIRGU_Entropy/scripts/networks/Inception_Resnet_V1.py new file mode 100755 index 0000000..c195402 --- /dev/null +++ b/MIRGU_Entropy/scripts/networks/Inception_Resnet_V1.py @@ -0,0 +1,492 @@ +#!/usr/bin/env python3 +""" +Reduced Inception-ResNet V1 Model for Vehicle CAN Network Intrusion Detection + +This implementation creates a lightweight version of Inception-ResNet V1 architecture +specifically optimized for processing 29x29 binary CAN frame matrices. 
The model combines: +- Inception modules for multi-scale feature extraction +- Residual connections for gradient flow and training stability +- Aggressive dimensionality reduction for computational efficiency +- Binary classification for normal vs attack traffic detection + +Architecture Flow: +Input (29x29x1) → Stem → Inception-ResNet-A → Reduction-A → +Inception-ResNet-B → Reduction-B → Global Pooling → Dense → Output (2 classes) + +Key optimizations for CAN data: +- Reduced depth compared to standard Inception-ResNet +- Optimized for small input dimensions (29x29) +- Binary classification head for intrusion detection +""" +import os +os.environ['CUDA_VISIBLE_DEVICES'] = '' + +import tensorflow as tf +from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, AveragePooling2D, + Concatenate, Add, Flatten, Dropout, Dense, Lambda) +from tensorflow.keras.models import Model +from tensorflow.keras.callbacks import Callback + +################################################### +# Custom Training Callback for Batch-Level Monitoring +################################################### +class BatchLossHistory(Callback): + """ + Custom Keras callback to record training loss at every batch iteration. + + This provides more granular monitoring than epoch-level tracking, allowing + for detailed analysis of training dynamics and convergence behavior. + Particularly useful for genetic algorithm experiments that need to track + training progress over iterations rather than epochs. + """ + + def on_train_begin(self, logs=None): + """ + Initialize tracking variables at the start of training. + + Args: + logs: Training logs dictionary (unused but required by Keras) + """ + self.batch_losses = [] # List to store (iteration, loss) tuples + self.iterations = 0 # Counter for total training iterations + + def on_batch_end(self, batch, logs=None): + """ + Record loss value after each training batch completes. 
+ + Args: + batch: Current batch number within the epoch + logs: Dictionary containing batch metrics (loss, accuracy, etc.) + """ + self.iterations += 1 # Increment global iteration counter + # Store iteration number and corresponding loss value + self.batch_losses.append((self.iterations, logs.get('loss'))) + +################################################### +# Stem Block: Initial Feature Extraction +################################################### +def stem_block(inputs): + """ + Stem block for initial feature extraction from 29x29 CAN frame inputs. + + This block performs aggressive early feature extraction and dimensionality reduction: + 1. Extracts low-level features with small convolutions + 2. Reduces spatial dimensions while increasing channel depth + 3. Prepares features for subsequent Inception-ResNet blocks + + Architecture: + - Conv2D(64, 3x3, valid) → 29x29x1 → 27x27x64 + - Conv2D(64, 3x3, same) → 27x27x64 → 27x27x64 + - MaxPool2D(2x2, stride=2) → 27x27x64 → 13x13x64 + - Conv2D(128, 1x1, same) → 13x13x64 → 13x13x128 + + Args: + inputs: Input tensor of shape (batch_size, 29, 29, 1) + + Returns: + Tensor of shape (batch_size, 13, 13, 128) + """ + # First convolution with valid padding reduces spatial dimensions + # 29x29x1 → 27x27x64 (removes 2 pixels due to valid padding) + x = Conv2D(64, (3, 3), strides=1, padding='valid', activation='relu')(inputs) + + # Second convolution with same padding preserves spatial dimensions + # 27x27x64 → 27x27x64 (maintains size, extracts more complex features) + x = Conv2D(64, (3, 3), strides=1, padding='same', activation='relu')(x) + + # Max pooling for spatial downsampling (critical for computational efficiency) + # 27x27x64 → 13x13x64 (roughly halves spatial dimensions) + x = MaxPooling2D((2, 2), strides=2, padding='valid')(x) + + # 1x1 convolution to increase channel depth without affecting spatial dimensions + # 13x13x64 → 13x13x128 (doubles channel depth for richer feature representation) + x = Conv2D(128, 
(1, 1), strides=1, padding='same', activation='relu')(x) + + return x + +################################################### +# Inception-ResNet Block A: Multi-Scale Feature Extraction +################################################### +def inception_resnet_a_block(x, scale=0.1): + """ + Inception-ResNet-A block combining multi-scale convolutions with residual connections. + + This block performs parallel convolutions at different scales to capture features + at multiple receptive field sizes, then combines them with a residual connection + for improved gradient flow and training stability. + + Architecture branches: + - Branch 0: 1x1 conv (32 filters) → point-wise features + - Branch 1: 1x1 conv → 3x3 conv (32 filters) → local spatial features + - Branch 2: 1x1 conv → 3x3 conv → 3x3 conv (64 filters) → larger spatial features + + The residual connection adds the scaled combined branches back to the input, + enabling the network to learn incremental improvements to existing features. + + Args: + x: Input tensor of shape (batch_size, height, width, channels) + scale: Scaling factor for residual connection (0.1 for training stability) + + Returns: + Tensor with same spatial dimensions but potentially different channel depth + """ + # Branch 0: 1x1 convolution for point-wise feature extraction + # Captures channel-wise interactions without spatial aggregation + branch_0 = Conv2D(32, (1, 1), padding='same', activation='relu')(x) + + # Branch 1: 1x1 → 3x3 convolution chain for local spatial features + # 1x1 reduces channels, 3x3 captures local spatial patterns + branch_1 = Conv2D(32, (1, 1), padding='same', activation='relu')(x) + branch_1 = Conv2D(32, (3, 3), padding='same', activation='relu')(branch_1) + + # Branch 2: 1x1 → 3x3 → 3x3 convolution chain for larger receptive field + # Sequential 3x3 convolutions effectively create a 5x5 receptive field + # More efficient than direct 5x5 convolution + branch_2 = Conv2D(32, (1, 1), padding='same', activation='relu')(x) + 
branch_2 = Conv2D(48, (3, 3), padding='same', activation='relu')(branch_2) + branch_2 = Conv2D(64, (3, 3), padding='same', activation='relu')(branch_2) + + # Concatenate all branches along channel dimension + # Total channels: 32 + 32 + 64 = 128 + merged = Concatenate(axis=-1)([branch_0, branch_1, branch_2]) + + # 1x1 convolution to match input channel dimensions for residual addition + # This projection layer ensures dimensional compatibility + up = Conv2D(tf.keras.backend.int_shape(x)[-1], (1, 1), padding='same')(merged) + + # Scale the residual branch for training stability + # Scaling factor (0.1) prevents residual branch from dominating early in training + up = Lambda(lambda s: s * scale)(up) + + # Residual connection: add scaled features to input + # This enables gradient flow and allows learning of incremental improvements + x = Add()([x, up]) + + # Apply activation after residual addition + # ReLU activation introduces non-linearity after feature combination + x = tf.keras.layers.Activation('relu')(x) + + return x + +################################################### +# Reduction Block A: Spatial Downsampling with Feature Expansion +################################################### +def reduction_a_block(x): + """ + Reduction-A block for spatial downsampling while expanding channel depth. + + This block reduces spatial dimensions (width/height) while increasing the number + of feature channels. Multiple parallel branches ensure that information is + preserved during downsampling through different aggregation strategies. 
+ + Architecture branches: + - Branch 0: Max pooling → preserves dominant features + - Branch 1: Direct 3x3 conv with stride=2 → learned downsampling + - Branch 2: 1x1 → 3x3 → 3x3 conv chain → complex feature extraction before downsampling + + Args: + x: Input tensor (typically 13x13x128 from stem block) + + Returns: + Tensor with reduced spatial dimensions and increased channels (6x6x448) + """ + # Branch 0: Max pooling for dominant feature preservation + # Stride=2 reduces spatial dimensions by half: 13x13 → 6x6 + # Preserves existing channel depth (128) + branch_0 = MaxPooling2D((3, 3), strides=2, padding='valid')(x) + + # Branch 1: Direct convolution with stride=2 for learned downsampling + # Simultaneously reduces spatial dimensions and extracts new features + # 13x13x128 → 6x6x160 + branch_1 = Conv2D(160, (3, 3), strides=2, padding='valid', activation='relu')(x) + + # Branch 2: Multi-stage convolution chain for complex feature extraction + # 1x1 conv reduces channels for computational efficiency + branch_2 = Conv2D(128, (1, 1), padding='same', activation='relu')(x) + # 3x3 conv with same padding maintains spatial dimensions + branch_2 = Conv2D(160, (3, 3), strides=1, padding='same', activation='relu')(branch_2) + # Final 3x3 conv with stride=2 for downsampling: 13x13 → 6x6 + branch_2 = Conv2D(160, (3, 3), strides=2, padding='valid', activation='relu')(branch_2) + + # Concatenate all branches along channel dimension + # Total channels: 128 (branch_0) + 160 (branch_1) + 160 (branch_2) = 448 + x = Concatenate(axis=-1)([branch_0, branch_1, branch_2]) + + return x + +################################################### +# Inception-ResNet Block B: High-Level Feature Processing +################################################### +def inception_resnet_b_block(x, scale=0.1): + """ + Inception-ResNet-B block for high-level feature extraction with asymmetric convolutions. 
+ + This block operates on higher-level features (post-reduction) and uses asymmetric + convolutions (1x7 and 7x1) to capture elongated patterns efficiently. The asymmetric + approach is more parameter-efficient than square convolutions for certain patterns. + + Architecture branches: + - Branch 0: 1x1 conv (192 filters) → channel-wise feature extraction + - Branch 1: 1x1 → 1x7 → 7x1 conv chain → asymmetric spatial feature extraction + + Args: + x: Input tensor of shape (batch_size, height, width, 448) + scale: Scaling factor for residual connection (0.1 for stability) + + Returns: + Tensor with same spatial dimensions and channel depth + """ + # Branch 0: Simple 1x1 convolution for channel-wise feature transformation + # Captures cross-channel interactions without spatial aggregation + branch_0 = Conv2D(192, (1, 1), padding='same', activation='relu')(x) + + # Branch 1: Asymmetric convolution sequence for efficient spatial feature extraction + # 1x1 convolution for dimensionality reduction + branch_1 = Conv2D(128, (1, 1), padding='same', activation='relu')(x) + # 1x7 convolution captures horizontal patterns + branch_1 = Conv2D(160, (1, 7), padding='same', activation='relu')(branch_1) + # 7x1 convolution captures vertical patterns + # This asymmetric approach is more efficient than 7x7 convolution + branch_1 = Conv2D(192, (7, 1), padding='same', activation='relu')(branch_1) + + # Concatenate branches along channel dimension + # Total channels: 192 + 192 = 384 + merged = Concatenate(axis=-1)([branch_0, branch_1]) + + # 1x1 projection to match input channel dimensions for residual connection + up = Conv2D(tf.keras.backend.int_shape(x)[-1], (1, 1), padding='same')(merged) + + # Apply scaling to residual branch for training stability + up = Lambda(lambda s: s * scale)(up) + + # Residual connection: add scaled features to input + x = Add()([x, up]) + + # Apply activation after residual addition + x = tf.keras.layers.Activation('relu')(x) + + return x + 
+################################################### +# Reduction Block B: Final Spatial Downsampling +################################################### +def reduction_b_block(x): + """ + Reduction-B block for final spatial downsampling before global pooling. + + This block performs the final spatial reduction while dramatically increasing + channel depth. It prepares features for global pooling by creating a very + high-dimensional but spatially compact representation. + + Architecture branches: + - Branch 0: Max pooling → preserves dominant features + - Branch 1: Direct 3x3 conv with stride=2 → learned aggressive feature extraction + + Args: + x: Input tensor of shape (batch_size, 6, 6, 448) + + Returns: + Tensor of shape (batch_size, 2, 2, 896) + """ + # Branch 0: Max pooling preserves strongest activations + # 6x6x448 → 2x2x448 + branch_0 = MaxPooling2D((3, 3), strides=2, padding='valid')(x) + + # Branch 1: Aggressive feature extraction with large channel expansion + # 6x6x448 → 2x2x448 (maintains input channel depth) + # High channel count captures complex high-level patterns + branch_1 = Conv2D(448, (3, 3), strides=2, padding='valid', activation='relu')(x) + + # Concatenate branches for maximum feature preservation + # Total channels: 448 + 448 = 896 + x = Concatenate(axis=-1)([branch_0, branch_1]) + + return x + +################################################### +# Main Model Architecture Builder +################################################### +def build_reduced_inception_resnet(input_shape=(29, 29, 1), num_classes=2, dropout_rate=0.2): + """ + Build the complete reduced Inception-ResNet model for CAN intrusion detection. + + This function assembles all components into a complete neural network optimized + for binary classification of CAN network traffic (normal vs attack). + + Architecture Summary: + 1. Stem Block: 29x29x1 → 13x13x128 (initial feature extraction + reduction) + 2. 
Inception-ResNet-A: 13x13x128 → 13x13x128 (multi-scale feature extraction) + 3. Reduction-A: 13x13x128 → 6x6x448 (spatial reduction + channel expansion) + 4. Inception-ResNet-B: 6x6x448 → 6x6x448 (high-level asymmetric features) + 5. Reduction-B: 6x6x448 → 2x2x896 (final spatial reduction) + 6. Global Average Pooling: 2x2x896 → 1x1x896 (spatial aggregation) + 7. Classification Head: 896 → 2 (binary classification) + + Args: + input_shape: Shape of input CAN frames (default: 29x29x1) + num_classes: Number of output classes (default: 2 for binary classification) + dropout_rate: Dropout rate for regularization (default: 0.2) + + Returns: + Compiled Keras Model ready for training + """ + # Define input layer for 29x29 binary CAN frame matrices + inputs = Input(shape=input_shape) + + # Stage 1: Initial feature extraction and spatial reduction + # 29x29x1 → 13x13x128 + x = stem_block(inputs) + + # Stage 2: Multi-scale feature extraction with residual connections + # 13x13x128 → 13x13x128 (maintains spatial dimensions) + x = inception_resnet_a_block(x, scale=0.1) + + # Stage 3: First major spatial reduction with channel expansion + # 13x13x128 → 6x6x448 + x = reduction_a_block(x) + + # Stage 4: High-level feature extraction with asymmetric convolutions + # 6x6x448 → 6x6x448 (maintains spatial dimensions) + x = inception_resnet_b_block(x, scale=0.1) + + # Stage 5: Final spatial reduction with maximum channel expansion + # 6x6x448 → 2x2x896 + x = reduction_b_block(x) + + # Stage 6: Global spatial aggregation + # 2x2x896 → 1x1x896 (eliminates spatial dimensions entirely) + x = AveragePooling2D((2, 2), padding='valid')(x) + + # Stage 7: Flatten for dense layer processing + # 1x1x896 → 896-dimensional feature vector + x = Flatten()(x) + + # Stage 8: Regularization to prevent overfitting + # Randomly sets 20% of features to zero during training + x = Dropout(dropout_rate)(x) + + # Stage 9: Final classification layer + # 896 → 2 classes with softmax activation for probability 
class Inception_Resnet_V1:
    """
    Wrapper around the model returned by ``build_reduced_inception_resnet()``.

    The class offers:
    - construction with a configurable default epoch count and batch size
    - ``train()``: compile, fit with per-batch loss capture, and save to disk
    - ``summary()``: delegate to the wrapped model's ``summary()``
    """

    def __init__(self, epochs=10, batch_size=32, load_weights=False):
        """
        Build the model and remember the training hyperparameters.

        Args:
            epochs: Default number of epochs used by ``train()`` (default: 10)
            batch_size: Batch size handed to ``fit()`` (default: 32)
            load_weights: Reserved flag. No weight file is wired up, so passing
                True only emits a ``RuntimeWarning``; the model keeps the
                weights it got from ``build_reduced_inception_resnet()``.
        """
        # Store training hyperparameters
        self.epochs = epochs
        self.batch_size = batch_size

        # Build the reduced Inception-ResNet architecture
        self.model = build_reduced_inception_resnet()

        if load_weights:
            # Weight loading is not implemented. Tell the caller instead of
            # silently ignoring the request.
            # Example hook: self.model.load_weights('path_to_pretrained_weights.h5')
            import warnings

            warnings.warn(
                "load_weights=True was requested, but no pre-trained weight file "
                "is configured; the model weights are left unchanged.",
                RuntimeWarning,
                stacklevel=2,
            )

    def train(self, x_train, y_train, x_test, y_test, filename_prefix="", epochs_override=None):
        """
        Compile the model, fit it on the training data, and save it to disk.

        The model is compiled on every call with Adam (learning rate 0.001),
        sparse categorical cross-entropy loss and an accuracy metric. A
        ``BatchLossHistory`` callback is attached to ``fit()`` and its
        ``batch_losses`` attribute is returned next to the fit history.

        Args:
            x_train: Training features passed to ``fit()``
            y_train: Training labels passed to ``fit()``; the sparse
                categorical loss is meant for integer class labels
            x_test: Accepted for interface compatibility only. It is NOT
                passed to ``fit()``, so no validation metrics are computed.
            y_test: See ``x_test``
            filename_prefix: Prepended to ``'final_model.h5'`` to form the
                path the trained model is saved to
            epochs_override: Epoch count for this call; ``None`` falls back
                to ``self.epochs``

        Returns:
            tuple: (training_history, batch_loss_list)
                - training_history: object returned by ``self.model.fit``
                - batch_loss_list: ``batch_losses`` of the callback
                  (presumably (iteration, loss) tuples; confirm against
                  ``BatchLossHistory``)
        """
        # Use override epochs if provided, otherwise use instance default.
        # "is None" (not truthiness) so an explicit 0 is honoured.
        epochs_to_run = self.epochs if epochs_override is None else epochs_override

        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

        # Custom callback that records per-batch losses during fit()
        batch_callback = BatchLossHistory()

        history = self.model.fit(
            x_train, y_train,
            epochs=epochs_to_run,
            batch_size=self.batch_size,
            callbacks=[batch_callback]
        )

        # The prefix lets callers keep one saved model per experiment
        self.model.save(filename_prefix + 'final_model.h5')

        # Return both epoch-level and batch-level training metrics
        return history, batch_callback.batch_losses

    def summary(self):
        """
        Delegate to the wrapped model's ``summary()``.

        Returns:
            Whatever ``self.model.summary()`` returns.
        """
        return self.model.summary()
import pandas as pd


def update_labels(updated_track_file, label_file, updated_label_file):
    """
    Write one image-level label line per image from packet-level predictions.

    The tracksheet is grouped by ``image_no`` (ascending). The i-th non-blank
    line of ``label_file`` is paired with the i-th group: its ``valid_flag``
    (the first comma-separated field after the last ``:``) is carried over,
    and the new label is 1 when any packet of the group has ``pred_label``
    equal to ``A`` (case-insensitive), else 0. Processing stops at the shorter
    of the two sequences.

    NOTE(review): the image name inside each label line is not compared with
    ``image_no``; pairing is purely positional. Confirm both files are aligned.

    Args:
        updated_track_file: CSV with at least the columns ``row_no``,
            ``timestamp``, ``image_no``, ``valid_flag`` and ``pred_label``.
        label_file: Text file with lines shaped like ``<name>: <valid_flag>, ...``.
        updated_label_file: Output path. May be the same path as
            ``label_file``, because the labels are read completely before the
            output is opened.

    Raises:
        KeyError: A required column is missing from the tracksheet.
        ValueError: A numeric column or a label line is malformed.
    """
    df = pd.read_csv(updated_track_file, dtype=str, low_memory=False)
    # Only image_no and pred_label feed the output. The other casts are kept
    # so a tracksheet with missing or malformed columns still fails fast.
    df["row_no"] = df["row_no"].astype(int)
    df["timestamp"] = df["timestamp"].astype(float)
    # Integer keys make groupby order images numerically (2 before 10).
    df["image_no"] = df["image_no"].astype(int)
    df["valid_flag"] = df["valid_flag"].astype(int)

    # Read every label up front: an empty file then yields no output instead
    # of a stray StopIteration, and blank lines are ignored.
    with open(label_file, "r") as label_f:
        label_lines = [text for text in map(str.strip, label_f) if text]

    output_lines = []
    for label_line, (image_no, group) in zip(label_lines, df.groupby("image_no")):
        # rsplit keeps names that themselves contain ':' parseable.
        _, rest = label_line.rsplit(":", 1)
        valid_flag = int(rest.split(",")[0])

        packet_labels = group["pred_label"].astype(str).str.upper()
        new_label = int(packet_labels.eq("A").any())

        output_lines.append(f"perturbed_image_{image_no}.png: {valid_flag}, {new_label}\n")

    # Open the output only after all lines were computed, so a parse error
    # cannot leave a half-written file behind.
    with open(updated_label_file, "w") as final_label_f:
        final_label_f.writelines(output_lines)


def run(params):
    """
    Run the label update from a parameter mapping.

    Args:
        params: Mapping with the keys ``tracksheet``, ``label_file`` and
            ``updated_label_file``, forwarded to ``update_labels`` in that order.

    Raises:
        KeyError: One of the three keys is missing.
    """
    tracksheet = params["tracksheet"]
    label_file = params["label_file"]
    updated_label_file = params["updated_label_file"]

    update_labels(tracksheet, label_file, updated_label_file)
    print("updated label file")


if __name__ == "__main__":

    import argparse
    import yaml

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default="config_dos_mirgu.yaml")
    args = parser.parse_args()

    # Use a context manager so the config handle is closed deterministically.
    with open(args.config) as config_f:
        # An empty YAML document loads as None; fall back to an empty dict so
        # the section check below reports the real problem.
        cfg = yaml.safe_load(config_f) or {}

    # Ensure update section exists
    if "update" not in cfg:
        raise ValueError("Config file must contain 'update' section.")

    run(cfg["update"])
import pandas as pd


def update_labels(updated_track_file, label_file, updated_label_file):
    """
    Write one image-level label line per image from packet-level predictions.

    The tracksheet is grouped by ``image_no`` (ascending). The i-th non-blank
    line of ``label_file`` is paired with the i-th group: its ``valid_flag``
    (the first comma-separated field after the last ``:``) is carried over,
    and the new label is 1 when any packet of the group has ``pred_label``
    equal to ``A`` (case-insensitive), else 0. Processing stops at the shorter
    of the two sequences.

    NOTE(review): the image name inside each label line is not compared with
    ``image_no``; pairing is purely positional. Confirm both files are aligned.

    Args:
        updated_track_file: CSV with at least the columns ``row_no``,
            ``timestamp``, ``image_no``, ``valid_flag`` and ``pred_label``.
        label_file: Text file with lines shaped like ``<name>: <valid_flag>, ...``.
        updated_label_file: Output path. May be the same path as
            ``label_file``, because the labels are read completely before the
            output is opened.

    Raises:
        KeyError: A required column is missing from the tracksheet.
        ValueError: A numeric column or a label line is malformed.
    """
    df = pd.read_csv(updated_track_file)
    # Only image_no and pred_label feed the output. The other casts are kept
    # so a tracksheet with missing or malformed columns still fails fast.
    df["row_no"] = df["row_no"].astype(int)
    df["timestamp"] = df["timestamp"].astype(float)
    df["image_no"] = df["image_no"].astype(int)
    df["valid_flag"] = df["valid_flag"].astype(int)

    # Read every label up front: an empty file then yields no output instead
    # of a stray StopIteration, and blank lines are ignored.
    with open(label_file, "r") as label_f:
        label_lines = [text for text in map(str.strip, label_f) if text]

    output_lines = []
    for label_line, (image_no, group) in zip(label_lines, df.groupby("image_no")):
        # rsplit keeps names that themselves contain ':' parseable.
        _, rest = label_line.rsplit(":", 1)
        valid_flag = int(rest.split(",")[0])

        packet_labels = group["pred_label"].astype(str).str.upper()
        new_label = int(packet_labels.eq("A").any())

        output_lines.append(f"perturbed_image_{image_no}.png: {valid_flag}, {new_label}\n")

    # Open the output only after all lines were computed, so a parse error
    # cannot leave a half-written file behind.
    with open(updated_label_file, "w") as final_label_f:
        final_label_f.writelines(output_lines)


def run(params):
    """
    Run the label update from a parameter mapping.

    Args:
        params: Mapping with the keys ``tracksheet``, ``label_file`` and
            ``updated_label_file``, forwarded to ``update_labels`` in that order.

    Raises:
        KeyError: One of the three keys is missing.
    """
    tracksheet = params["tracksheet"]
    label_file = params["label_file"]
    updated_label_file = params["updated_label_file"]

    update_labels(tracksheet, label_file, updated_label_file)
    print("updated label file")


if __name__ == "__main__":

    import argparse
    import yaml

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default="config_spoof_mirgu.yaml")
    args = parser.parse_args()

    # Use a context manager so the config handle is closed deterministically.
    with open(args.config) as config_f:
        # An empty YAML document loads as None; fall back to an empty dict so
        # the section check below reports the real problem.
        cfg = yaml.safe_load(config_f) or {}

    # Ensure update section exists
    if "update" not in cfg:
        raise ValueError("Config file must contain 'update' section.")

    run(cfg["update"])