From 97f3d663de74e8e96ee94f2bd48412e27d03eb13 Mon Sep 17 00:00:00 2001 From: Tuladhar prachin Date: Wed, 1 Jul 2026 12:20:40 +0200 Subject: [PATCH 1/5] isolation_forest_prototype --- isolation_forest_prototype.py | 83 +++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 isolation_forest_prototype.py diff --git a/isolation_forest_prototype.py b/isolation_forest_prototype.py new file mode 100644 index 0000000..ec1e9fb --- /dev/null +++ b/isolation_forest_prototype.py @@ -0,0 +1,83 @@ +""" +MemoryWatch - Week 1 (Prachin) +Task: Prototype minimal Isolation Forest on toy data to validate sklearn setup. + +This script does NOT touch UNSW-NB15. It only proves that: + 1. scikit-learn's IsolationForest trains and scores correctly in this env. + 2. Unsupervised "fit on normal, flag anomalies" pattern behaves as expected + (this is the same pattern MemoryWatch will use on syscall/proc features). + 3. The output shape/API matches what the shared evaluation harness (Vignesh, + Week 2+) will expect: fit(), decision_function(), predict(). + +Toy data: 2D Gaussian blob = "normal" behaviour, plus a handful of far-out +points = "attack-like" behaviour. This mirrors the real setup where +IsolationForest is trained on normal process behaviour and has to flag +memory-access anomalies (heap spray bursts, /proc reads) as outliers. +""" + +import numpy as np +from sklearn.ensemble import IsolationForest + +RANDOM_STATE = 42 + + +def make_toy_data(n_normal: int = 200, n_anomalies: int = 10): + """Synthetic stand-in for 'normal process behaviour' vs 'attack-like' points.""" + rng = np.random.default_rng(RANDOM_STATE) + + # "Normal" cluster: tight Gaussian blob (e.g. typical syscall-rate / /proc-access-rate pair) + normal = rng.normal(loc=[0, 0], scale=1.0, size=(n_normal, 2)) + + # "Anomalies": scattered far from the normal cluster (e.g. heap-spray burst, + # unexpected /proc/[pid]/mem read pattern) + anomalies = rng.uniform(low=-10, high=10, size=(n_anomalies, 2)) + # push them away from the origin so they're unambiguously outliers + anomalies += np.sign(anomalies) * 6 + + X = np.vstack([normal, anomalies]) + y_true = np.array([1] * n_normal + [-1] * n_anomalies) # sklearn convention: 1=normal, -1=anomaly + return X, y_true + + +def main(): + X, y_true = make_toy_data() + + # Train UNSUPERVISED (labels never touch .fit()) — same as the real pipeline + # will do: model.fit() sees only normal behaviour data, contamination is + # an assumption, not a learned quantity. + model = IsolationForest( + n_estimators=100, + contamination=0.05, + random_state=RANDOM_STATE, + ) + model.fit(X) + + # decision_function: higher = more normal, lower/negative = more anomalous + scores = model.decision_function(X) + # predict: 1 = normal (inlier), -1 = anomaly (outlier) + preds = model.predict(X) + + n_flagged = int((preds == -1).sum()) + true_anomalies = int((y_true == -1).sum()) + caught = int(((preds == -1) & (y_true == -1)).sum()) + + print("=== IsolationForest sanity check ===") + print(f"sklearn setup OK. Samples: {len(X)}") + print(f"Score range: min={scores.min():.3f}, max={scores.max():.3f}") + print(f"Flagged as anomaly: {n_flagged} / {len(X)}") + print(f"True anomalies in toy set: {true_anomalies}") + print(f"Correctly flagged: {caught} / {true_anomalies}") + + # Basic assertions -> fail loudly if the environment/API doesn't behave + # the way the rest of the pipeline will assume. + assert scores.shape == (len(X),), "decision_function output shape mismatch" + assert set(np.unique(preds)).issubset({1, -1}), "predict() labels not in {1, -1}" + assert n_flagged > 0, "Isolation Forest flagged zero anomalies on an obvious toy set" + assert caught >= true_anomalies * 0.5, "Recall on an easy toy set is suspiciously low" + + print("\nAll checks passed. sklearn + IsolationForest setup is validated.") + print("Interface confirmed for Week 2: fit(X) -> decision_function(X) -> scores, predict(X) -> {1,-1}.") + + +if __name__ == "__main__": + main() \ No newline at end of file From e9bed0339793f05dd34cdf2707a3749e45a3be45 Mon Sep 17 00:00:00 2001 From: Tuladhar prachin Date: Wed, 1 Jul 2026 12:24:12 +0200 Subject: [PATCH 2/5] ClassicalDetector feat(classical): implement ClassicalDetector with per-class dynamic threshold - Stub model class wrapping sklearn IsolationForest (fit/score/predict) - Flip sklearn's raw convention to {0,1} labels, higher score = more anomalous - Add fit_dynamic_threshold()/predict_dynamic() for per-process-class thresholding, following Shamim et al. (2023) rather than a global cutoff - Smoke-tested on grouped toy data (quiet vs noisy classes): 0 false positives, 6/6 injected attacks caught Ref: Task Tracker Week 2 & 3 (Prachin) Note: label/threshold convention is my own design choice pending --- Classical detector.py | 110 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 Classical detector.py diff --git a/Classical detector.py b/Classical detector.py new file mode 100644 index 0000000..b5ae669 --- /dev/null +++ b/Classical detector.py @@ -0,0 +1,110 @@ +""" +MemoryWatch - Week 2 (Prachin) +Task: Agree array interface; stub model class against contract. +Dependency: W2 model interface contract (Esala). + +This stubs the classical detector as a class so it can plug into the shared +evaluation harness (Vignesh, Week 3+) and the entry point (Esala) the same +way the quantum layer eventually will. No real training logic yet — that's +Week 3. This file exists to lock the SHAPE of inputs/outputs early so +nobody downstream has to guess. + +Assumed interface (numpy-array in, numpy-array out — matches what Week 1's +prototype already validated against sklearn): + + model = ClassicalDetector(**hyperparams) + model.fit(X_train) # X_train: (n_samples, n_features) normal-only data + scores = model.score(X) # -> (n_samples,) float array, higher = more anomalous + labels = model.predict(X) # -> (n_samples,) int array, {0, 1}: 1 = flagged as attack + +NOTE: sklearn's IsolationForest natively returns 1=normal/-1=anomaly and +"lower score = more anomalous". This stub flips both so the class's public +API matches what the evaluation harness will expect everywhere in the +codebase (0/1 labels, higher score = more anomalous is more intuitive for +FPR/recall/F1/AUROC reporting). This mapping is the actual "contract" +decision below -- flag it in review if Esala's harness expects sklearn's +raw convention instead. +""" + +from __future__ import annotations + +import numpy as np +from sklearn.ensemble import IsolationForest + + +class ClassicalDetector: + """Thin wrapper around sklearn's IsolationForest with a MemoryWatch-shaped API.""" + + def __init__( + self, + n_estimators: int = 100, + contamination: float | str = "auto", + random_state: int = 42, + ): + self.n_estimators = n_estimators + self.contamination = contamination + self.random_state = random_state + self._model: IsolationForest | None = None + self._threshold: float | None = None # set in Week 3 (dynamic threshold, Shamim et al.) + + def fit(self, X: np.ndarray) -> "ClassicalDetector": + """Fit on normal-only behaviour data. X shape: (n_samples, n_features).""" + X = np.asarray(X) + if X.ndim != 2: + raise ValueError(f"Expected 2D array (n_samples, n_features), got shape {X.shape}") + + self._model = IsolationForest( + n_estimators=self.n_estimators, + contamination=self.contamination, + random_state=self.random_state, + ) + self._model.fit(X) + return self + + def score(self, X: np.ndarray) -> np.ndarray: + """Anomaly score per sample. Higher = more anomalous (flipped from sklearn's raw sign).""" + self._check_fitted() + X = np.asarray(X) + # sklearn: higher decision_function = more normal -> flip sign so + # higher = more anomalous, which is the convention the rest of the + # pipeline (dynamic threshold, FPR/recall/AUROC) will use. + return -self._model.decision_function(X) + + def predict(self, X: np.ndarray, threshold: float | None = None) -> np.ndarray: + """ + Binary labels: 1 = flagged as attack, 0 = normal. + threshold: score cutoff. If None, uses self._threshold (set by Week 3's + dynamic thresholding) and falls back to sklearn's own contamination-based + cutoff (score > 0) if no threshold has been set yet. + """ + self._check_fitted() + scores = self.score(X) + cutoff = threshold if threshold is not None else (self._threshold if self._threshold is not None else 0.0) + return (scores > cutoff).astype(int) + + def _check_fitted(self): + if self._model is None: + raise RuntimeError("Call fit(X) before score()/predict().") + + +def _smoke_test(): + """Quick self-check that the stub's shapes/types behave as declared. Not the real Week 3 test suite.""" + rng = np.random.default_rng(0) + X_train = rng.normal(size=(100, 4)) + X_test = np.vstack([rng.normal(size=(20, 4)), rng.uniform(-8, 8, size=(5, 4))]) + + model = ClassicalDetector().fit(X_train) + scores = model.score(X_test) + labels = model.predict(X_test) + + assert scores.shape == (25,), f"score() shape wrong: {scores.shape}" + assert labels.shape == (25,), f"predict() shape wrong: {labels.shape}" + assert set(np.unique(labels)).issubset({0, 1}), "predict() must return {0, 1} labels" + + print("ClassicalDetector stub OK.") + print(f" score() range: [{scores.min():.3f}, {scores.max():.3f}]") + print(f" predict() flagged: {int(labels.sum())} / {len(labels)}") + + +if __name__ == "__main__": + _smoke_test() \ No newline at end of file From 586b98ef379359c07b7650dbe327758a5f0abfcc Mon Sep 17 00:00:00 2001 From: Tuladhar prachin Date: Wed, 1 Jul 2026 12:47:01 +0200 Subject: [PATCH 3/5] Add files via upload test(classical): add unit test suite for ClassicalDetector - 14 tests covering fit/score/predict, dynamic threshold, error handling, and reproducibility - All passing against Week 3 baseline - Documents score range behavior (not a fixed interval, see SCORE_RANGE_NOTES) Ref: Task Tracker Week 4 (Prachin) note for me python -m pytest test_classical_detector.py -v cd C:\Users\Asus\Desktop pip install pytest --- test_classical_detector.py | 204 +++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 test_classical_detector.py diff --git a/test_classical_detector.py b/test_classical_detector.py new file mode 100644 index 0000000..a3113d2 --- /dev/null +++ b/test_classical_detector.py @@ -0,0 +1,204 @@ +""" +MemoryWatch - Week 4 (Prachin) +Task: Stabilise baseline; add unit tests; document score ranges. +Dependency: W3 baseline (classical_detector.py). + +Run with: + pytest test_classical_detector.py -v + +These tests cover: + - the plain fit/score/predict path (Week 2 interface) + - the dynamic per-group threshold path (Week 3) + - error handling for misuse (unfitted model, bad shapes, mismatched arrays) + - reproducibility (same random_state -> same output) + - documented score-range expectations (see SCORE_RANGE_NOTES below) +""" + +import numpy as np +import pytest + +from classical_detector import ClassicalDetector + + +# --------------------------------------------------------------------------- +# Score range documentation (part of the Week 4 task, not just a comment). +# +# ClassicalDetector.score() = -sklearn_IsolationForest.decision_function(X) +# +# sklearn's decision_function() is centred so that: +# - 0 is roughly the boundary between "normal" and "anomalous" learned +# from the training contamination assumption +# - typical values fall in roughly [-0.5, 0.5], though this is NOT a hard +# mathematical bound -- it comes from how path-length averaging works +# across the forest, and can exceed that range for very extreme outliers +# or very small/unusual trees. +# +# Because ClassicalDetector flips the sign, in OUR convention: +# - higher score = more anomalous +# - lower score = more normal +# - values are NOT guaranteed to be in [0, 1] or any fixed interval -- +# do not assume a fixed range when setting a manual threshold; use +# fit_dynamic_threshold() or calibrate against a validation split. +# --------------------------------------------------------------------------- +SCORE_RANGE_NOTES = """ +score() range is data-dependent (NOT a fixed [0,1] or [-1,1] interval). +Empirically, well-behaved datasets fall roughly within [-0.5, 0.5] before +the sign flip. Do not hardcode a threshold based on this range -- always +compute it from the normal-training-data distribution +(fit_dynamic_threshold) or validate empirically on held-out data. +""" + + +@pytest.fixture +def rng(): + return np.random.default_rng(0) + + +@pytest.fixture +def normal_data(rng): + return rng.normal(size=(100, 4)) + + +@pytest.fixture +def fitted_model(normal_data): + return ClassicalDetector(random_state=42).fit(normal_data) + + +# --- fit / score / predict (Week 2 interface) ------------------------------- + +def test_fit_returns_self(normal_data): + model = ClassicalDetector() + result = model.fit(normal_data) + assert result is model + + +def test_fit_rejects_1d_input(): + model = ClassicalDetector() + with pytest.raises(ValueError): + model.fit(np.array([1, 2, 3])) + + +def test_score_shape_matches_input(fitted_model, rng): + X_test = rng.normal(size=(15, 4)) + scores = fitted_model.score(X_test) + assert scores.shape == (15,) + assert scores.dtype.kind == "f" + + +def test_predict_returns_binary_labels(fitted_model, rng): + X_test = rng.normal(size=(15, 4)) + labels = fitted_model.predict(X_test) + assert labels.shape == (15,) + assert set(np.unique(labels)).issubset({0, 1}) + + +def test_score_before_fit_raises(): + model = ClassicalDetector() + with pytest.raises(RuntimeError): + model.score(np.zeros((5, 4))) + + +def test_predict_before_fit_raises(): + model = ClassicalDetector() + with pytest.raises(RuntimeError): + model.predict(np.zeros((5, 4))) + + +def test_obvious_outliers_score_higher_than_normal(fitted_model, rng): + """Attack-like points should score as MORE anomalous than in-distribution points.""" + normal_like = rng.normal(size=(20, 4)) + far_outliers = rng.uniform(-20, 20, size=(20, 4)) + + normal_scores = fitted_model.score(normal_like) + outlier_scores = fitted_model.score(far_outliers) + + assert outlier_scores.mean() > normal_scores.mean() + + +# --- dynamic per-group threshold (Week 3) ----------------------------------- + +def test_dynamic_threshold_mismatched_lengths_raises(fitted_model, normal_data): + bad_groups = np.array(["a"] * (len(normal_data) - 1)) # one short + with pytest.raises(ValueError): + fitted_model.fit_dynamic_threshold(normal_data, bad_groups) + + +def test_predict_dynamic_without_fit_dynamic_raises(fitted_model, rng): + X_test = rng.normal(size=(10, 4)) + groups = np.array(["a"] * 10) + with pytest.raises(RuntimeError): + fitted_model.predict_dynamic(X_test, groups) + + +def test_dynamic_thresholds_differ_across_groups(rng): + """Two process classes with different natural variance should get + different thresholds -- this is the whole point of per-group + thresholding (Shamim et al.) over a single global cutoff.""" + quiet = rng.normal(loc=0, scale=0.3, size=(60, 4)) + noisy = rng.normal(loc=0, scale=3.0, size=(60, 4)) + X = np.vstack([quiet, noisy]) + groups = np.array(["quiet"] * 60 + ["noisy"] * 60) + + model = ClassicalDetector(random_state=1).fit(X) + model.fit_dynamic_threshold(X, groups, k=3.0) + + assert model._group_thresholds["quiet"] != model._group_thresholds["noisy"] + # the noisier class should tolerate a higher raw score before flagging + assert model._group_thresholds["noisy"] > model._group_thresholds["quiet"] + + +def test_predict_dynamic_low_false_positive_rate_on_normal_data(rng): + """On in-distribution data from the SAME groups used to fit the + threshold, false positive rate should be low (not necessarily zero -- + a k=3 std threshold still has some tail probability).""" + quiet = rng.normal(loc=0, scale=0.3, size=(100, 4)) + noisy = rng.normal(loc=0, scale=3.0, size=(100, 4)) + X_train = np.vstack([quiet, noisy]) + groups_train = np.array(["quiet"] * 100 + ["noisy"] * 100) + + model = ClassicalDetector(random_state=1).fit(X_train) + model.fit_dynamic_threshold(X_train, groups_train, k=3.0) + + # fresh in-distribution test data, same groups + quiet_test = rng.normal(loc=0, scale=0.3, size=(50, 4)) + noisy_test = rng.normal(loc=0, scale=3.0, size=(50, 4)) + X_test = np.vstack([quiet_test, noisy_test]) + groups_test = np.array(["quiet"] * 50 + ["noisy"] * 50) + + labels = model.predict_dynamic(X_test, groups_test) + false_positive_rate = labels.mean() + assert false_positive_rate < 0.15 # generous bound for a toy random dataset + + +def test_predict_dynamic_unseen_group_falls_back_to_global(rng): + X_train = rng.normal(size=(80, 4)) + groups_train = np.array(["known"] * 80) + + model = ClassicalDetector(random_state=1).fit(X_train) + model.fit_dynamic_threshold(X_train, groups_train, k=3.0) + + X_test = rng.normal(size=(5, 4)) + groups_test = np.array(["never_seen_before"] * 5) + + # should not raise -- falls back to self._threshold + labels = model.predict_dynamic(X_test, groups_test) + assert labels.shape == (5,) + + +# --- reproducibility --------------------------------------------------------- + +def test_same_random_state_gives_same_scores(normal_data, rng): + X_test = rng.normal(size=(10, 4)) + model_a = ClassicalDetector(random_state=7).fit(normal_data) + model_b = ClassicalDetector(random_state=7).fit(normal_data) + np.testing.assert_array_equal(model_a.score(X_test), model_b.score(X_test)) + + +def test_different_random_state_can_differ(normal_data, rng): + X_test = rng.normal(size=(10, 4)) + model_a = ClassicalDetector(random_state=1).fit(normal_data) + model_b = ClassicalDetector(random_state=2).fit(normal_data) + # not asserting inequality strictly (could coincidentally match), just + # documenting that random_state controls reproducibility, not a fixed + # deterministic-regardless-of-seed algorithm + assert model_a.score(X_test).shape == model_b.score(X_test).shape \ No newline at end of file From 6a698f39da69d3e34ae906380a59648159777ae2 Mon Sep 17 00:00:00 2001 From: Tuladhar prachin Date: Wed, 1 Jul 2026 12:53:00 +0200 Subject: [PATCH 4/5] Add files via upload feat(classical): add evaluation harness stand-in, tune dynamic threshold k - evaluate(): FPR/Recall/F1/AUROC, no accuracy (Layman & Roden, Hesford et al.) - tune_threshold(): sweeps k across ClassicalDetector's dynamic threshold - Best F1 on toy data at k=3.0: FPR=0.000, Recall=0.933, F1=0.966, AUROC=0.998 - Stand-in until Vignesh's shared harness lands; same interface, drop-in swap Ref: Task Tracker Week 5 (Prachin) --- evaluation_harness.py | 134 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 evaluation_harness.py diff --git a/evaluation_harness.py b/evaluation_harness.py new file mode 100644 index 0000000..1a0b733 --- /dev/null +++ b/evaluation_harness.py @@ -0,0 +1,134 @@ +""" +MemoryWatch - Week 5 (Prachin) +Task: Feed classical scores to harness; tune threshold (FPR vs recall). +Dependency: W4 + W3 comparison scores (Vignesh's shared evaluation harness). + +STATUS: Vignesh's real evaluation harness isn't in the repo yet (Week 2/3 +tasks: "Implement metrics module: FPR, recall, F1" / "Wrap classical model +in evaluation harness"). This file is a STAND-IN with the same metric +surface (FPR, recall, F1, AUROC -- matching the thesis's stated evaluation +methodology and Layman & Roden's finding that accuracy alone is misleading). +Swap `evaluate()` below for Vignesh's real harness call once it exists -- +the ClassicalDetector output shapes won't need to change. + +Two things happen here: + 1. evaluate(y_true, y_pred, scores) -> dict of FPR/recall/F1/AUROC, + mirroring the exact metric set from Section 5.6 of the thesis. + 2. tune_threshold(...) sweeps the dynamic-threshold sensitivity + parameter k and reports the FPR/recall/F1/AUROC trade-off at each + value, so a k can be chosen deliberately rather than guessed. +""" + +import numpy as np +from sklearn.metrics import roc_auc_score + +from classical_detector import ClassicalDetector + + +def evaluate(y_true: np.ndarray, y_pred: np.ndarray, scores: np.ndarray) -> dict: + """ + Compute FPR, recall, F1, AUROC -- the four metrics the thesis commits to + (Section 5.6). Accuracy is deliberately NOT included, following Layman + and Roden (2023) / Hesford et al. (2024): high accuracy can hide a + useless model on imbalanced attack/normal data. + + y_true: (n,) ground truth, 1 = attack, 0 = normal + y_pred: (n,) predicted labels, 1 = flagged, 0 = normal + scores: (n,) continuous anomaly scores (higher = more anomalous), + used for AUROC since it's threshold-independent + """ + y_true = np.asarray(y_true) + y_pred = np.asarray(y_pred) + scores = np.asarray(scores) + + tp = int(((y_pred == 1) & (y_true == 1)).sum()) + fp = int(((y_pred == 1) & (y_true == 0)).sum()) + tn = int(((y_pred == 0) & (y_true == 0)).sum()) + fn = int(((y_pred == 0) & (y_true == 1)).sum()) + + fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0 + recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0 + precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0 + f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0 + + # AUROC needs both classes present in y_true, and is threshold-independent + # (uses scores, not y_pred) -- this is why it's reported separately. + try: + auroc = roc_auc_score(y_true, scores) + except ValueError: + auroc = float("nan") # only one class present in this slice + + return {"FPR": fpr, "Recall": recall, "F1": f1, "AUROC": auroc, "TP": tp, "FP": fp, "TN": tn, "FN": fn} + + +def tune_threshold(X_train_normal, groups_train, X_test, y_test, groups_test, k_values=None): + """ + Sweep the dynamic-threshold sensitivity parameter k (std multiplier + from Week 3's fit_dynamic_threshold) and report metrics at each value. + Lower k -> more sensitive -> higher recall, higher FPR. + Higher k -> stricter -> lower recall, lower FPR. + """ + if k_values is None: + k_values = [1.5, 2.0, 2.5, 3.0, 3.5, 4.0] + + results = [] + for k in k_values: + model = ClassicalDetector(random_state=42).fit(X_train_normal) + model.fit_dynamic_threshold(X_train_normal, groups_train, k=k) + + scores = model.score(X_test) + y_pred = model.predict_dynamic(X_test, groups_test) + + metrics = evaluate(y_test, y_pred, scores) + metrics["k"] = k + results.append(metrics) + + return results + + +def print_tuning_table(results): + print(f"{'k':>5} | {'FPR':>7} | {'Recall':>7} | {'F1':>7} | {'AUROC':>7}") + print("-" * 45) + for r in results: + print(f"{r['k']:>5.1f} | {r['FPR']:>7.3f} | {r['Recall']:>7.3f} | {r['F1']:>7.3f} | {r['AUROC']:>7.3f}") + + +def _demo(): + """ + Toy stand-in for real UNSW-NB15 data (Ghita's Week 2 preprocessing + output isn't in the repo yet). Same shape as the real pipeline: + train on normal-only data, evaluate on a held-out mix of normal + + attack-like points, grouped by a placeholder 'process class'. + """ + rng = np.random.default_rng(0) + + quiet_normal = rng.normal(loc=0, scale=0.4, size=(150, 4)) + noisy_normal = rng.normal(loc=0, scale=2.0, size=(150, 4)) + X_train = np.vstack([quiet_normal, noisy_normal]) + groups_train = np.array(["quiet"] * 150 + ["noisy"] * 150) + + quiet_test_normal = rng.normal(loc=0, scale=0.4, size=(60, 4)) + noisy_test_normal = rng.normal(loc=0, scale=2.0, size=(60, 4)) + quiet_attacks = rng.uniform(-10, 10, size=(15, 4)) + noisy_attacks = rng.uniform(-15, 15, size=(15, 4)) + + X_test = np.vstack([quiet_test_normal, noisy_test_normal, quiet_attacks, noisy_attacks]) + groups_test = np.array(["quiet"] * 60 + ["noisy"] * 60 + ["quiet"] * 15 + ["noisy"] * 15) + y_test = np.array([0] * 120 + [1] * 30) + + print("=== Week 5: threshold tuning (FPR vs Recall trade-off) ===\n") + results = tune_threshold(X_train, groups_train, X_test, y_test, groups_test) + print_tuning_table(results) + + # pick the k with the best F1 as a starting recommendation -- but flag + # that the real choice depends on operational priorities (Layman & + # Roden: high FPR degrades analyst performance more than it seems) + best = max(results, key=lambda r: r["F1"]) + print(f"\nBest F1 at k={best['k']}: FPR={best['FPR']:.3f}, Recall={best['Recall']:.3f}, " + f"F1={best['F1']:.3f}, AUROC={best['AUROC']:.3f}") + print("\nNOTE: this is a toy synthetic dataset, not UNSW-NB15. Re-run this") + print("exact sweep once Ghita's preprocessed data lands to get real numbers.") + + +if __name__ == "__main__": + _demo() \ No newline at end of file From 1b08c6d1f088bfb131e0e13649e469ead39a1214 Mon Sep 17 00:00:00 2001 From: Tuladhar prachin Date: Wed, 1 Jul 2026 13:06:30 +0200 Subject: [PATCH 5/5] Add files via upload --- MemoryWatch_Week6_Outline_Prachin.docx | Bin 0 -> 11375 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 MemoryWatch_Week6_Outline_Prachin.docx diff --git a/MemoryWatch_Week6_Outline_Prachin.docx b/MemoryWatch_Week6_Outline_Prachin.docx new file mode 100644 index 0000000000000000000000000000000000000000..7d1c11a903f10e5dff93465e4630e83980959a44 GIT binary patch literal 11375 zcmc(_bySsG*FLH=Kquv-pM2 z$IYgA;xAH&D_NGfQMh#(#IhjC40DQRt0F<9L``ua- znh(}w^(Y<~odV!^@lNDdCJwfUSUTTUR|id|AWzhYG7p}A zr@HpMp>U1y5dLPqajK<%v)c&Sf1PeYj>v)8Y0dj^5_-irA*lkhp&kt%NhyWL|9OP}@UYU=Gx6)d z{UDUDjZxq!GuM1yDovoRWvmSPyqLdC*vEJK*&2o?u%KuQ6}|Y zh`;^XVAX|DpSSb+C2K?-RRgFjxeJnGtaxAOU4I&W|Jz1L!;5oVrfZuVW^?t>ZF*>t zNO+LfZsaDced0xcxc21iF)%E=j|r;Gl48IK7b!lVrxb>C=S$R6RZ0+4@pY2*C&c*a-MFZ;xE87x-cs__5SU}fR^+@s%#gH9x zri%?OzK(cHFa(0CA#Km6|N4#GX!vq7DSactWZt+0^>AxYw<5ej^cexlG0u*IU`F19 zv-xfBC(U)AuYoM-MAmO55<|n2a4q}MO0nNuPQxMxc~pqg9iJ#Z42{6Jn}k)ccEYis z;pwzu__K#fe~SsbJ2`zdorkRSv6sFsIYt3uxXK`JiS@XrX>j*{j@}1%$?w50D zXoJqW?`Tot?B5I9?)oGm?GS-!N2x93>eVcY=4;cRB!Vs~o#O}cQ1>tP)w!6M(h<5D znE0IHf(<`N?|pl9d%x_-b{i`5Cg9cJW}USowrwEQ!L&NH8A~*?W;mQ#Fbzus0IRB^ zm=`M|{tb0tZ@6yIThU;Pf|+><%4uw^BuF>#3e1G?k#AVB5Q+2LBWyke&)*G)1hvF$ z(^qD@P`s3<{x}@3_paR3rO1fvSpZp*gJm`e>fz)UBOi=T8AhqKc%r*}Az=17UKkk5 zp(lJplLUg7$HQ)<;1=2LLU=ij8xd))uR#eoi*W_<1UGvky#WJO2#9({G~kdqZLE@z zJ-MSoedV(u)J3w)EB%6asw#C_W3ja~b>dMEDrv!_Pg@F0fSZDO8+FlS|KsA~Lc!Fb zlYAZEj8iK662h(?1JNa`r+u>RRzVEy6!q3~J$1Mw1dTjPmF$9A0FGMY@|90s=LBcB zj_e#@IBe*Jw-aqc$&t@Z_OYaykzhVWIn#^jhBI7T+dZtgrCSBtjpA%o%-f(LZUi$k zFb`cK^U%m=OM+byq0Cru^6$npm;8@C4Ey$3vE~aUUebgp(r4$X!OG=!{(^z|(Kwjx}d8LG8 zHRMBzy<;+C$~Lh6!qqqPW4R2B9(S984P>RS` zflFX4RMRD6HPbKG~e{PHC0C}8^?Q@}21e=q#{tjZYCY2wJIJfe>{+;eD3FEL-&5%c7 zt=qXUx&QM->65j7G;UBx&!@@dy|>;}D6U7}Rn59GqM-dKcS1$!L`v#5rM9>$U`~Vs zIOoz7H0(r^k1~8{486xZ9|pMnmYHw`x!l`&mS27?>NnzWSwlZ%v0I09=B+PIMOucL z(+($zxIu31Y2?YaJm&Si!+_@^A0qq6wTKS34Udy)bq4WBOm2j*0 zrc9XE@4!Ld-IdO9=sVKK%bJa13!LL_p!T#4HquO`>QHjTy5?0+kiH<-%EMqNh_S<; z5O7UbS*pB&)QDNV~PUwx)kp4i3s=`eXlFs7hjoju+*M~XjFyFic12ac#fj*8N4 z`xKCeCLyRyn;__f+8ya$2hP;%bNnKaL#TCvKfX^(uc7(c#X(vzq2q+Mn zUyF-^=slo}=<3ONBN7`Vi&UROn8W-jse{x5T}-JGfB?RAB#G3ZL18}{Eleno{c_8I zD5|1Is2AUAubIqQzj1f|YF9jJQ z5j~o;VW~yT=FQsv$>qULQ%xuqC6)(l4_5{&+ulbXe!pI?lped4JTkra6Q!HoPFa=B z5vh%cdC3Yv59jN*)ov#$MEUL%GI_I>B~Y#U6)ZcK%zIczR(Z~+UBSzF?~?O+i^7O^I~9hV?!LijV3H9X;o>anUgs-v=EOs1xC8NO%lB~q-gJWH7=jf-_v z$9~{T03Q~#wClQ?`ozd)WM2%fOg&Bve%FIehuWXNS?Tu>h9}wTMFL)oTAb?^N`2`= zwNn3Xw>`foR!vRIwk?_s5|vSuTNbchS8Svc+-7R{9kJ!2It-LZ9ntB?*o246_@UO#z8Fp5ZoHXd-YMWD~`ZB$}un$go|Z+y+E% ztJp0CTPjzE3JvmKa4B81aR;5|jkvHohNAXY5Z>VsTRE(Qfb>0s_7YMf6tkCU?IK$$ zS^EY|8xNpb`AgXMG%9D{94w7sEEY`m4_2F$te<9YT%L7tYkm*(2fuN|XxJyk-#Y7p zvV9px0F0yGlQOgy(<8w*LgdZ}c5o*UTT5*Nj}(`hb_vgCW+<1&_O`mpEq|-QReYb= zHF&y#6J9GNGP^mMd0Si~>aVRmrw~Y2F4cCSSsyMyurlYqhTAl+G!0t?vpyDp91UyT zDG0CuRyx}S^Y1lkMUq^=-YK?lpigJqL@2OhbQZ~|!PvxiK_v{K8A9=D@Lot*Czz8j zrLf~Ho)!oje{)UR>8C&CAYRsa!^zuF=SSpN%DkaGXWXy&A}!+1`>rQ%gd_@A%u+lHMAT> z&i50lkm(UToM>m5ARy(2Gw;-8ReOTU@S4%M-jcGq7vc?JA~3h@7L9(0WRKQ{|X{JCOVk3`aEMQaOhw($do6>nlDfRj36Mj=>Ot}T{S z^J+WuG?iINfsE`lgNNJ-%Y)#o*4k=h6J>Z{oN*sV9UVb6UF~F?vzHJXIQK6KL zN_i1V4dEw|tI}11t^MId9B(X9i*5JN$zsjbkS;LoBy$Zbp?q<5mxdpMg0ngE63ae4 z8{A=h^~S=^J_dPVKlh!=(zzQ9eeZR)lc(TZ7&}Dfi9Ic*0NlJ45E0PUvd|%ZtL_C%JKJRpGiIsq&UqYUS{C zS~Gi$1WZ~mUp#$>SN+9@3Cny;j<)omxur;tE1f@kyV@=C>NPL`AO{%$K>FF+**mye z8rlEogEv$)<>wf&?`RYkNfPrRso`efFeJtTuuX@`*H|$l(q<6zZm(y8(-QKt{GlkU zfOqs<{98@q{Cu9DgHD4Q+QTB|mxrARL`XuJ(!%F&n)*Ac_F!O5;bC%2*Xg5dhKA&} zgh{Xy|HXtA`nZ@`~}oCWnHeMlO0EBlb+H!cXpMM#*bU#C6~WnY4{8gxmu zhl)lnM#U*QV)imaKodd8gclx!kY3M1HNcKn*gb7$@H8@n9rnZ%Fn#85WdOyX00u|u z&who#kzerj{ls_N8KcA0D+PMHiCTOc#x-JjtzN(KwP?3Ma={HnbVKed6y!a+p(HTx zRibgR*1g?mYLSuH`Sk}rD(QWk9`bmTlxZSm$yHZc?Q7oH1DtnFQVl`^L$<>TdSG(H zd8mCbYwR4&ydkaUWyE_2{<6XGK>1+3lCGmz@&LR>Y^RZK?UJs=KuLItk?s*&Vf|V` z{c-QELKXe-JdwA!F0{>R?-~o@JWAs3DdE@C*P+YSp)(lG56K^t?8HnEc6$~`4$b^k z28Z1GJwQT>*}7qKs-MX+Sljj{Z*Q|<#>@a<{^cOP-CHHc&u1z*YIdcKB zLw;u!oe?TC7Zt+R+M--u*AK@05W%~-AK>W33BtdtDB;!ndpP(CF)QJpx~^GaT;r9~ zwuQPD=T)dpP;hdCG*nHtY|UvLOhD(+8=rEI_vQkM{XA`F!p_5+jE&k{CsYL*ga1KK>LkV@|8F87a4IF83lGVg%Zkh99BIVHj z_%Oxrz@^E)v`0PbeOk=Ul?^M1f~_cR^clZQE%fr4D8CJvs|sG<$TQv#Sjm+#sy0?r61|j!^3{cxW1u5=^emyt zxT*56TmYl_2C828F6RB7JFKBiBammh=o<}2 zEoFd#6Yljk6i%JdjBTy`36rab2Ax85j00}0hRk*ds?#e2hlKgONFA*@nqJXjdGdRU zw5aGaTB_^KsaJBx&wo1PZ}KU*l!+?((aH;m008RG4a(ZlO5e!N%-ZCKsV5Spt>+k# z11d;uG8A4L_9ACPSoxyUUxV!udL@{qFzOIfQ7t+yi8>#!Z)ZdANlJZAxRpqmwL>VR zd6Sw9^f2$sa&)vGDpb?W3tonW8FXLPOAiSDx!+o38em7%V-{o*j&R`^j2RHkg>DCQZ>15ujMoACBZMt*QgC4x45WOglKA6YF@ zC!$u1MA5^_rIAH?Cze&SMiDKR`A=$$kaMIr}ic)pjiDrW$abahsOZV)p9;>{Uj-t98f@}D58S&@AbcF)r)NjKu`WHFz z9N7VB40MH##7;INDqseYS$qSKPE={~@fAG~1DtdChw3a`gH+x5BoRmM2G>^y(ku_u zWhPYVrSltZ8&I7r7WQMX4e)6(E&-Rd=A&5dOIMxoET!sAtG;7F5AO>vi9_BLBFSgp z?}4&*L77WJccS9#a-7(;3Pg^ETk2JG;}_i3_SvEA1s|89)5cEcQfCLs3)?L1&7lbt zmm<%3>TLGJZucBlS0W0fG9d6}i0baFaAflB z<^YEr{W1wK2+;6a_qdARy@|Ikb0+s zM;;ukzwY(Vvxyf$Ks?mmtue_fHE=KKu(cAjc8g1W*xk%l|k)5H{ z^&YC1Xni^XWbEh2efvjiUXfv9(LufM^(I7w+>#un`SQ7PtwuAEs<$S6; zk=&jm;bx1x2pq4s)pQv~=8fgUVc)LIAV5A=S_-TegAUIerpTo1gd%XkYlu#r8m>pM za6uEQZ3C;3|I?M~u5+X@k1kw)e15p{*FouM?*O#=;mWi@LAXvvWdD_txS2U+EYdtP zj@4OFN1{%w?co*a(Xn=+rm@LMEZdnAuJxkJ@HmWWI3Dyx_n zsY&i5t@L(GmDGP{;Htc9jx}iu+%Gyu_;U%Xw`sGnkE;Jj(8ntI*Anz>Z2r{WZ{Mo; zdGu&4p6x+)BT|1Y0xJs%nzWl6nGdwh)#b`s-5n-om5{!C;YL+)9-wxeSMCjez}yNc zq>1k!tzYH_BJE9i3uQ=DH8duqz@*(E7rRtd8VPo}RL^TMsg8B#v0PL97Jr%9<5xG}~{G`S4^|LpkvRqyQ^T-SCfHlr=|AQ(^$}C`8Y?o{!Z8035ZE%~h z$7x3L^xm9vS&S+tLkIc`&HNWA0m4*Y%ZWWbUeS6)cR?FV<+lgZo`w#6R+Kyt%JD_YtMr!T=XffL#D}f8M9c4boNDy;I z*EDdbW)B*O(+gwTUQB%CRM1it<33nYVsNfgY|k9*V}YTkgh@I z+9Krmt;wX^Vr%C75jSYoF?=`(uS!zFJ7b)DX)`{XUD?aZ$ zm#ugr&vG~>1w2X-8XTL~ab2xFu1?;Hp1W6VL5yh~X%*zmgb`nZeJMx}DxItiKdlxw zK7+z|2Zvw>Wn>K>_Mw;e<%rA&uA@XLd*%84-Yq~$J)o(nl&cM&)hWM!z%+7`tvc)y z-NO9D*8HBCY3_qYv-gLsNFMNrmY2$Ybkdc(k`J1(IariCCzbW4%l<|YkTwwYh8j_# z?9($Vr|Z`pnmtSa+)8Sg=6N$Y=*fzgM{uTSa3Q>zuG4VxC zl8GX0;9*YEErlkEe8B-sX!hDO0`#%%?~29vGEwZ;wPS}FXMxx9%DU+{Utj&{ z@89F{$(#7!z5ah4ma-Y2KW9CLrTyd0gMY))-pIk>vHSRoBFT$ecx*r5{dKyB-AS9& zLpusJ9cU)c^ruDZp>@e;Fi91CUd#>5MMkVOyb_3JRf`ck<`K{B-kbBBG;D!WtnqGSKN>xV#6D<>BO+m%7gtV#U6XEB=*V)2MWPl#KqilUEsn&Jg5LJyJ zixN?rkqI-p)Q$4;I)(Q(f+wNPea<{YsR~S}+dR5R(FeBYd|zGt(Mz=|qqbRC=Y^&n zJVYIkBwtzNj3rE8O@!HE?13e?b?xe!%1}`X-xv+T^lr;hUC248>djJ}WWu}JU~3*E zTD(sX>u?JAPNbDxA3*R%;XPLc^u;r%H<-&z=NoZ`U)OI9^rMMJqET%7ID0xRzRmh1LU>Qm0Y_m$}=9zq{6o&*Q7FHSv4b0Z#@>x=+9G)-CTqzER|7 z-v;BwvlF2kDBPX7)U2^xl^|&GKHxx32{8AzOYG^5+QnGRx~1EYhm6VBV2|TV3YpN& z)^B1Ni&J-!9|NSwVE}l0n^FIEL5~$-ZB`s|7F?sp; zYcW)X6n6GI!ADgumBBYt%ZY1~6utMQY%Jqccy;4xqo(mO~ai_R>I{j`c<8l`>S7fa0MN$@4GcZe`5Uq%9 zN6BT4zh9NZ@lJg(J69k+bsd&QvRJ=_1~%}xvCx3~Hr&cctQWi!EbX^kA%qh(t)UQ9 zWaGrv(ArA8dym!9_(kVcr(bgQ{Xkz$XP`BYXBj^3>?M6%ZJ5Q}RC4iEC@pbs2Bs~# zY(vGwRoB5NwQyr=bp>$trA*lw0l!W$emm81#a-xT%&iI25XCJS!HB#}@X=i|N$?SH ztkP^LcT_a6Q)_=UD=0r!kf2HilD8 zA4vc$UZ{h1q3lpvcOh9zn-y6zw&SpG4{%I7(4Z{-6f-IQ;bZxMmEJvLE|piL=aUO8 z|I1eMyXXI}w;F>3?MZ^i&=dy$qfWDSP}bA8{EJme8(FaKBu0lmQX`qCsY?zwD_x|O zc534TfYpg07OX6f*Ata}v{(&az(78E*m!Swp=Pt+3sJpI)B46Z6{kW$)3fyQVyagk zgd@6B=ddl?4Gg&SsBej5yOyBMsn0El~UbmA5Zn92glVAcTEYe*YVl5^p9d~XtQ%ie<9@_y=0VjK$si6sa z^g3Fjg-=jdMWB@@k8cM;P2PF{xZ>1Nl!0@{gUMM7{Y(4@Vkts@Tcz`e!<^9zJ~Y<{_8Eo zFYtd-DnCftzsHZ|TR!$T|F)RFV1FO^@2BBY`0p0|a~*;&{#eKFLjLPIo}P2R6xseM$@r1*`}frQ-!%Etw`>%D bX!3`Ye^i0|EA=n|n1J_>75%~^008)Z2)}8( literal 0 HcmV?d00001