From 4f2156f75aea130bf4acc5441b8083367c2960ff Mon Sep 17 00:00:00 2001 From: doomedraven Date: Mon, 8 Jun 2026 10:37:42 +0200 Subject: [PATCH 1/2] Hunting (#3065) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add Threat Discovery 'Hunt' feature and UI Introduce a new Threat Discovery / Hunting feature: add default web.conf settings and HUNT_ENABLED flag in Django settings, register /hunt/ and /hunt/tag/ routes, and implement hunt view that runs MongoDB aggregations, noise whitelisting and facet filtering. Add tag_tasks POST endpoint to apply tags to SQL Task entries. Include a new analysis/hunt.html template and conditional header navigation link. Add tests for hunt views and tagging behavior. * Make hunt categories toggleable and dynamic facets Add UI toggles for hunt target categories and make the server-side MongoDB aggregation pipeline build $facet stages dynamically based on the selected categories. The hunt view now detects whether the form was submitted and derives a categories map (domains, ips, mutexes, files, commands, registry keys, and various hash types) that is passed to the template. Facet stages are only appended to the pipeline for enabled categories and aggregation is skipped when no facets are requested to save DB work. The hunt template was updated to render category switches and conditionally display panels. A test was added to verify that category filtering constructs the expected facets and that untoggled panels are not rendered. * Update views.py * fixes * Update settings.py * Update test_hunt_views.py * Update test_hunt_views.py * Update test_hunt_views.py * Update views.py * Add imphashes, http_uris, signatures to hunt view Expose three new hunt facets (PE import hashes, HTTP URIs, and behavioral signatures). Adds category toggles, facet pipeline stages and post-query cleaning/filtering (including a new is_valid_md5 check that rejects empty MD5 and wrong lengths). 
Updates template to render toggles and three new panels (with layout tweak: text-truncate on toggles container). Tests updated to cover rendering and filtering of valid/invalid imphash, HTTP URI, and signature entries. * Add dynamic hunting config and validators Introduce a modular hunting configuration and central validators. Added conf/default/hunt.json as the default category registry, and lib/cuckoo/common/hunting.py which centralizes validation functions (domains, IPs, files, hashes, mutexes, registry keys, commands), noisy denylists, and a load_hunt_map() loader with mtime caching and min_count substitution. Refactored web/analysis/views.py hunt() to hot-load the HUNT_MAP, build aggregation facets dynamically, apply validators from the map, and render clearer error pages when hunt.json is missing or invalid. Updated the hunt template to generate category switches, cards and external pivots from hunt_map. Tests updated to cover missing/invalid hunt.json behavior. * fixes * Update test_hunt_views.py * Add Threat Discovery & Hunting dashboard changelog Add 08.06.2026 changelog entry for the Threat Discovery & Hunting Workstation Dashboard: documents centralized dynamic multi-faceted DB clustering across 12 categories, a cascading JSON config cacher (conf/hunt.json) with hierarchical lookup (custom/conf → conf → conf/default), mtime-based high-performance caching for instant reloads, integrated OSINT pivot links (VirusTotal, Shodan, Censys, MalwareBazaar, AlienVault OTX) with sanitized AJAX task tagging, and comprehensive unit tests covering views, error handling, and security. 
* Update views.py --- changelog.md | 8 + conf/default/hunt.json | 143 ++++++++++++++++ conf/default/web.conf.default | 4 + lib/cuckoo/common/hunting.py | 221 +++++++++++++++++++++++++ tests/web/test_hunt_views.py | 273 +++++++++++++++++++++++++++++++ web/analysis/urls.py | 2 + web/analysis/views.py | 158 ++++++++++++++++++ web/templates/analysis/hunt.html | 158 ++++++++++++++++++ web/templates/header.html | 3 + web/web/settings.py | 4 +- 10 files changed, 973 insertions(+), 1 deletion(-) create mode 100644 conf/default/hunt.json create mode 100644 lib/cuckoo/common/hunting.py create mode 100644 tests/web/test_hunt_views.py create mode 100644 web/templates/analysis/hunt.html diff --git a/changelog.md b/changelog.md index a2e6a1cf040..d2ef732802d 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,11 @@ +### [08.06.2026] +* Threat Discovery & Hunting Workstation Dashboard: + * Integrated centralized dynamic multi-faceted database clustering across 12 categories (Domains, IPs, Mutexes, Dropped Files, Commands, Registry Keys, Hashes, ImpHashes, and Signatures). + * Created dynamic, cascading, auto-reloaded JSON configuration cacher (`conf/hunt.json`) with hierarchical lookup order (`custom/conf` ➔ `conf` ➔ `conf/default`). + * Built high-performance, memory-speed caching system utilizing OS modification-time (`mtime`) checks for instant reloading without disk parsing or server restarts. + * Integrated inline threat intelligence OSINT pivoting links (VirusTotal, Shodan, Censys, MalwareBazaar, and AlienVault OTX) and transaction-safe, sanitized AJAX-based task tagging group actions. + * Added comprehensive unit testing covering all view states, error handling, and security measures. + ### [05.06.2026] * Monitor updates: * NtCreateUserProcess hook: Dynamically patch ping commandline to thwart ping delays (e.g. 
Formbook/Xloader) diff --git a/conf/default/hunt.json b/conf/default/hunt.json new file mode 100644 index 00000000000..051d141b5ed --- /dev/null +++ b/conf/default/hunt.json @@ -0,0 +1,143 @@ +{ + "domains": { + "title": "Top Shared Domains", + "icon": "fas fa-globe text-info", + "badge_color": "info", + "form_key": "cat_domains", + "db_unwind": "$network.domains", + "db_group": "$network.domains.domain", + "validator": "is_valid_domain", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/domain/"}, + {"name": "AlienVault OTX", "icon": "fas fa-shield-alt", "url": "https://otx.alienvault.com/indicator/domain/"} + ] + }, + "ips": { + "title": "Top Shared IPs", + "icon": "fas fa-network-wired text-info", + "badge_color": "info", + "form_key": "cat_ips", + "db_unwind": "$network.hosts", + "db_group": "$network.hosts.ip", + "validator": "is_valid_ip", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/ip-address/"}, + {"name": "Shodan", "icon": "fas fa-search", "url": "https://www.shodan.io/host/"} + ] + }, + "mutexes": { + "title": "Top Shared Mutexes", + "icon": "fas fa-lock text-warning", + "badge_color": "warning", + "form_key": "cat_mutexes", + "db_unwind": "$behavior.summary.mutexes", + "db_group": "$behavior.summary.mutexes", + "validator": "is_valid_mutex", + "pivots": [] + }, + "dropped_files": { + "title": "Shared Dropped Files", + "icon": "fas fa-file-alt text-success", + "badge_color": "success", + "form_key": "cat_files", + "db_unwind": "$behavior.summary.files", + "db_group": "$behavior.summary.files", + "validator": "is_valid_file", + "pivots": [] + }, + "executed_commands": { + "title": "Shared Executed Commands", + "icon": "fas fa-terminal text-success", + "badge_color": "success", + "form_key": "cat_commands", + "db_unwind": "$behavior.summary.executed_commands", + "db_group": "$behavior.summary.executed_commands", + 
"validator": "is_valid_command", + "pivots": [] + }, + "registry_keys": { + "title": "Shared Registry Keys", + "icon": "fas fa-key text-danger", + "badge_color": "danger", + "form_key": "cat_registry", + "db_unwind": "$behavior.summary.keys", + "db_group": "$behavior.summary.keys", + "validator": "is_valid_registry", + "pivots": [] + }, + "dropped_hashes": { + "title": "Top Dropped File Hashes", + "icon": "fas fa-hashtag text-white", + "badge_color": "light", + "form_key": "cat_dropped_hashes", + "db_unwind": "$dropped", + "db_group": "$dropped.sha256", + "validator": "is_valid_hash", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/file/"}, + {"name": "MalwareBazaar", "icon": "fas fa-database", "url": "https://bazaar.abuse.ch/sample/"} + ] + }, + "procdump_hashes": { + "title": "Unpacked Memory Hashes", + "icon": "fas fa-memory text-white", + "badge_color": "light", + "form_key": "cat_procdump_hashes", + "db_unwind": "$procdump", + "db_group": "$procdump.sha256", + "validator": "is_valid_hash", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/file/"}, + {"name": "MalwareBazaar", "icon": "fas fa-database", "url": "https://bazaar.abuse.ch/sample/"} + ] + }, + "extracted_hashes": { + "title": "Extracted Payload Hashes", + "icon": "fas fa-file-medical text-white", + "badge_color": "light", + "form_key": "cat_extracted_hashes", + "db_unwind": "$extracted", + "db_group": "$extracted.sha256", + "validator": "is_valid_hash", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/file/"}, + {"name": "MalwareBazaar", "icon": "fas fa-database", "url": "https://bazaar.abuse.ch/sample/"} + ] + }, + "imphashes": { + "title": "PE Import Hashes (ImpHashes)", + "icon": "fas fa-file-invoice text-white", + "badge_color": "light", + "form_key": "cat_imphashes", + "db_unwind": null, + "db_group": 
"$static.pe.imphash", + "validator": "is_valid_md5", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/search/"}, + {"name": "MalwareBazaar", "icon": "fas fa-database", "url": "https://bazaar.abuse.ch/browse.php?search=imphash%3A"} + ], + "db_match": {"_id": {"$ne": null}} + }, + "http_uris": { + "title": "Shared HTTP Request URIs", + "icon": "fas fa-link text-info", + "badge_color": "info", + "form_key": "cat_http_uris", + "db_unwind": "$network.http", + "db_group": "$network.http.uri", + "validator": "is_valid_string", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/search/"} + ] + }, + "signatures": { + "title": "Shared Signatures", + "icon": "fas fa-signature text-warning", + "badge_color": "warning", + "form_key": "cat_signatures", + "db_unwind": "$signatures", + "db_group": "$signatures.name", + "validator": "is_valid_string", + "pivots": [] + } +} \ No newline at end of file diff --git a/conf/default/web.conf.default b/conf/default/web.conf.default index fe311627857..f829ca02eb7 100644 --- a/conf/default/web.conf.default +++ b/conf/default/web.conf.default @@ -274,3 +274,7 @@ enabled = no [display_authenticode] # Show Authenticode certificate chain card on the analysis overview tab enabled = no + +[hunt] +# Proactively discover new emerging campaigns by grouping undetected analyses by shared indicators +enabled = no diff --git a/lib/cuckoo/common/hunting.py b/lib/cuckoo/common/hunting.py new file mode 100644 index 00000000000..f92666cb0c0 --- /dev/null +++ b/lib/cuckoo/common/hunting.py @@ -0,0 +1,221 @@ +import ipaddress +import logging +import os +import re +import json + +from data.safelist.domains import domain_passlist, domain_passlist_re +from data.safelist.replacepatterns import FILES_DENYLIST, FILES_ENDING_DENYLIST, MUTEX_DENYLIST + +log = logging.getLogger(__name__) + +# Resolve CUCKOO_ROOT +_current_dir = 
os.path.abspath(os.path.dirname(__file__)) +CUCKOO_ROOT = os.path.normpath(os.path.join(_current_dir, "..", "..", "..")) + +# Precompile regex list once at the module level for maximum performance +compiled_passlist_re = [] +for safe_re in domain_passlist_re: + try: + if isinstance(safe_re, str): + compiled_passlist_re.append(re.compile(safe_re, re.IGNORECASE)) + elif hasattr(safe_re, "match"): + compiled_passlist_re.append(safe_re) + except Exception: + pass + + +# Define module-level validation filters +def is_valid_domain(domain): + if not domain or not isinstance(domain, str): + return False + domain_lower = domain.lower() + for safe in domain_passlist: + if domain_lower == safe or domain_lower.endswith("." + safe): + return False + for regex in compiled_passlist_re: + try: + if regex.match(domain_lower): + return False + except Exception: + pass + return True + + +def is_valid_ip(ip): + if not ip or not isinstance(ip, str): + return False + try: + ip_obj = ipaddress.ip_address(ip) + if ip_obj.is_private or ip_obj.is_loopback or ip_obj.is_multicast or ip_obj.is_link_local: + return False + if ip in ("8.8.8.8", "8.8.4.4", "1.1.1.1", "9.9.9.9", "208.67.222.222", "208.67.220.220"): + return False + except ValueError: + return False + return True + + +def is_valid_file(file_path): + if not file_path or not isinstance(file_path, str): + return False + file_path_lower = file_path.lower() + for item in FILES_DENYLIST: + if item.lower() in file_path_lower: + return False + for item in FILES_ENDING_DENYLIST: + if file_path_lower.endswith(item.lower()): + return False + return True + + +def is_valid_hash(h): + if not h or not isinstance(h, str): + return False + if h in ("d41d8cd98f00b204e9800998ecf8427e", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"): + return False + if len(h) != 64: + return False + return True + + +def is_valid_md5(h): + if not h or not isinstance(h, str): + return False + if h == "d41d8cd98f00b204e9800998ecf8427e": + return 
False + if len(h) != 32: + return False + return True + + +# Common system mutexes that generate noise +noisy_mutexes = [ + "Local\\ZoneBaseMutex", "CTF.Asm.Mutex", "Global\\Access_Registry_Mutex", + "Local\\__wf_mut__", "cuckoo_mutex", "Local\\_Global_", "Local\\MS-LanguageProfile" +] +def is_valid_mutex(mutex): + if not mutex or not isinstance(mutex, str): + return False + mutex_lower = mutex.lower() + for m in MUTEX_DENYLIST: + if m.lower() in mutex_lower: + return False + for m in noisy_mutexes: + if m.lower() in mutex_lower: + return False + return True + + +noisy_registry_substrings = [ + "Controlset001\\Control\\Lsa", + "Cryptography\\Providers", + "System\\CurrentControlSet\\Control\\Nls", + "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Font", + "SOFTWARE\\Microsoft\\CTF\\", + "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Explorer\\MountPoints2" +] +def is_valid_registry(key): + if not key or not isinstance(key, str): + return False + key_lower = key.lower() + for sub in noisy_registry_substrings: + if sub.lower() in key_lower: + return False + return True + + +noisy_command_substrings = [ + "chcp", "reg query", "sc query", "net start", "tasklist" +] +def is_valid_command(cmd): + if not cmd or not isinstance(cmd, str): + return False + cmd_lower = cmd.lower() + for sub in noisy_command_substrings: + if sub.lower() in cmd_lower: + return False + return True + + +VALIDATORS = { + "is_valid_domain": is_valid_domain, + "is_valid_ip": is_valid_ip, + "is_valid_mutex": is_valid_mutex, + "is_valid_file": is_valid_file, + "is_valid_command": is_valid_command, + "is_valid_registry": is_valid_registry, + "is_valid_hash": is_valid_hash, + "is_valid_md5": is_valid_md5, + "is_valid_string": lambda x: isinstance(x, str) and bool(x), +} + +# Module level caching for Hunt Configuration +_CACHED_HUNT_MAP = None +_CACHED_HUNT_MTIME = None +_CACHED_HUNT_PATH = None + + +def load_hunt_map(min_count: int = 3): + """ + Dynamically loads the hunting configuration from a 
hierarchical search of paths. + Lookup order (reverse mode, most specific to least specific): + 1. custom/conf/hunt.json + 2. conf/hunt.json + 3. conf/default/hunt.json (fallback defaults) + + Utilizes system mtime caching on the resolved path to achieve zero disk reads when unmodified. + Returns (HUNT_MAP, VALIDATORS) tuple, or (None, error_reason) on error. + """ + global _CACHED_HUNT_MAP, _CACHED_HUNT_MTIME, _CACHED_HUNT_PATH + + lookup_paths = [ + os.path.normpath(os.path.join(CUCKOO_ROOT, "custom", "conf", "hunt.json")), + os.path.normpath(os.path.join(CUCKOO_ROOT, "conf", "hunt.json")), + os.path.normpath(os.path.join(CUCKOO_ROOT, "conf", "default", "hunt.json")) + ] + + has_invalid_syntax = False + + for path in lookup_paths: + if os.path.exists(path): + try: + current_mtime = os.path.getmtime(path) + + # Cache Hit: If cached configuration matches this path and modification time, return instantly! + if _CACHED_HUNT_MAP is not None and _CACHED_HUNT_PATH == path and _CACHED_HUNT_MTIME == current_mtime: + return _CACHED_HUNT_MAP, VALIDATORS + + # Cache Miss: Parse the JSON file + with open(path, "r") as f: + raw_map = json.load(f) + if raw_map and isinstance(raw_map, dict): + temp_map = {} + for cat_id, cat_config in raw_map.items(): + val_func_name = cat_config.get("validator", "is_valid_string") + cat_config["validator"] = VALIDATORS.get(val_func_name, lambda x: isinstance(x, str) and bool(x)) + + # Dynamically replace min_count placeholders inside the custom db_match if present + if "db_match" in cat_config: + if "count" in cat_config["db_match"] and "$gte" in cat_config["db_match"]["count"]: + cat_config["db_match"]["count"]["$gte"] = min_count + + temp_map[cat_id] = cat_config + + # Save to cache + _CACHED_HUNT_MAP = temp_map + _CACHED_HUNT_MTIME = current_mtime + _CACHED_HUNT_PATH = path + + return _CACHED_HUNT_MAP, VALIDATORS + except Exception as e: + # Log detailed traceback of corrupted file, but proceed to fallback paths + log.exception("Failed to 
load hunting configuration from %s: %s", path, e) + has_invalid_syntax = True + + # If no configuration file could be loaded successfully + if has_invalid_syntax: + return None, "invalid" + else: + log.error("All hunting configuration lookup paths are missing: %s", lookup_paths) + return None, "missing" diff --git a/tests/web/test_hunt_views.py b/tests/web/test_hunt_views.py new file mode 100644 index 00000000000..f22108aaf0b --- /dev/null +++ b/tests/web/test_hunt_views.py @@ -0,0 +1,273 @@ +from unittest.mock import MagicMock, patch +import json +import pytest +from django.conf import settings +from django.test import SimpleTestCase +from analysis.views import enabledconf + + +@pytest.mark.usefixtures("db") +class TestHuntViews(SimpleTestCase): + def setUp(self): + self.original_mongodb_enabled = enabledconf["mongodb"] + self.original_hunt_enabled = getattr(settings, "HUNT_ENABLED", False) + self.original_web_auth = getattr(settings, "WEB_AUTHENTICATION", False) + settings.HUNT_ENABLED = True + settings.WEB_AUTHENTICATION = False + + def tearDown(self): + enabledconf["mongodb"] = self.original_mongodb_enabled + settings.HUNT_ENABLED = self.original_hunt_enabled + settings.WEB_AUTHENTICATION = self.original_web_auth + + def test_hunt_page_requires_enabled_setting(self): + """If HUNT_ENABLED is set to False in settings (via web.conf), the page should render an error.""" + settings.HUNT_ENABLED = False + response = self.client.get("/analysis/hunt/") + self.assertEqual(response.status_code, 200) + self.assertIn("The Hunt/Threat Discovery feature is disabled in web.conf", response.content.decode()) + + def test_hunt_page_requires_mongodb(self): + """If MongoDB is disabled, the hunt page should render an error.""" + enabledconf["mongodb"] = False + response = self.client.get("/analysis/hunt/") + self.assertEqual(response.status_code, 200) + self.assertIn("MongoDB is required", response.content.decode()) + + def test_hunt_page_prevents_global_all_time_hunt(self): + 
"""If filename_prefix is blank and days_back is set to 0 (All Time), render a database performance safeguard error.""" + enabledconf["mongodb"] = True + response = self.client.get("/analysis/hunt/?filename_prefix=&days_back=0") + self.assertEqual(response.status_code, 200) + self.assertIn("An all-time global hunt with no filename prefix is not allowed", response.content.decode()) + + @patch("lib.cuckoo.common.hunting.os.path.exists") + def test_hunt_page_error_when_config_missing(self, mock_exists): + """If hunt.json is missing from disk, render a clean error page and block execution.""" + mock_exists.return_value = False + enabledconf["mongodb"] = True + response = self.client.get("/analysis/hunt/") + self.assertEqual(response.status_code, 200) + self.assertIn("The hunt.json configuration file is missing", response.content.decode()) + + @patch("lib.cuckoo.common.hunting.os.path.getmtime") + @patch("lib.cuckoo.common.hunting.os.path.exists") + @patch("lib.cuckoo.common.hunting.open") + def test_hunt_page_error_when_config_invalid(self, mock_open, mock_exists, mock_getmtime): + """If hunt.json contains invalid syntax, log detailed tracebacks internally and render a secure error page.""" + mock_exists.return_value = True + mock_getmtime.return_value = 12345678.0 + mock_open.side_effect = ValueError("Invalid JSON syntax") + enabledconf["mongodb"] = True + response = self.client.get("/analysis/hunt/") + self.assertEqual(response.status_code, 200) + self.assertIn("The hunt.json configuration file is invalid", response.content.decode()) + + @patch("analysis.views.mongo_aggregate", create=True) + def test_hunt_page_success_renders_template(self, mock_mongo_aggregate): + """The hunt page should render facets correctly after whitelisting system noise.""" + enabledconf["mongodb"] = True + + # Mock MongoDB returning aggregations with noise and signal + mock_mongo_aggregate.return_value = [{ + "domains": [ + {"_id": "malicious-c2.com", "count": 5, "task_ids": {101, 102}}, + 
{"_id": "crl.microsoft.com", "count": 20, "task_ids": {101, 102}} # Should be whitelisted out + ], + "ips": [ + {"_id": "185.190.140.1", "count": 4, "task_ids": {101, 102}}, + {"_id": "127.0.0.1", "count": 10, "task_ids": {101}} # Private IP, should be whitelisted out + ], + "mutexes": [ + {"_id": "EvilCampaignMutex", "count": 3, "task_ids": {101, 102}}, + {"_id": "Local\\ZoneBaseMutex", "count": 15, "task_ids": {101}} # Whitelisted out + ], + "dropped_files": [ + {"_id": "C:\\Windows\\Temp\\payload.exe", "count": 3, "task_ids": {101, 102}}, + {"_id": "Device\\KsecDD", "count": 10, "task_ids": {101}} # Whitelisted out + ], + "executed_commands": [ + {"_id": "powershell.exe -enc BADCODE", "count": 4, "task_ids": {101, 102}}, + {"_id": "chcp", "count": 12, "task_ids": {101}} # Whitelisted out + ], + "registry_keys": [ + {"_id": "HKCU\\Software\\EvilKey", "count": 3, "task_ids": {101, 102}}, + {"_id": "HKLM\\SOFTWARE\\Microsoft\\CTF\\", "count": 14, "task_ids": {101}} # Whitelisted out + ], + "dropped_hashes": [ + {"_id": "a" * 64, "count": 4, "task_ids": {101, 102}}, + {"_id": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", "count": 12, "task_ids": {101}} # Blank hash, should be filtered + ], + "procdump_hashes": [ + {"_id": "b" * 64, "count": 3, "task_ids": {101, 102}}, + {"_id": "invalid_hash_len", "count": 10, "task_ids": {101}} # Invalid hash length, filtered + ], + "extracted_hashes": [ + {"_id": "c" * 64, "count": 5, "task_ids": {101, 102}} + ], + "imphashes": [ + {"_id": "d" * 32, "count": 4, "task_ids": {101, 102}}, + {"_id": "invalid_imphash_len", "count": 15, "task_ids": {101}} # Invalid imphash length, filtered + ], + "http_uris": [ + {"_id": "/api/v1/beacon.php", "count": 6, "task_ids": {101, 102}} + ], + "signatures": [ + {"_id": "has_pogo_autorun", "count": 8, "task_ids": {101, 102}} + ] + }] + + response = self.client.get("/analysis/hunt/?filename_prefix=downloaded_by_&min_count=2&days_back=14") + 
self.assertEqual(response.status_code, 200) + + html_content = response.content.decode() + + # Check that title / elements are present + self.assertIn("Threat Discovery & Hunting", html_content) + + # Check signal values are rendered + self.assertIn("malicious-c2.com", html_content) + self.assertIn("185.190.140.1", html_content) + self.assertIn("EvilCampaignMutex", html_content) + self.assertIn("payload.exe", html_content) + self.assertIn("powershell.exe -enc BADCODE", html_content) + self.assertIn("HKCU\\Software\\EvilKey", html_content) + self.assertIn("a" * 64, html_content) + self.assertIn("b" * 64, html_content) + self.assertIn("c" * 64, html_content) + self.assertIn("d" * 32, html_content) + self.assertIn("/api/v1/beacon.php", html_content) + self.assertIn("has_pogo_autorun", html_content) + + # Ensure whitelisted / noise items are successfully filtered out and not rendered + self.assertNotIn("crl.microsoft.com", html_content) + self.assertNotIn("127.0.0.1", html_content) + self.assertNotIn("ZoneBaseMutex", html_content) + self.assertNotIn("Device\\KsecDD", html_content) + self.assertNotIn("HKLM\\SOFTWARE\\Microsoft\\CTF\\", html_content) + self.assertNotIn("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", html_content) + self.assertNotIn("invalid_hash_len", html_content) + self.assertNotIn("invalid_imphash_len", html_content) + + # Verify query parameters are passed back to forms + self.assertIn('value="downloaded_by_"', html_content) + self.assertIn('value="2"', html_content) + # Select box selection option checked + self.assertIn('', html_content) + + @patch("analysis.views.mongo_aggregate", create=True) + def test_hunt_page_blank_prefix_works(self, mock_mongo_aggregate): + """When filename_prefix is left blank, the match query should skip the target.file.name check to allow global hunting.""" + enabledconf["mongodb"] = True + mock_mongo_aggregate.return_value = [{}] + + response = 
self.client.get("/analysis/hunt/?filename_prefix=&min_count=2&days_back=7") + self.assertEqual(response.status_code, 200) + + # Ensure mongo_aggregate was called + self.assertTrue(mock_mongo_aggregate.called) + called_pipeline = mock_mongo_aggregate.call_args[0][1] + + # Extract the $match stage from the pipeline + match_stage = called_pipeline[0]["$match"] + + # Assert that 'target.file.name' was omitted from the match query + self.assertNotIn("target.file.name", match_stage) + self.assertIn("malfamily", match_stage) + self.assertIn("detections", match_stage) + self.assertIn("info.started", match_stage) + self.assertEqual(match_stage["detections"], {"$exists": False}) + + @patch("analysis.views.mongo_aggregate", create=True) + def test_hunt_page_ignore_detections_toggle_works(self, mock_mongo_aggregate): + """When ignore_detections is toggled ON, the query should completely skip malfamily and detections filters.""" + enabledconf["mongodb"] = True + mock_mongo_aggregate.return_value = [{}] + + # Send ignore_detections=on (standard GET form checkbox format) + response = self.client.get("/analysis/hunt/?filename_prefix=downloaded_by_&min_count=2&days_back=7&ignore_detections=on") + self.assertEqual(response.status_code, 200) + + # Ensure mongo_aggregate was called + self.assertTrue(mock_mongo_aggregate.called) + called_pipeline = mock_mongo_aggregate.call_args[0][1] + + # Extract the $match stage from the pipeline + match_stage = called_pipeline[0]["$match"] + + # Assert that 'malfamily' and 'detections' were skipped + self.assertNotIn("malfamily", match_stage) + self.assertNotIn("detections", match_stage) + self.assertIn("target.file.name", match_stage) + + @patch("analysis.views.db.session.get") + @patch("analysis.views.db.session.commit") + def test_tag_tasks_endpoint_works(self, mock_commit, mock_get): + """The tag_tasks API should properly add and append custom tags to SQL Task entries.""" + # Use side_effect to return distinct Task mocks for each call, avoiding 
mock mutation reuse + def mock_get_task(model, tid): + task = MagicMock() + task.tags_tasks = "existing_tag" + return task + mock_get.side_effect = mock_get_task + + payload = {"task_ids": [101, 102], "tag": "New_Campaign"} + response = self.client.post( + "/analysis/hunt/tag/", + data=json.dumps(payload), + content_type="application/json" + ) + self.assertEqual(response.status_code, 200) + + # Verify returned JSON response + data = response.json() + self.assertEqual(data["status"], "success") + self.assertEqual(data["tag"], "New_Campaign") + self.assertEqual(data["updated_count"], 2) + + # Verify commit happened + self.assertTrue(mock_commit.called) + + @patch("analysis.views.mongo_aggregate", create=True) + def test_hunt_page_category_filtering_works(self, mock_mongo_aggregate): + """The hunt page should dynamically construct pipeline facets based on untoggled category checkboxes.""" + enabledconf["mongodb"] = True + mock_mongo_aggregate.return_value = [{}] + + # Submit form with only cat_domains and cat_ips enabled (others default to false when form submitted) + response = self.client.get( + "/analysis/hunt/?filename_prefix=downloaded_by_&min_count=2&days_back=7&cat_domains=on&cat_ips=on" + ) + self.assertEqual(response.status_code, 200) + + # Verify mongo_aggregate was called with only those 2 facets + self.assertTrue(mock_mongo_aggregate.called) + called_pipeline = mock_mongo_aggregate.call_args[0][1] + + # Extract $facet stage + facet_stage = called_pipeline[1]["$facet"] + self.assertIn("domains", facet_stage) + self.assertIn("ips", facet_stage) + + # Assert other facets were excluded to save database performance + self.assertNotIn("mutexes", facet_stage) + self.assertNotIn("dropped_files", facet_stage) + self.assertNotIn("executed_commands", facet_stage) + self.assertNotIn("registry_keys", facet_stage) + self.assertNotIn("dropped_hashes", facet_stage) + self.assertNotIn("procdump_hashes", facet_stage) + self.assertNotIn("extracted_hashes", facet_stage) + 
self.assertNotIn("imphashes", facet_stage) + self.assertNotIn("http_uris", facet_stage) + self.assertNotIn("signatures", facet_stage) + + # Verify template did not render untoggled panels + html_content = response.content.decode() + self.assertIn("Top Shared Domains", html_content) + self.assertIn("Top Shared IPs", html_content) + self.assertNotIn("Top Shared Mutexes", html_content) + self.assertNotIn("Shared Registry Keys", html_content) + self.assertNotIn("Unpacked Memory Hashes", html_content) + self.assertNotIn("PE Import Hashes", html_content) + self.assertNotIn("Shared HTTP Request URIs", html_content) + self.assertNotIn("Shared Signatures", html_content) diff --git a/web/analysis/urls.py b/web/analysis/urls.py index c0c9efc137f..6be5f182e48 100644 --- a/web/analysis/urls.py +++ b/web/analysis/urls.py @@ -30,6 +30,8 @@ re_path(r"^search/(?P\d+)/$", views.search_behavior, name="search_behavior"), re_path(r"^search/(?P[\w\d\s:\-_\.]+)/$", views.search, name="search"), re_path(r"^search/$", views.search, name="search"), + re_path(r"^hunt/$", views.hunt, name="hunt"), + re_path(r"^hunt/tag/$", views.tag_tasks, name="tag_tasks"), re_path(r"^pending/$", views.pending, name="pending"), re_path(r"^ban_user_tasks/(?P[\d]+)/$", views.ban_all_user_tasks, name="ban_all_user_tasks"), re_path(r"^ban_user/(?P[\d]+)/$", views.ban_user, name="ban_user"), diff --git a/web/analysis/views.py b/web/analysis/views.py index 28e599589da..d16e2e5870c 100644 --- a/web/analysis/views.py +++ b/web/analysis/views.py @@ -7,6 +7,7 @@ import datetime import json import os +import re import subprocess import sys import tempfile @@ -18,6 +19,7 @@ from urllib.parse import quote from wsgiref.util import FileWrapper + from django.conf import settings from django.contrib.auth.decorators import login_required from django.core.exceptions import BadRequest, PermissionDenied @@ -48,6 +50,7 @@ from lib.cuckoo.core.database import Database, TasksMixIn from lib.cuckoo.core.data.task import TASK_PENDING, 
Task from modules.reporting.report_doc import CHUNK_CALL_SIZE +from lib.cuckoo.common.hunting import load_hunt_map try: from django_ratelimit.decorators import ratelimit @@ -4129,3 +4132,158 @@ def failed_processing(request, task_id): "process_log": log_content, "settings": settings, })
+
+
+@require_safe
+@conditional_login_required(login_required, settings.WEB_AUTHENTICATION)
+def hunt(request):
+    """Render the Threat Discovery / Hunting dashboard.
+
+    Builds a MongoDB aggregation over the ``analysis`` collection with one
+    ``$facet`` branch per enabled hunt-map category, filters every returned
+    cluster through that category's validator and renders
+    ``analysis/hunt.html``.
+
+    Query-string parameters:
+        filename_prefix: prefix matched (anchored, case-sensitive) against
+            ``target.file.name``. Defaults to ``downloaded_by_``; an empty
+            value disables the filename filter.
+        min_count: minimum cluster size. Falls back to 3 when not an integer.
+        days_back: only analyses started within the last N days are
+            considered. Falls back to 14 when not an integer; 0 (or any
+            negative value, which is clamped to 0) disables the date filter.
+        ignore_detections: ``on`` to also include analyses that already carry
+            ``malfamily`` / ``detections``.
+        category toggles: each category's ``form_key`` set to ``on``. When no
+            ``cat_*`` key is present at all, every category is enabled.
+
+    An ``error.html`` page is rendered instead when the feature or MongoDB is
+    disabled, when hunt.json is missing/invalid, when an unbounded hunt
+    (no prefix and no date window) is requested, or when the aggregation
+    raises.
+    """
+    if not settings.HUNT_ENABLED:
+        return render(request, "error.html", {"error": "The Hunt/Threat Discovery feature is disabled in web.conf."})
+
+    if not enabledconf["mongodb"]:
+        return render(request, "error.html", {"error": "MongoDB is required for the Hunt/Threat Discovery feature."})
+
+    filename_prefix = request.GET.get("filename_prefix", "downloaded_by_")
+    ignore_detections = request.GET.get("ignore_detections") == "on"
+    try:
+        min_count = int(request.GET.get("min_count", "3"))
+    except ValueError:
+        min_count = 3
+    try:
+        days_back = int(request.GET.get("days_back", "14"))
+    except ValueError:
+        days_back = 14
+    # A negative window would skip the date filter *and* slip past the
+    # all-time safeguard below, so normalise it to 0 ("no date filter").
+    days_back = max(days_back, 0)
+
+    # load_hunt_map() hands back (None, reason) when hunt.json cannot be used.
+    hunt_map, load_status = load_hunt_map(min_count)
+    if hunt_map is None:
+        if load_status == "missing":
+            return render(request, "error.html", {"error": "The hunt.json configuration file is missing. Please contact your system administrator."})
+        return render(request, "error.html", {"error": "The hunt.json configuration file is invalid. Please check system logs."})
+
+    # Without any cat_* key in the query string, every category is on;
+    # otherwise only the categories whose form key is "on" are enabled.
+    has_category_filter = any(key.startswith("cat_") for key in request.GET)
+    categories = {
+        cat_id: (not has_category_filter) or (request.GET.get(cat_config["form_key"]) == "on")
+        for cat_id, cat_config in hunt_map.items()
+    }
+
+    # Clean prefix to avoid double caret and force strict case-sensitive Prefix Match.
+    # MongoDB B-Tree indexes are ONLY fully utilized by regex if it is anchored at the start (^)
+    # and case-sensitive (no $options: "i").
+    clean_prefix = re.escape(filename_prefix.lstrip("^").strip())
+
+    # Database Safeguard: Prevent global all-time hunts to avoid database timeouts
+    if not clean_prefix and days_back == 0:
+        return render(request, "error.html", {"error": "An all-time global hunt with no filename prefix is not allowed due to performance risks."})
+
+    # Build match query with optional date filters for performance
+    match_query = {}
+
+    if not ignore_detections:
+        match_query["malfamily"] = {"$exists": False}
+        match_query["detections"] = {"$exists": False}
+
+    if clean_prefix:
+        match_query["target.file.name"] = {"$regex": f"^{clean_prefix}"}
+
+    if days_back > 0:
+        # info.started is compared as a "YYYY-MM-DD HH:MM:SS" string in UTC.
+        window_start = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days_back)
+        match_query["info.started"] = {"$gte": window_start.strftime("%Y-%m-%d %H:%M:%S")}
+
+    # Dynamic multi-category aggregation: one $facet branch per enabled category
+    facet_stages = {}
+    for cat_id, cat_config in hunt_map.items():
+        if not categories[cat_id]:
+            continue
+        stages = []
+        if cat_config["db_unwind"]:
+            stages.append({"$unwind": cat_config["db_unwind"]})
+        stages.extend([
+            {"$group": {"_id": cat_config["db_group"], "count": {"$sum": 1}, "task_ids": {"$addToSet": "$info.id"}}},
+            {"$match": cat_config.get("db_match", {"count": {"$gte": min_count}})},
+            {"$sort": {"count": -1}},
+            {"$limit": 100},
+        ])
+        facet_stages[cat_id] = stages
+
+    # Skip the database round-trip entirely when no category is enabled.
+    facets = {}
+    if facet_stages:
+        pipeline = [
+            {"$match": match_query},
+            {"$facet": facet_stages},
+        ]
+        try:
+            res = list(mongo_aggregate("analysis", pipeline))
+        except Exception as e:
+            return render(request, "error.html", {"error": f"Threat hunting aggregation failed: {e}"})
+        facets = res[0] if res else {}
+
+    # Apply noise whitelists and validators dynamically; keep the top 15 clusters
+    clean_facets = {}
+    for cat_id, cat_config in hunt_map.items():
+        if not categories[cat_id]:
+            continue
+        validator_func = cat_config["validator"]
+        clean_facets[cat_id] = [
+            (item["_id"], item["count"], sorted(item["task_ids"]))
+            for item in facets.get(cat_id, [])
+            if validator_func(item["_id"])
+        ][:15]
+
+    return render(request, "analysis/hunt.html", {
+        "facets": clean_facets,
+        "filename_prefix": filename_prefix,
+        "min_count": min_count,
+        "days_back": days_back,
+        "ignore_detections": ignore_detections,
+        "categories": categories,
+        "hunt_map": hunt_map,
+        "settings": settings,
+    })
+
+
+@require_POST
+@conditional_login_required(login_required, settings.WEB_AUTHENTICATION)
+def tag_tasks(request):
+    """Append a tag to the ``tags_tasks`` field of the given SQL tasks.
+
+    Expects a JSON object body of the form
+    ``{"task_ids": [<int or numeric string>, ...], "tag": "<text>"}``.
+    The tag is reduced to alphanumerics, ``_`` and ``-`` before it is stored,
+    and is only appended to tasks that do not already carry it.
+
+    Returns a JSON response:
+        200 ``{"status": "success", "updated_count": n, "tag": tag}``
+        400 for malformed JSON, missing/invalid ``task_ids`` or ``tag``
+        500 when the database update fails (the session is rolled back)
+    """
+    try:
+        data = json.loads(request.body)
+    except ValueError:
+        return JsonResponse({"status": "error", "message": "Invalid JSON"}, status=400)
+    # Valid JSON that is not an object (list, number, ...) has no fields to read.
+    if not isinstance(data, dict):
+        return JsonResponse({"status": "error", "message": "Invalid JSON"}, status=400)
+
+    task_ids = data.get("task_ids", [])
+    raw_tag = data.get("tag", "")
+    # A non-string tag (null, number, ...) is treated the same as a missing one.
+    tag = raw_tag.strip() if isinstance(raw_tag, str) else ""
+
+    if not task_ids or not tag:
+        return JsonResponse({"status": "error", "message": "Missing task_ids or tag"}, status=400)
+
+    # Require a real list: iterating a string would tag one task per character.
+    if not isinstance(task_ids, list):
+        return JsonResponse({"status": "error", "message": "Invalid task_ids"}, status=400)
+    try:
+        task_ids = [int(tid) for tid in task_ids]
+    except (TypeError, ValueError):
+        return JsonResponse({"status": "error", "message": "Invalid task_ids"}, status=400)
+
+    # Sanitize tag string (alphanumeric, underscores, hyphens)
+    tag = "".join(c for c in tag if c.isalnum() or c in ("_", "-"))
+    if not tag:
+        return JsonResponse({"status": "error", "message": "Invalid tag string"}, status=400)
+
+    updated_count = 0
+    try:
+        for tid in task_ids:
+            task = db.session.get(Task, tid)
+            if task:
+                existing_tags = task.tags_tasks or ""
+                current_tags = [t.strip() for t in existing_tags.split(",") if t.strip()]
+                if tag not in current_tags:
+                    current_tags.append(tag)
+                    task.tags_tasks = ",".join(current_tags)
+                    updated_count += 1
+        db.session.commit()
+    except Exception as e:
+        db.session.rollback()
+        return JsonResponse({"status": "error", "message": f"Database update failed: {e}"}, status=500)
+
+    return JsonResponse({"status": "success", "updated_count": updated_count, "tag": tag})
diff --git
a/web/templates/analysis/hunt.html b/web/templates/analysis/hunt.html new file mode 100644 index 00000000000..5cecdfbd429 --- /dev/null +++ b/web/templates/analysis/hunt.html @@ -0,0 +1,158 @@ +{% extends "base.html" %} +{% block content %} +
+
+
+
+

Threat Discovery & Hunting

+
+
+

+ Proactively discover new emerging campaigns by grouping undetected analyses by shared indicators. + Search specifically by filename prefix to cluster similar samples and uncover shared infrastructure or behavioral signatures. +

+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+
+ + +
+
+
+ +
+ + +
+ Target Categories: + {% for cat_id, cat_config in hunt_map.items %} +
+ + +
+ {% endfor %} +
+
+
+
+
+
+ + +
+ {% for cat_id, cat_config in hunt_map.items %} + {% if categories|get_item:cat_id %} +
+
+
+
{{ cat_config.title }}
+ {{ facets|get_item:cat_id|length }} clusters +
+
+ + + + + + + + + + {% if facets|get_item:cat_id %} + {% for item in facets|get_item:cat_id %} + + + + + + {% endfor %} + {% else %} + + {% endif %} + +
Indicator ValueCountTasks
+ {{ item.0 }} + + {% for pivot in cat_config.pivots %} + + {% endfor %} + {{ item.1 }} + {% for tid in item.2 %} + {{ tid }} + {% endfor %} + +
No correlated indicators found.
+
+
+
+ {% endif %} + {% endfor %} +
+ + +{% endblock %} diff --git a/web/templates/header.html b/web/templates/header.html index c14426f61f3..11e7cd48e72 100644 --- a/web/templates/header.html +++ b/web/templates/header.html @@ -14,6 +14,9 @@ + {% if settings.HUNT_ENABLED %} + + {% endif %} diff --git a/web/web/settings.py b/web/web/settings.py index 4700988b975..2f314897a0d 100644 --- a/web/web/settings.py +++ b/web/web/settings.py @@ -36,6 +36,7 @@ REPROCESS_TASKS = web_cfg.general.reprocess_tasks REPROCESS_FAILED_PROCESSING = web_cfg.general.reprocess_failed_processing +HUNT_ENABLED = getattr(web_cfg, "hunt", {}).get("enabled", False) # CSRF TRUSTED ORIGINS # For requests that include the Origin header, Django's CSRF protection # requires that header match the origin present in the Host header. @@ -332,7 +333,8 @@ "NETWORK_PROC_MAP", "REPROCESS_TASKS", "REPROCESS_FAILED_PROCESSING", - "AUDIT_FRAMEWORK" + "AUDIT_FRAMEWORK", + "HUNT_ENABLED" ] EMAIL_BACKEND = "django.core.mail.backends.smtp.EmailBackend" From 7ddaea857674fcf047d792449d711bded94fec98 Mon Sep 17 00:00:00 2001 From: Kevin O'Reilly Date: Mon, 8 Jun 2026 12:14:04 +0100 Subject: [PATCH 2/2] Tweak Vidar detection --- data/yara/CAPE/Vidar.yar | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/yara/CAPE/Vidar.yar b/data/yara/CAPE/Vidar.yar index 4450dcf4e19..942b6791bd4 100644 --- a/data/yara/CAPE/Vidar.yar +++ b/data/yara/CAPE/Vidar.yar @@ -6,7 +6,7 @@ rule Vidar cape_type = "Vidar Payload" packed = "96ab9c389a6a53c54a3ea05d139aaf2d775e8db06f37d027f696828dcc55e2bb" strings: - $code = {4D 85 C0 75 0? [0-16] (41|45) 88 ?? [0-20] (41|45) 38 (08|10) 74 ?? [0-16] (48|49|4C|4D) (63|2B) [0-16] 4? 3B ?? 73} + $code = {4D 85 C0 75 0? [0-16] (41|45) 88 ?? [0-20] (41|45) 38 ?? 74 ?? [0-16] (48|49|4C|4D) (63|2B) [0-16] 4? 3B ?? 73} condition: uint16(0) == 0x5A4D and all of them }