diff --git a/changelog.md b/changelog.md index a2e6a1cf040..d2ef732802d 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,11 @@ +### [08.06.2026] +* Threat Discovery & Hunting Workstation Dashboard: + * Integrated centralized dynamic multi-faceted database clustering across 12 categories (Domains, IPs, Mutexes, Dropped Files, Executed Commands, Registry Keys, Dropped/Procdump/Extracted Hashes, ImpHashes, HTTP URIs, and Signatures). + * Created dynamic, cascading, auto-reloaded JSON configuration (`hunt.json`) with hierarchical lookup order (`custom/conf` ➔ `conf` ➔ `conf/default`). + * Built high-performance in-memory caching utilizing OS modification-time (`mtime`) checks, so the configuration is re-parsed only when the file changes and never requires a server restart. + * Integrated inline threat intelligence OSINT pivoting links (VirusTotal, Shodan, MalwareBazaar, and AlienVault OTX) and transaction-safe, sanitized AJAX-based task tagging group actions. + * Added comprehensive unit testing covering all view states, error handling, and security measures. + ### [05.06.2026] * Monitor updates: * NtCreateUserProcess hook: Dynamically patch ping commandline to thwart ping delays (e.g. 
Formbook/Xloader) diff --git a/conf/default/hunt.json b/conf/default/hunt.json new file mode 100644 index 00000000000..051d141b5ed --- /dev/null +++ b/conf/default/hunt.json @@ -0,0 +1,143 @@ +{ + "domains": { + "title": "Top Shared Domains", + "icon": "fas fa-globe text-info", + "badge_color": "info", + "form_key": "cat_domains", + "db_unwind": "$network.domains", + "db_group": "$network.domains.domain", + "validator": "is_valid_domain", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/domain/"}, + {"name": "AlienVault OTX", "icon": "fas fa-shield-alt", "url": "https://otx.alienvault.com/indicator/domain/"} + ] + }, + "ips": { + "title": "Top Shared IPs", + "icon": "fas fa-network-wired text-info", + "badge_color": "info", + "form_key": "cat_ips", + "db_unwind": "$network.hosts", + "db_group": "$network.hosts.ip", + "validator": "is_valid_ip", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/ip-address/"}, + {"name": "Shodan", "icon": "fas fa-search", "url": "https://www.shodan.io/host/"} + ] + }, + "mutexes": { + "title": "Top Shared Mutexes", + "icon": "fas fa-lock text-warning", + "badge_color": "warning", + "form_key": "cat_mutexes", + "db_unwind": "$behavior.summary.mutexes", + "db_group": "$behavior.summary.mutexes", + "validator": "is_valid_mutex", + "pivots": [] + }, + "dropped_files": { + "title": "Shared Dropped Files", + "icon": "fas fa-file-alt text-success", + "badge_color": "success", + "form_key": "cat_files", + "db_unwind": "$behavior.summary.files", + "db_group": "$behavior.summary.files", + "validator": "is_valid_file", + "pivots": [] + }, + "executed_commands": { + "title": "Shared Executed Commands", + "icon": "fas fa-terminal text-success", + "badge_color": "success", + "form_key": "cat_commands", + "db_unwind": "$behavior.summary.executed_commands", + "db_group": "$behavior.summary.executed_commands", + 
"validator": "is_valid_command", + "pivots": [] + }, + "registry_keys": { + "title": "Shared Registry Keys", + "icon": "fas fa-key text-danger", + "badge_color": "danger", + "form_key": "cat_registry", + "db_unwind": "$behavior.summary.keys", + "db_group": "$behavior.summary.keys", + "validator": "is_valid_registry", + "pivots": [] + }, + "dropped_hashes": { + "title": "Top Dropped File Hashes", + "icon": "fas fa-hashtag text-white", + "badge_color": "light", + "form_key": "cat_dropped_hashes", + "db_unwind": "$dropped", + "db_group": "$dropped.sha256", + "validator": "is_valid_hash", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/file/"}, + {"name": "MalwareBazaar", "icon": "fas fa-database", "url": "https://bazaar.abuse.ch/sample/"} + ] + }, + "procdump_hashes": { + "title": "Unpacked Memory Hashes", + "icon": "fas fa-memory text-white", + "badge_color": "light", + "form_key": "cat_procdump_hashes", + "db_unwind": "$procdump", + "db_group": "$procdump.sha256", + "validator": "is_valid_hash", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/file/"}, + {"name": "MalwareBazaar", "icon": "fas fa-database", "url": "https://bazaar.abuse.ch/sample/"} + ] + }, + "extracted_hashes": { + "title": "Extracted Payload Hashes", + "icon": "fas fa-file-medical text-white", + "badge_color": "light", + "form_key": "cat_extracted_hashes", + "db_unwind": "$extracted", + "db_group": "$extracted.sha256", + "validator": "is_valid_hash", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/file/"}, + {"name": "MalwareBazaar", "icon": "fas fa-database", "url": "https://bazaar.abuse.ch/sample/"} + ] + }, + "imphashes": { + "title": "PE Import Hashes (ImpHashes)", + "icon": "fas fa-file-invoice text-white", + "badge_color": "light", + "form_key": "cat_imphashes", + "db_unwind": null, + "db_group": 
"$static.pe.imphash", + "validator": "is_valid_md5", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/search/"}, + {"name": "MalwareBazaar", "icon": "fas fa-database", "url": "https://bazaar.abuse.ch/browse.php?search=imphash%3A"} + ], + "db_match": {"_id": {"$ne": null}} + }, + "http_uris": { + "title": "Shared HTTP Request URIs", + "icon": "fas fa-link text-info", + "badge_color": "info", + "form_key": "cat_http_uris", + "db_unwind": "$network.http", + "db_group": "$network.http.uri", + "validator": "is_valid_string", + "pivots": [ + {"name": "VirusTotal", "icon": "fas fa-external-link-alt", "url": "https://www.virustotal.com/gui/search/"} + ] + }, + "signatures": { + "title": "Shared Signatures", + "icon": "fas fa-signature text-warning", + "badge_color": "warning", + "form_key": "cat_signatures", + "db_unwind": "$signatures", + "db_group": "$signatures.name", + "validator": "is_valid_string", + "pivots": [] + } +} \ No newline at end of file diff --git a/conf/default/web.conf.default b/conf/default/web.conf.default index fe311627857..f829ca02eb7 100644 --- a/conf/default/web.conf.default +++ b/conf/default/web.conf.default @@ -274,3 +274,7 @@ enabled = no [display_authenticode] # Show Authenticode certificate chain card on the analysis overview tab enabled = no + +[hunt] +# Proactively discover new emerging campaigns by grouping undetected analyses by shared indicators +enabled = no diff --git a/data/yara/CAPE/Vidar.yar b/data/yara/CAPE/Vidar.yar index 4450dcf4e19..942b6791bd4 100644 --- a/data/yara/CAPE/Vidar.yar +++ b/data/yara/CAPE/Vidar.yar @@ -6,7 +6,7 @@ rule Vidar cape_type = "Vidar Payload" packed = "96ab9c389a6a53c54a3ea05d139aaf2d775e8db06f37d027f696828dcc55e2bb" strings: - $code = {4D 85 C0 75 0? [0-16] (41|45) 88 ?? [0-20] (41|45) 38 (08|10) 74 ?? [0-16] (48|49|4C|4D) (63|2B) [0-16] 4? 3B ?? 73} + $code = {4D 85 C0 75 0? [0-16] (41|45) 88 ?? [0-20] (41|45) 38 ?? 74 ?? 
[0-16] (48|49|4C|4D) (63|2B) [0-16] 4? 3B ?? 73} condition: uint16(0) == 0x5A4D and all of them } diff --git a/lib/cuckoo/common/hunting.py b/lib/cuckoo/common/hunting.py new file mode 100644 index 00000000000..f92666cb0c0 --- /dev/null +++ b/lib/cuckoo/common/hunting.py @@ -0,0 +1,221 @@ +import ipaddress +import logging +import os +import re +import json + +from data.safelist.domains import domain_passlist, domain_passlist_re +from data.safelist.replacepatterns import FILES_DENYLIST, FILES_ENDING_DENYLIST, MUTEX_DENYLIST + +log = logging.getLogger(__name__) + +# Resolve CUCKOO_ROOT +_current_dir = os.path.abspath(os.path.dirname(__file__)) +CUCKOO_ROOT = os.path.normpath(os.path.join(_current_dir, "..", "..", "..")) + +# Precompile regex list once at the module level for maximum performance +compiled_passlist_re = [] +for safe_re in domain_passlist_re: + try: + if isinstance(safe_re, str): + compiled_passlist_re.append(re.compile(safe_re, re.IGNORECASE)) + elif hasattr(safe_re, "match"): + compiled_passlist_re.append(safe_re) + except Exception: + pass + + +# Define module-level validation filters +def is_valid_domain(domain): + if not domain or not isinstance(domain, str): + return False + domain_lower = domain.lower() + for safe in domain_passlist: + if domain_lower == safe or domain_lower.endswith("." 
+ safe):
+            return False
+    for regex in compiled_passlist_re:
+        try:
+            if regex.match(domain_lower):
+                return False
+        except Exception:
+            pass
+    return True
+
+
+def is_valid_ip(ip):
+    """Return True when ``ip`` is a parsable address that is worth clustering on.
+
+    Non-strings, unparsable values, private/loopback/multicast/link-local addresses
+    and a fixed set of well-known public DNS resolvers are rejected.
+    """
+    if not ip or not isinstance(ip, str):
+        return False
+    try:
+        ip_obj = ipaddress.ip_address(ip)
+    except ValueError:
+        return False
+    if ip_obj.is_private or ip_obj.is_loopback or ip_obj.is_multicast or ip_obj.is_link_local:
+        return False
+    # Public DNS resolvers are contacted by almost every sample and carry no campaign signal
+    return ip not in ("8.8.8.8", "8.8.4.4", "1.1.1.1", "9.9.9.9", "208.67.222.222", "208.67.220.220")
+
+
+def is_valid_file(file_path):
+    """Return True when ``file_path`` matches none of the file denylists (case-insensitive)."""
+    if not file_path or not isinstance(file_path, str):
+        return False
+    file_path_lower = file_path.lower()
+    if any(item.lower() in file_path_lower for item in FILES_DENYLIST):
+        return False
+    if any(file_path_lower.endswith(item.lower()) for item in FILES_ENDING_DENYLIST):
+        return False
+    return True
+
+
+# Digests of zero-length content: shared by countless unrelated samples, so useless for clustering
+EMPTY_MD5 = "d41d8cd98f00b204e9800998ecf8427e"
+EMPTY_SHA256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+_SHA256_RE = re.compile(r"[0-9a-fA-F]{64}")
+_MD5_RE = re.compile(r"[0-9a-fA-F]{32}")
+
+
+def is_valid_hash(h):
+    """Return True when ``h`` is a 64-character hex SHA-256 other than the empty-content digest."""
+    if not h or not isinstance(h, str):
+        return False
+    if not _SHA256_RE.fullmatch(h):
+        return False
+    # Compare case-insensitively so an upper-case empty digest is filtered as well
+    return h.lower() != EMPTY_SHA256
+
+
+def is_valid_md5(h):
+    """Return True when ``h`` is a 32-character hex MD5 other than the empty-content digest."""
+    if not h or not isinstance(h, str):
+        return False
+    if not _MD5_RE.fullmatch(h):
+        return False
+    return h.lower() != EMPTY_MD5
+
+
+# Common system mutexes that generate noise
+noisy_mutexes = [
+    "Local\\ZoneBaseMutex", "CTF.Asm.Mutex", "Global\\Access_Registry_Mutex",
+    "Local\\__wf_mut__", "cuckoo_mutex", "Local\\_Global_", "Local\\MS-LanguageProfile"
+]
+
+
+def is_valid_mutex(mutex):
+    """Return True when ``mutex`` contains no denylisted or noisy mutex substring (case-insensitive)."""
+    if not mutex or not isinstance(mutex, str):
+        return False
+    mutex_lower = mutex.lower()
+    if any(m.lower() in mutex_lower for m in MUTEX_DENYLIST):
+        return False
+    if any(m.lower() in mutex_lower for m in noisy_mutexes):
+        return False
+    return True
+
+
+noisy_registry_substrings = [
+    "Controlset001\\Control\\Lsa",
+    "Cryptography\\Providers",
+    "System\\CurrentControlSet\\Control\\Nls",
+    "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Font",
+    "SOFTWARE\\Microsoft\\CTF\\",
+    "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Explorer\\MountPoints2"
+]
+
+
+def is_valid_registry(key):
+    """Return True when registry ``key`` contains none of the noisy substrings (case-insensitive)."""
+    if not key or not isinstance(key, str):
+        return False
+    key_lower = key.lower()
+    return not any(sub.lower() in key_lower for sub in noisy_registry_substrings)
+
+
+noisy_command_substrings = [
+    "chcp", "reg query", "sc query", "net start", "tasklist"
+]
+
+
+def is_valid_command(cmd):
+    """Return True when command line ``cmd`` contains none of the noisy substrings (case-insensitive)."""
+    if not cmd or not isinstance(cmd, str):
+        return False
+    cmd_lower = cmd.lower()
+    return not any(sub.lower() in cmd_lower for sub in noisy_command_substrings)
+
+
+def is_valid_string(value):
+    """Fallback validator: accept any non-empty string."""
+    return isinstance(value, str) and bool(value)
+
+
+VALIDATORS = {
+    "is_valid_domain": is_valid_domain,
+    "is_valid_ip": is_valid_ip,
+    "is_valid_mutex": is_valid_mutex,
+    "is_valid_file": is_valid_file,
+    "is_valid_command": is_valid_command,
+    "is_valid_registry": is_valid_registry,
+    "is_valid_hash": is_valid_hash,
+    "is_valid_md5": is_valid_md5,
+    "is_valid_string": is_valid_string,
+}
+
+# Module level caching for Hunt Configuration
+_CACHED_HUNT_MAP = None
+_CACHED_HUNT_MTIME = None
+_CACHED_HUNT_PATH = None
+
+
+def _with_min_count(hunt_map, min_count):
+    """Return a per-call copy of ``hunt_map`` with ``db_match.count.$gte`` placeholders set to ``min_count``.
+
+    The cached map is never mutated, so a cache hit cannot hand back the threshold of an
+    earlier request and concurrent requests cannot overwrite each other's value.
+    """
+    resolved = {}
+    for cat_id, cat_config in hunt_map.items():
+        cat_config = dict(cat_config)
+        db_match = cat_config.get("db_match")
+        if isinstance(db_match, dict):
+            db_match = dict(db_match)
+            count = db_match.get("count")
+            if isinstance(count, dict) and "$gte" in count:
+                db_match["count"] = {**count, "$gte": min_count}
+            cat_config["db_match"] = db_match
+        resolved[cat_id] = cat_config
+    return resolved
+
+
+def load_hunt_map(min_count: int = 3):
+    """
+    Dynamically loads the hunting configuration from a hierarchical search of paths.
+    Lookup order (reverse mode, most specific to least specific):
+      1. custom/conf/hunt.json
+      2. conf/hunt.json
+      3. conf/default/hunt.json (fallback defaults)
+
+    The parsed file is cached together with its path and mtime; it is re-parsed only when
+    the resolved path or its modification time changes. ``min_count`` is applied to a copy
+    of the cached map on every call, including cache hits.
+
+    A file that exists but cannot be used (syntax error, empty or non-object top level,
+    category that is not an object or lacks ``form_key``/``db_group``) is logged and the
+    next lookup path is tried.
+
+    Returns (HUNT_MAP, VALIDATORS) tuple, or (None, error_reason) on error, where
+    error_reason is "invalid" if any existing file failed to load and "missing" otherwise.
+    """
+    global _CACHED_HUNT_MAP, _CACHED_HUNT_MTIME, _CACHED_HUNT_PATH
+
+    lookup_paths = [
+        os.path.normpath(os.path.join(CUCKOO_ROOT, "custom", "conf", "hunt.json")),
+        os.path.normpath(os.path.join(CUCKOO_ROOT, "conf", "hunt.json")),
+        os.path.normpath(os.path.join(CUCKOO_ROOT, "conf", "default", "hunt.json"))
+    ]
+
+    has_invalid_syntax = False
+
+    for path in lookup_paths:
+        if not os.path.exists(path):
+            continue
+        try:
+            current_mtime = os.path.getmtime(path)
+
+            # Cache Hit: same file, same modification time -> skip parsing, only re-apply min_count
+            if _CACHED_HUNT_MAP is not None and _CACHED_HUNT_PATH == path and _CACHED_HUNT_MTIME == current_mtime:
+                return _with_min_count(_CACHED_HUNT_MAP, min_count), VALIDATORS
+
+            # Cache Miss: Parse the JSON file
+            with open(path, "r", encoding="utf-8") as f:
+                raw_map = json.load(f)
+            if not raw_map or not isinstance(raw_map, dict):
+                # An existing-but-unusable file must be reported as invalid, not as missing
+                raise ValueError("top-level JSON value must be a non-empty object")
+
+            temp_map = {}
+            for cat_id, cat_config in raw_map.items():
+                if not isinstance(cat_config, dict):
+                    raise ValueError(f"category {cat_id!r} must be a JSON object")
+                for required_key in ("form_key", "db_group"):
+                    if required_key not in cat_config:
+                        raise ValueError(f"category {cat_id!r} is missing required key {required_key!r}")
+                cat_config.setdefault("db_unwind", None)
+
+                val_func_name = cat_config.get("validator", "is_valid_string")
+                if val_func_name not in VALIDATORS:
+                    log.warning("Unknown validator %r for hunt category %r, falling back to is_valid_string", val_func_name, cat_id)
+                cat_config["validator"] = VALIDATORS.get(val_func_name, is_valid_string)
+                temp_map[cat_id] = cat_config
+
+            # Save to cache
+            _CACHED_HUNT_MAP = temp_map
+            _CACHED_HUNT_MTIME = current_mtime
+            _CACHED_HUNT_PATH = path
+
+            return _with_min_count(_CACHED_HUNT_MAP, min_count), VALIDATORS
+        except Exception as e:
+            # Log detailed traceback of corrupted file, but proceed to fallback paths
+            log.exception("Failed to load hunting configuration from %s: %s", path, e)
+            has_invalid_syntax = True
+
+    # If no configuration file could be loaded successfully
+    if has_invalid_syntax:
+        return None, "invalid"
+    else:
+        log.error("All hunting configuration lookup paths are missing: %s", lookup_paths)
+        return None, "missing"
diff --git a/tests/web/test_hunt_views.py
b/tests/web/test_hunt_views.py new file mode 100644 index 00000000000..f22108aaf0b --- /dev/null +++ b/tests/web/test_hunt_views.py @@ -0,0 +1,273 @@ +from unittest.mock import MagicMock, patch +import json +import pytest +from django.conf import settings +from django.test import SimpleTestCase +from analysis.views import enabledconf + + +@pytest.mark.usefixtures("db") +class TestHuntViews(SimpleTestCase): + def setUp(self): + self.original_mongodb_enabled = enabledconf["mongodb"] + self.original_hunt_enabled = getattr(settings, "HUNT_ENABLED", False) + self.original_web_auth = getattr(settings, "WEB_AUTHENTICATION", False) + settings.HUNT_ENABLED = True + settings.WEB_AUTHENTICATION = False + + def tearDown(self): + enabledconf["mongodb"] = self.original_mongodb_enabled + settings.HUNT_ENABLED = self.original_hunt_enabled + settings.WEB_AUTHENTICATION = self.original_web_auth + + def test_hunt_page_requires_enabled_setting(self): + """If HUNT_ENABLED is set to False in settings (via web.conf), the page should render an error.""" + settings.HUNT_ENABLED = False + response = self.client.get("/analysis/hunt/") + self.assertEqual(response.status_code, 200) + self.assertIn("The Hunt/Threat Discovery feature is disabled in web.conf", response.content.decode()) + + def test_hunt_page_requires_mongodb(self): + """If MongoDB is disabled, the hunt page should render an error.""" + enabledconf["mongodb"] = False + response = self.client.get("/analysis/hunt/") + self.assertEqual(response.status_code, 200) + self.assertIn("MongoDB is required", response.content.decode()) + + def test_hunt_page_prevents_global_all_time_hunt(self): + """If filename_prefix is blank and days_back is set to 0 (All Time), render a database performance safeguard error.""" + enabledconf["mongodb"] = True + response = self.client.get("/analysis/hunt/?filename_prefix=&days_back=0") + self.assertEqual(response.status_code, 200) + self.assertIn("An all-time global hunt with no filename prefix is not 
allowed", response.content.decode()) + + @patch("lib.cuckoo.common.hunting.os.path.exists") + def test_hunt_page_error_when_config_missing(self, mock_exists): + """If hunt.json is missing from disk, render a clean error page and block execution.""" + mock_exists.return_value = False + enabledconf["mongodb"] = True + response = self.client.get("/analysis/hunt/") + self.assertEqual(response.status_code, 200) + self.assertIn("The hunt.json configuration file is missing", response.content.decode()) + + @patch("lib.cuckoo.common.hunting.os.path.getmtime") + @patch("lib.cuckoo.common.hunting.os.path.exists") + @patch("lib.cuckoo.common.hunting.open") + def test_hunt_page_error_when_config_invalid(self, mock_open, mock_exists, mock_getmtime): + """If hunt.json contains invalid syntax, log detailed tracebacks internally and render a secure error page.""" + mock_exists.return_value = True + mock_getmtime.return_value = 12345678.0 + mock_open.side_effect = ValueError("Invalid JSON syntax") + enabledconf["mongodb"] = True + response = self.client.get("/analysis/hunt/") + self.assertEqual(response.status_code, 200) + self.assertIn("The hunt.json configuration file is invalid", response.content.decode()) + + @patch("analysis.views.mongo_aggregate", create=True) + def test_hunt_page_success_renders_template(self, mock_mongo_aggregate): + """The hunt page should render facets correctly after whitelisting system noise.""" + enabledconf["mongodb"] = True + + # Mock MongoDB returning aggregations with noise and signal + mock_mongo_aggregate.return_value = [{ + "domains": [ + {"_id": "malicious-c2.com", "count": 5, "task_ids": {101, 102}}, + {"_id": "crl.microsoft.com", "count": 20, "task_ids": {101, 102}} # Should be whitelisted out + ], + "ips": [ + {"_id": "185.190.140.1", "count": 4, "task_ids": {101, 102}}, + {"_id": "127.0.0.1", "count": 10, "task_ids": {101}} # Private IP, should be whitelisted out + ], + "mutexes": [ + {"_id": "EvilCampaignMutex", "count": 3, "task_ids": 
{101, 102}}, + {"_id": "Local\\ZoneBaseMutex", "count": 15, "task_ids": {101}} # Whitelisted out + ], + "dropped_files": [ + {"_id": "C:\\Windows\\Temp\\payload.exe", "count": 3, "task_ids": {101, 102}}, + {"_id": "Device\\KsecDD", "count": 10, "task_ids": {101}} # Whitelisted out + ], + "executed_commands": [ + {"_id": "powershell.exe -enc BADCODE", "count": 4, "task_ids": {101, 102}}, + {"_id": "chcp", "count": 12, "task_ids": {101}} # Whitelisted out + ], + "registry_keys": [ + {"_id": "HKCU\\Software\\EvilKey", "count": 3, "task_ids": {101, 102}}, + {"_id": "HKLM\\SOFTWARE\\Microsoft\\CTF\\", "count": 14, "task_ids": {101}} # Whitelisted out + ], + "dropped_hashes": [ + {"_id": "a" * 64, "count": 4, "task_ids": {101, 102}}, + {"_id": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", "count": 12, "task_ids": {101}} # Blank hash, should be filtered + ], + "procdump_hashes": [ + {"_id": "b" * 64, "count": 3, "task_ids": {101, 102}}, + {"_id": "invalid_hash_len", "count": 10, "task_ids": {101}} # Invalid hash length, filtered + ], + "extracted_hashes": [ + {"_id": "c" * 64, "count": 5, "task_ids": {101, 102}} + ], + "imphashes": [ + {"_id": "d" * 32, "count": 4, "task_ids": {101, 102}}, + {"_id": "invalid_imphash_len", "count": 15, "task_ids": {101}} # Invalid imphash length, filtered + ], + "http_uris": [ + {"_id": "/api/v1/beacon.php", "count": 6, "task_ids": {101, 102}} + ], + "signatures": [ + {"_id": "has_pogo_autorun", "count": 8, "task_ids": {101, 102}} + ] + }] + + response = self.client.get("/analysis/hunt/?filename_prefix=downloaded_by_&min_count=2&days_back=14") + self.assertEqual(response.status_code, 200) + + html_content = response.content.decode() + + # Check that title / elements are present + self.assertIn("Threat Discovery & Hunting", html_content) + + # Check signal values are rendered + self.assertIn("malicious-c2.com", html_content) + self.assertIn("185.190.140.1", html_content) + self.assertIn("EvilCampaignMutex", 
html_content) + self.assertIn("payload.exe", html_content) + self.assertIn("powershell.exe -enc BADCODE", html_content) + self.assertIn("HKCU\\Software\\EvilKey", html_content) + self.assertIn("a" * 64, html_content) + self.assertIn("b" * 64, html_content) + self.assertIn("c" * 64, html_content) + self.assertIn("d" * 32, html_content) + self.assertIn("/api/v1/beacon.php", html_content) + self.assertIn("has_pogo_autorun", html_content) + + # Ensure whitelisted / noise items are successfully filtered out and not rendered + self.assertNotIn("crl.microsoft.com", html_content) + self.assertNotIn("127.0.0.1", html_content) + self.assertNotIn("ZoneBaseMutex", html_content) + self.assertNotIn("Device\\KsecDD", html_content) + self.assertNotIn("HKLM\\SOFTWARE\\Microsoft\\CTF\\", html_content) + self.assertNotIn("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", html_content) + self.assertNotIn("invalid_hash_len", html_content) + self.assertNotIn("invalid_imphash_len", html_content) + + # Verify query parameters are passed back to forms + self.assertIn('value="downloaded_by_"', html_content) + self.assertIn('value="2"', html_content) + # Select box selection option checked + self.assertIn('', html_content) + + @patch("analysis.views.mongo_aggregate", create=True) + def test_hunt_page_blank_prefix_works(self, mock_mongo_aggregate): + """When filename_prefix is left blank, the match query should skip the target.file.name check to allow global hunting.""" + enabledconf["mongodb"] = True + mock_mongo_aggregate.return_value = [{}] + + response = self.client.get("/analysis/hunt/?filename_prefix=&min_count=2&days_back=7") + self.assertEqual(response.status_code, 200) + + # Ensure mongo_aggregate was called + self.assertTrue(mock_mongo_aggregate.called) + called_pipeline = mock_mongo_aggregate.call_args[0][1] + + # Extract the $match stage from the pipeline + match_stage = called_pipeline[0]["$match"] + + # Assert that 'target.file.name' was omitted from the match 
query + self.assertNotIn("target.file.name", match_stage) + self.assertIn("malfamily", match_stage) + self.assertIn("detections", match_stage) + self.assertIn("info.started", match_stage) + self.assertEqual(match_stage["detections"], {"$exists": False}) + + @patch("analysis.views.mongo_aggregate", create=True) + def test_hunt_page_ignore_detections_toggle_works(self, mock_mongo_aggregate): + """When ignore_detections is toggled ON, the query should completely skip malfamily and detections filters.""" + enabledconf["mongodb"] = True + mock_mongo_aggregate.return_value = [{}] + + # Send ignore_detections=on (standard GET form checkbox format) + response = self.client.get("/analysis/hunt/?filename_prefix=downloaded_by_&min_count=2&days_back=7&ignore_detections=on") + self.assertEqual(response.status_code, 200) + + # Ensure mongo_aggregate was called + self.assertTrue(mock_mongo_aggregate.called) + called_pipeline = mock_mongo_aggregate.call_args[0][1] + + # Extract the $match stage from the pipeline + match_stage = called_pipeline[0]["$match"] + + # Assert that 'malfamily' and 'detections' were skipped + self.assertNotIn("malfamily", match_stage) + self.assertNotIn("detections", match_stage) + self.assertIn("target.file.name", match_stage) + + @patch("analysis.views.db.session.get") + @patch("analysis.views.db.session.commit") + def test_tag_tasks_endpoint_works(self, mock_commit, mock_get): + """The tag_tasks API should properly add and append custom tags to SQL Task entries.""" + # Use side_effect to return distinct Task mocks for each call, avoiding mock mutation reuse + def mock_get_task(model, tid): + task = MagicMock() + task.tags_tasks = "existing_tag" + return task + mock_get.side_effect = mock_get_task + + payload = {"task_ids": [101, 102], "tag": "New_Campaign"} + response = self.client.post( + "/analysis/hunt/tag/", + data=json.dumps(payload), + content_type="application/json" + ) + self.assertEqual(response.status_code, 200) + + # Verify returned JSON 
response + data = response.json() + self.assertEqual(data["status"], "success") + self.assertEqual(data["tag"], "New_Campaign") + self.assertEqual(data["updated_count"], 2) + + # Verify commit happened + self.assertTrue(mock_commit.called) + + @patch("analysis.views.mongo_aggregate", create=True) + def test_hunt_page_category_filtering_works(self, mock_mongo_aggregate): + """The hunt page should dynamically construct pipeline facets based on untoggled category checkboxes.""" + enabledconf["mongodb"] = True + mock_mongo_aggregate.return_value = [{}] + + # Submit form with only cat_domains and cat_ips enabled (others default to false when form submitted) + response = self.client.get( + "/analysis/hunt/?filename_prefix=downloaded_by_&min_count=2&days_back=7&cat_domains=on&cat_ips=on" + ) + self.assertEqual(response.status_code, 200) + + # Verify mongo_aggregate was called with only those 2 facets + self.assertTrue(mock_mongo_aggregate.called) + called_pipeline = mock_mongo_aggregate.call_args[0][1] + + # Extract $facet stage + facet_stage = called_pipeline[1]["$facet"] + self.assertIn("domains", facet_stage) + self.assertIn("ips", facet_stage) + + # Assert other facets were excluded to save database performance + self.assertNotIn("mutexes", facet_stage) + self.assertNotIn("dropped_files", facet_stage) + self.assertNotIn("executed_commands", facet_stage) + self.assertNotIn("registry_keys", facet_stage) + self.assertNotIn("dropped_hashes", facet_stage) + self.assertNotIn("procdump_hashes", facet_stage) + self.assertNotIn("extracted_hashes", facet_stage) + self.assertNotIn("imphashes", facet_stage) + self.assertNotIn("http_uris", facet_stage) + self.assertNotIn("signatures", facet_stage) + + # Verify template did not render untoggled panels + html_content = response.content.decode() + self.assertIn("Top Shared Domains", html_content) + self.assertIn("Top Shared IPs", html_content) + self.assertNotIn("Top Shared Mutexes", html_content) + self.assertNotIn("Shared Registry 
Keys", html_content) + self.assertNotIn("Unpacked Memory Hashes", html_content) + self.assertNotIn("PE Import Hashes", html_content) + self.assertNotIn("Shared HTTP Request URIs", html_content) + self.assertNotIn("Shared Signatures", html_content) diff --git a/web/analysis/urls.py b/web/analysis/urls.py index c0c9efc137f..6be5f182e48 100644 --- a/web/analysis/urls.py +++ b/web/analysis/urls.py @@ -30,6 +30,8 @@ re_path(r"^search/(?P\d+)/$", views.search_behavior, name="search_behavior"), re_path(r"^search/(?P[\w\d\s:\-_\.]+)/$", views.search, name="search"), re_path(r"^search/$", views.search, name="search"), + re_path(r"^hunt/$", views.hunt, name="hunt"), + re_path(r"^hunt/tag/$", views.tag_tasks, name="tag_tasks"), re_path(r"^pending/$", views.pending, name="pending"), re_path(r"^ban_user_tasks/(?P[\d]+)/$", views.ban_all_user_tasks, name="ban_all_user_tasks"), re_path(r"^ban_user/(?P[\d]+)/$", views.ban_user, name="ban_user"), diff --git a/web/analysis/views.py b/web/analysis/views.py index 28e599589da..d16e2e5870c 100644 --- a/web/analysis/views.py +++ b/web/analysis/views.py @@ -7,6 +7,7 @@ import datetime import json import os +import re import subprocess import sys import tempfile @@ -18,6 +19,7 @@ from urllib.parse import quote from wsgiref.util import FileWrapper + from django.conf import settings from django.contrib.auth.decorators import login_required from django.core.exceptions import BadRequest, PermissionDenied @@ -48,6 +50,7 @@ from lib.cuckoo.core.database import Database, TasksMixIn from lib.cuckoo.core.data.task import TASK_PENDING, Task from modules.reporting.report_doc import CHUNK_CALL_SIZE +from lib.cuckoo.common.hunting import load_hunt_map try: from django_ratelimit.decorators import ratelimit @@ -4129,3 +4132,158 @@ def failed_processing(request, task_id): "process_log": log_content, "settings": settings, }) + + +@require_safe +@conditional_login_required(login_required, settings.WEB_AUTHENTICATION) +def hunt(request): + if not 
settings.HUNT_ENABLED:
+        return render(request, "error.html", {"error": "The Hunt/Threat Discovery feature is disabled in web.conf."})
+
+    if not enabledconf["mongodb"]:
+        return render(request, "error.html", {"error": "MongoDB is required for the Hunt/Threat Discovery feature."})
+
+    filename_prefix = request.GET.get("filename_prefix", "downloaded_by_")
+    ignore_detections = request.GET.get("ignore_detections") == "on"
+    try:
+        min_count = int(request.GET.get("min_count", "3"))
+    except ValueError:
+        min_count = 3
+    try:
+        days_back = int(request.GET.get("days_back", "14"))
+    except ValueError:
+        days_back = 14
+
+    # A threshold below 1 would match every group.
+    min_count = max(1, min_count)
+    # Negative look-back values are treated as 0 ("All Time") so they cannot slip past the
+    # all-time safeguard below; the upper bound keeps the date arithmetic from overflowing.
+    max_days_back = 36500
+    days_back = min(max(0, days_back), max_days_back)
+
+    # Hot-reload HUNT_MAP with modular, high-performance system mtime caching
+    HUNT_MAP, VALIDATORS = load_hunt_map(min_count)
+    if HUNT_MAP is None:
+        # On failure the second tuple element carries the reason string instead of the validators
+        if VALIDATORS == "missing":
+            return render(request, "error.html", {"error": "The hunt.json configuration file is missing. Please contact your system administrator."})
+        else:
+            return render(request, "error.html", {"error": "The hunt.json configuration file is invalid. Please check system logs."})
+
+    # Evaluate dynamic categories based on HUNT_MAP definitions.
+    # With no cat_* parameter at all (first page load) every category is enabled.
+    has_category_filter = any(key.startswith("cat_") for key in request.GET)
+    categories = {}
+    for cat_id, cat_config in HUNT_MAP.items():
+        fkey = cat_config["form_key"]
+        categories[cat_id] = True if not has_category_filter else (request.GET.get(fkey) == "on")
+
+    # Clean prefix to avoid double caret and force strict case-sensitive Prefix Match.
+    # MongoDB B-Tree indexes are ONLY fully utilized by regex if it is anchored at the start (^)
+    # and case-sensitive (no $options: "i").
+    clean_prefix = re.escape(filename_prefix.lstrip("^").strip())
+
+    # Database Safeguard: Prevent global all-time hunts to avoid database timeouts
+    if not clean_prefix and days_back == 0:
+        return render(request, "error.html", {"error": "An all-time global hunt with no filename prefix is not allowed due to performance risks."})
+
+    # Build match query with optional date filters for performance
+    match_query = {}
+
+    if not ignore_detections:
+        match_query["malfamily"] = {"$exists": False}
+        match_query["detections"] = {"$exists": False}
+
+    if clean_prefix:
+        match_query["target.file.name"] = {"$regex": f"^{clean_prefix}"}
+
+    if days_back > 0:
+        import datetime
+        delta = datetime.timedelta(days=days_back)
+        # Timezone-aware replacement for the deprecated utcnow(); the formatted string is unchanged
+        start_date = (datetime.datetime.now(datetime.timezone.utc) - delta).strftime("%Y-%m-%d %H:%M:%S")
+        match_query["info.started"] = {"$gte": start_date}
+
+    # Dynamic multi-category aggregation
+    facet_stages = {}
+    for cat_id, cat_config in HUNT_MAP.items():
+        if not categories[cat_id]:
+            continue
+        stages = []
+        if cat_config.get("db_unwind"):
+            stages.append({"$unwind": cat_config["db_unwind"]})
+        # A category-specific db_match adds conditions to the count threshold instead of replacing it,
+        # so min_count is honoured for every category and always reflects the current request.
+        group_match = {**(cat_config.get("db_match") or {}), "count": {"$gte": min_count}}
+        stages.extend([
+            {"$group": {"_id": cat_config["db_group"], "count": {"$sum": 1}, "task_ids": {"$addToSet": "$info.id"}}},
+            {"$match": group_match},
+            {"$sort": {"count": -1}},
+            {"$limit": 100}
+        ])
+        facet_stages[cat_id] = stages
+
+    # MongoDB Pipeline using $facet for multi-category aggregation
+    pipeline = [
+        {"$match": match_query}
+    ]
+    if facet_stages:
+        pipeline.append({"$facet": facet_stages})
+
+    try:
+        if facet_stages:
+            res = list(mongo_aggregate("analysis", pipeline))
+            facets = res[0] if res else {}
+        else:
+            facets = {}
+    except Exception as e:
+        return render(request, "error.html", {"error": f"Threat hunting aggregation failed: {e}"})
+
+    # Apply noise whitelists and validators dynamically; keep the top 15 surviving clusters per category
+    clean_facets = {}
+    for cat_id, cat_config in HUNT_MAP.items():
+        if categories[cat_id]:
+            raw_items = facets.get(cat_id, [])
+            validator_func = cat_config["validator"]
+            clean_facets[cat_id] = [
+                (item["_id"], item["count"], sorted(item["task_ids"]))
+                for item in raw_items if validator_func(item["_id"])
+            ][:15]
+
+    return render(request, "analysis/hunt.html", {
+        "facets": clean_facets,
+        "filename_prefix": filename_prefix,
+        "min_count": min_count,
+        "days_back": days_back,
+        "ignore_detections": ignore_detections,
+        "categories": categories,
+        "hunt_map": HUNT_MAP,
+        "settings": settings,
+    })
+
+
+@require_POST
+@conditional_login_required(login_required, settings.WEB_AUTHENTICATION)
+def tag_tasks(request):
+    """Append a sanitized tag to the ``tags_tasks`` field of every listed task.
+
+    Expects a JSON object body ``{"task_ids": [<int>, ...], "tag": "<str>"}``.
+    Responds 400 for malformed input, 500 (after rollback) when the database update fails,
+    and otherwise ``{"status": "success", "updated_count": N, "tag": <sanitized tag>}``.
+    """
+    try:
+        data = json.loads(request.body)
+    except ValueError:
+        return JsonResponse({"status": "error", "message": "Invalid JSON"}, status=400)
+
+    # Valid JSON is not necessarily an object: reject lists/scalars before calling .get() on them
+    if not isinstance(data, dict):
+        return JsonResponse({"status": "error", "message": "Invalid JSON"}, status=400)
+
+    task_ids = data.get("task_ids", [])
+    tag = data.get("tag", "")
+    if not isinstance(task_ids, list) or not isinstance(tag, str):
+        return JsonResponse({"status": "error", "message": "Missing task_ids or tag"}, status=400)
+    tag = tag.strip()
+
+    if not task_ids or not tag:
+        return JsonResponse({"status": "error", "message": "Missing task_ids or tag"}, status=400)
+
+    # Sanitize tag string (alphanumeric, underscores, hyphens)
+    tag = "".join(c for c in tag if c.isalnum() or c in ("_", "-")).strip()
+    if not tag:
+        return JsonResponse({"status": "error", "message": "Invalid tag string"}, status=400)
+
+    # Malformed ids are a client error, not a database failure; de-duplicate so each task is visited once
+    try:
+        task_ids = sorted({int(tid) for tid in task_ids})
+    except (TypeError, ValueError):
+        return JsonResponse({"status": "error", "message": "Invalid task_ids"}, status=400)
+
+    from lib.cuckoo.core.data.task import Task
+    updated_count = 0
+    try:
+        for tid in task_ids:
+            task = db.session.get(Task, tid)
+            if task:
+                existing_tags = task.tags_tasks or ""
+                current_tags = [t.strip() for t in existing_tags.split(",") if t.strip()]
+                if tag not in current_tags:
+                    current_tags.append(tag)
+                    task.tags_tasks = ",".join(current_tags)
+                    updated_count += 1
+        # Single commit so the whole group action succeeds or is rolled back together
+        db.session.commit()
+    except Exception as e:
+        db.session.rollback()
+        return JsonResponse({"status": "error", "message": f"Database update failed: {e}"}, status=500)
+
+    return JsonResponse({"status": "success", "updated_count": updated_count, "tag": tag})
diff --git a/web/templates/analysis/hunt.html b/web/templates/analysis/hunt.html
new file mode 100644
index 00000000000..5cecdfbd429
--- /dev/null
+++
b/web/templates/analysis/hunt.html @@ -0,0 +1,158 @@ +{% extends "base.html" %} +{% block content %} +
+
+
+
+

Threat Discovery & Hunting

+
+
+

+ Proactively discover new emerging campaigns by grouping undetected analyses by shared indicators. + Search specifically by filename prefix to cluster similar samples and uncover shared infrastructure or behavioral signatures. +

+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+
+ + +
+
+
+ +
+ + +
+ Target Categories: + {% for cat_id, cat_config in hunt_map.items %} +
+ + +
+ {% endfor %} +
+
+
+
+
+
+ + +
+ {% for cat_id, cat_config in hunt_map.items %} + {% if categories|get_item:cat_id %} +
+
+
+
{{ cat_config.title }}
+ {{ facets|get_item:cat_id|length }} clusters +
+
+ + + + + + + + + + {% if facets|get_item:cat_id %} + {% for item in facets|get_item:cat_id %} + + + + + + {% endfor %} + {% else %} + + {% endif %} + +
Indicator ValueCountTasks
+ {{ item.0 }} + + {% for pivot in cat_config.pivots %} + + {% endfor %} + {{ item.1 }} + {% for tid in item.2 %} + {{ tid }} + {% endfor %} + +
No correlated indicators found.
+
+
+
+ {% endif %} + {% endfor %} +
+ + +{% endblock %} diff --git a/web/templates/header.html b/web/templates/header.html index c14426f61f3..11e7cd48e72 100644 --- a/web/templates/header.html +++ b/web/templates/header.html @@ -14,6 +14,9 @@ + {% if settings.HUNT_ENABLED %} + + {% endif %} diff --git a/web/web/settings.py b/web/web/settings.py index 4700988b975..2f314897a0d 100644 --- a/web/web/settings.py +++ b/web/web/settings.py @@ -36,6 +36,7 @@ REPROCESS_TASKS = web_cfg.general.reprocess_tasks REPROCESS_FAILED_PROCESSING = web_cfg.general.reprocess_failed_processing +HUNT_ENABLED = getattr(web_cfg, "hunt", {}).get("enabled", False) # CSRF TRUSTED ORIGINS # For requests that include the Origin header, Django's CSRF protection # requires that header match the origin present in the Host header. @@ -332,7 +333,8 @@ "NETWORK_PROC_MAP", "REPROCESS_TASKS", "REPROCESS_FAILED_PROCESSING", - "AUDIT_FRAMEWORK" + "AUDIT_FRAMEWORK", + "HUNT_ENABLED" ] EMAIL_BACKEND = "django.core.mail.backends.smtp.EmailBackend"