threatcode · pull · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/admin/admin.py b/admin/admin.py
@@ -30,7 +30,6 @@
 
 from lib.cuckoo.common.admin_utils import (
     CAPE_PATH,
-    POSTPROCESS,
     AutoAddPolicy,
     bulk_deploy,
     compare_hashed_files,
@@ -61,6 +60,7 @@
 
 JUMPBOX_USED = False
 jumpbox = False
+RETRY = 3
 
 logging.getLogger("paramiko").setLevel(logging.WARNING)
 logging.getLogger("paramiko.transport").setLevel(logging.WARNING)
@@ -226,6 +226,13 @@
         required=False,
         default=False,
     )
+    compare_opt.add_argument(
+        "--remove-ssh-keys",
+        help="Remove servers ssh key from known keys on localhost",
+        action="store_true",
+        required=False,
+        default=False,
+    )
 
     args = parser.parse_args()
 
@@ -235,12 +242,15 @@
         logging.getLogger("paramiko.transport").setLevel(logging.DEBUG)
 
     if args.username:
+        from lib.cuckoo.common import admin_utils
+        admin_utils.JUMP_BOX_USERNAME = args.username
         JUMP_BOX_USERNAME = args.username
 
-    # if args.debug:
-    #    log.setLevel(logging.DEBUG)
+    if args.debug:
+        logging.getLogger().setLevel(logging.DEBUG)
 
     if args.jump_box_second and not args.dry_run:
+
         ssh.connect(
             JUMP_BOX_SECOND,
             username=JUMP_BOX_SECOND_USERNAME,
@@ -286,7 +296,12 @@
                 print(parameters)
                 sys.exit(0)
             queue.put([servers, file] + list(parameters))
-            _ = deploy_file(queue, jumpbox)
+            for i in range(RETRY):
+                try:
+                    _ = deploy_file(queue, jumpbox)
+                    break
+                except Exception as eee:
+                    print(f"Error {eee}, retry {i + 1}/{RETRY}")
 
     elif args.delete_file:
         queue = Queue()
@@ -342,7 +357,7 @@
         sys.exit()
 
     elif args.enum_all_servers:
-        enumerate_files_on_all_servers()
+        enumerate_files_on_all_servers(servers, jumpbox, "/opt/CAPEv2", args.filename)
     elif args.generate_files_listing and not args.enum_all_servers:
         gen_hashfile(args.generate_files_listing, args.filename)
     elif args.check_files_difference:
@@ -355,8 +370,12 @@
 
         bulk_deploy(files, args.yara_category, args.dry_run, servers, jumpbox)
 
-    if args.restart_service and POSTPROCESS:
-        execute_command_on_all(POSTPROCESS, servers, jumpbox)
+    if args.restart_service:
+        execute_command_on_all("systemctl restart cape-processor; systemctl status cape-processor", servers, jumpbox)
 
     if args.restart_uwsgi:
         execute_command_on_all("touch /tmp/capeuwsgireload", servers, jumpbox)
+
+    if args.remove_ssh_keys:
+        for node in SERVERS_STATIC_LIST:
+            subprocess.run(["ssh-keygen", "-R", node])
diff --git a/analyzer/linux/analyzer.py b/analyzer/linux/analyzer.py
@@ -8,6 +8,7 @@
 import os
 import pkgutil
 import re
+import subprocess
 import sys
 import tempfile
 import time
@@ -100,33 +101,29 @@ def dump_memory(pid):
     if pid in DUMPED_LIST:
         return  # Skip if already dumped
     try:
-        maps_file = open(f"/proc/{pid}/maps", "r")
-        mem_file = open(f"/proc/{pid}/mem", "rb", 0)
-        output_file = open(f"{MEM_PATH}/{pid}.dmp", "wb")
-
-        for line in maps_file.readlines():
-            # Reference: https://man7.org/linux/man-pages/man5/proc_pid_maps.5.html
-            m = re.match(r"^([0-9a-f]+)-([0-9a-f]+) ([-rwxsp]{4}) ([0-9a-f]+) (\d\d:\d\d) (\d+) *(.*)$", line)
-            if not m:
-                log.error("Could not parse memory map line for pid %s: %s", pid, line)
-                continue
-            perms = m.group(3)
-            pathname = m.group(7)
-            if "r" in perms:
-                # Testing: Uncomment to skip memory regions associated with dynamic libraries
-                # if pathname and (pathname.endswith('.so') or 'lib' in pathname or '[' in pathname):
-                # continue
-                start = int(m.group(1), 16)
-                end = int(m.group(2), 16)
-                try:
-                    mem_file.seek(start)
-                    chunk = mem_file.read(end - start)
-                    output_file.write(chunk)
-                except (OSError, ValueError) as e:
-                    log.error("Could not read memory range %x-%x (%s) (%s): %s", start, end, perms, pathname, e)
-        maps_file.close()
-        mem_file.close()
-        output_file.close()
+        with open(f"/proc/{pid}/maps", "r") as maps_file, open(f"/proc/{pid}/mem", "rb", 0) as mem_file, open(
+            f"{MEM_PATH}/{pid}.dmp", "wb"
+        ) as output_file:
+            for line in maps_file:
+                # Reference: https://man7.org/linux/man-pages/man5/proc_pid_maps.5.html
+                m = re.match(r"^([0-9a-f]+)-([0-9a-f]+) ([-rwxsp]{4}) ([0-9a-f]+) (\d\d:\d\d) (\d+) *(.*)$", line)
+                if not m:
+                    log.error("Could not parse memory map line for pid %s: %s", pid, line)
+                    continue
+                perms = m.group(3)
+                pathname = m.group(7)
+                if "r" in perms:
+                    # Testing: Uncomment to skip memory regions associated with dynamic libraries
+                    # if pathname and (pathname.endswith('.so') or 'lib' in pathname or '[' in pathname):
+                    # continue
+                    start = int(m.group(1), 16)
+                    end = int(m.group(2), 16)
+                    try:
+                        mem_file.seek(start)
+                        chunk = mem_file.read(end - start)
+                        output_file.write(chunk)
+                    except (OSError, ValueError) as e:
+                        log.error("Could not read memory range %x-%x (%s) (%s): %s", start, end, perms, pathname, e)
     except FileNotFoundError:
         log.error("Process with PID %s not found.", str(pid))
     except PermissionError:
@@ -166,7 +163,7 @@ def prepare(self):
             # Set virtual machine clock.
             clock = datetime.datetime.strptime(self.config.clock, "%Y%m%dT%H:%M:%S")
             # Setting date and time.
-            os.system(f'date -s "{clock.strftime("%y-%m-%d %H:%M:%S")}"')
+            subprocess.run(["date", "-s", clock.strftime("%y-%m-%d %H:%M:%S")], check=True)
 
         # We update the target according to its category. If it's a file, then
         # we store the path.

diff --git a/analyzer/linux/lib/api/process.py b/analyzer/linux/lib/api/process.py
@@ -39,7 +39,11 @@ def get_proc_status(self):
         try:
             with open(f"/proc/{self.pid}/status") as f:
                 status = f.readlines()
-            status_values = dict([tuple(map(str.strip, j.split(':',1))) for j in status])
+            status_values = {}
+            for line in status:
+                if ":" in line:
+                    key, value = line.split(":", 1)
+                    status_values[key.strip()] = value.strip()
             return status_values
         except Exception:
             log.critical("Could not get process status for pid %s", self.pid)

diff --git a/analyzer/linux/lib/core/packages.py b/analyzer/linux/lib/core/packages.py
@@ -4,6 +4,7 @@
 # of the MIT license. See the LICENSE file for details.
 
 import inspect
+import importlib
 import logging
 import shutil
 import subprocess
@@ -36,7 +37,7 @@ def choose_package_class(file_type=None, file_name="", suggestion=None):
         sys.path.append(path.abspath(path.join(path.dirname(__file__), "..", "..")))
         # Since we don't know the package class yet, we'll just import everything
         # from this module and then try to figure out the required member class
-        module = __import__(full_name, globals(), locals(), ["*"])
+        module = importlib.import_module(full_name)
     except ImportError:
         raise Exception(f'Unable to import package "{name}": it does not exist')
     try:

diff --git a/conf/default/web.conf.default b/conf/default/web.conf.default
@@ -70,6 +70,7 @@ reprocess_failed_processing = no
 url_splitter = ,
 # Limit number of files extracted from archive in demux.py
 demux_files_limit = 10
+public_searches = yes
 
 # ratelimit for anon users
 [ratelimit]
@@ -123,6 +124,8 @@ package = edge
 # TLP markings on submission and webgui
 [tlp]
 enabled = no
+# Should TLP: RED tasks be searchable by other users?
+public_red = yes
 
 #AMSI dump submission checkbox: can be useful to disable if no Win10+ instances
 #(amsidump is enabled by default in the monitor for Win10+)

diff --git a/dev_utils/mongo_hooks.py b/dev_utils/mongo_hooks.py
@@ -182,7 +182,7 @@ def remove_task_references_from_files(task_ids):
     """
     mongo_update_many(
         FILES_COLL,
-        {TASK_IDS_KEY: {"$elemMatch": {"$in": task_ids}}},
+        {TASK_IDS_KEY: {"$in": task_ids}},
         {"$pullAll": {TASK_IDS_KEY: task_ids}},
     )
 
@@ -210,7 +210,8 @@ def delete_unused_file_docs():
     referenced by any analysis tasks. This should typically be invoked
     via utils/cleaners.py in a cron job.
     """
-    return mongo_delete_many(FILES_COLL, {TASK_IDS_KEY: {"$size": 0}})
+    # Using exact empty array match is much faster than $size: 0
+    return mongo_delete_many(FILES_COLL, {TASK_IDS_KEY: []})
 
 
 NORMALIZED_FILE_FIELDS = ("target.file", "dropped", "CAPE.payloads", "procdump", "procmemory")

diff --git a/dev_utils/mongodb.py b/dev_utils/mongodb.py
@@ -1,11 +1,8 @@
 import collections
 import functools
-import itertools
 import logging
 import time
-from typing import Any, Callable, Sequence
-
-from bson import ObjectId
+from typing import Callable, Sequence
 
 from lib.cuckoo.common.config import Config
 
@@ -161,11 +158,11 @@ def mongo_update_many(collection: str, query, update):
 
 
 @graceful_auto_reconnect
-def mongo_update_one(collection: str, query, projection, bypass_document_validation: bool = False):
-    if query.get("$set", None):
-        for hook in hooks[mongo_find_one][collection]:
-            query["$set"] = hook(query["$set"])
-    return getattr(results_db, collection).update_one(query, projection, bypass_document_validation=bypass_document_validation)
+def mongo_update_one(collection: str, query, update, bypass_document_validation: bool = False):
+    if isinstance(update, dict) and update.get("$set"):
+        for hook in hooks[mongo_update_one][collection]:
+            update["$set"] = hook(update["$set"])
+    return getattr(results_db, collection).update_one(query, update, bypass_document_validation=bypass_document_validation)
 
 
 @graceful_auto_reconnect
@@ -224,43 +221,14 @@ def mongo_delete_data_range(*, range_start: int = 0, range_end: int = 0) -> None
 
 
 def mongo_delete_calls(task_ids: Sequence[int] | None) -> None:
-    """Delete calls by primary key.
-
-    This obtains the call IDs from the analysis collection, which are then used
-    to delete calls in batches."""
-    log.info("attempting to delete calls for %d tasks", len(task_ids))
-
-    query = {"info.id": {"$in": task_ids}}
-    projection = {"behavior.processes.calls": 1}
-    tasks: list[dict[str, Any]] = mongo_find("analysis", query, projection)
-
-    if not tasks:
-        return
-
-    delete_target_ids: list[ObjectId] = []
-
-    def get_call_ids_from_task(task: dict[str, Any]) -> list[ObjectId]:
-        """Get the call IDs from an analysis document."""
-        processes = task.get("behavior", {}).get("processes", [])
-        calls = [proc.get("calls", []) for proc in processes]
-        return list(itertools.chain.from_iterable(calls))
-
-    for task in tasks:
-        delete_target_ids.extend(get_call_ids_from_task(task))
-
-    delete_target_ids = list(set(delete_target_ids))
-    chunk_size = 1000
-    for idx in range(0, len(delete_target_ids), chunk_size):
-        mongo_delete_many("calls", {"_id": {"$in": delete_target_ids[idx : idx + chunk_size]}})
-
-
-def mongo_delete_calls_by_task_id(task_ids: Sequence[int]) -> None:
     """Delete calls by querying the calls collection by the task_id field.
 
     Note, the task_id field was added to the calls collection in 9999881.
-    Objects added to the collection prior to this will not be deleted. Use
-    mongo_delete_calls for backwards compatibility.
+    Objects added to the collection prior to this will not be deleted.
     """
+    if not task_ids:
+        return
+    log.info("attempting to delete calls for %d tasks", len(task_ids))
     mongo_delete_many("calls", {"task_id": {"$in": task_ids}})