Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions admin/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@

from lib.cuckoo.common.admin_utils import (
CAPE_PATH,
POSTPROCESS,
AutoAddPolicy,
bulk_deploy,
compare_hashed_files,
Expand Down Expand Up @@ -61,6 +60,7 @@

JUMPBOX_USED = False
jumpbox = False
RETRY = 3

logging.getLogger("paramiko").setLevel(logging.WARNING)
logging.getLogger("paramiko.transport").setLevel(logging.WARNING)
Expand Down Expand Up @@ -226,6 +226,13 @@
required=False,
default=False,
)
compare_opt.add_argument(
"--remove-ssh-keys",
help="Remove servers ssh key from known keys on localhost",
action="store_true",
required=False,
default=False,
)

args = parser.parse_args()

Expand All @@ -235,12 +242,15 @@
logging.getLogger("paramiko.transport").setLevel(logging.DEBUG)

if args.username:
from lib.cuckoo.common import admin_utils
admin_utils.JUMP_BOX_USERNAME = args.username
JUMP_BOX_USERNAME = args.username

# if args.debug:
# log.setLevel(logging.DEBUG)
if args.debug:
logging.getLogger().setLevel(logging.DEBUG)

if args.jump_box_second and not args.dry_run:

ssh.connect(
JUMP_BOX_SECOND,
username=JUMP_BOX_SECOND_USERNAME,
Expand Down Expand Up @@ -286,7 +296,12 @@
print(parameters)
sys.exit(0)
queue.put([servers, file] + list(parameters))
_ = deploy_file(queue, jumpbox)
for i in range(RETRY):
try:
_ = deploy_file(queue, jumpbox)
break
except Exception as eee:
print(f"Error {eee}, retry {i + 1}/{RETRY}")

elif args.delete_file:
queue = Queue()
Expand Down Expand Up @@ -342,7 +357,7 @@
sys.exit()

elif args.enum_all_servers:
enumerate_files_on_all_servers()
enumerate_files_on_all_servers(servers, jumpbox, "/opt/CAPEv2", args.filename)
elif args.generate_files_listing and not args.enum_all_servers:
gen_hashfile(args.generate_files_listing, args.filename)
elif args.check_files_difference:
Expand All @@ -355,8 +370,12 @@

bulk_deploy(files, args.yara_category, args.dry_run, servers, jumpbox)

if args.restart_service and POSTPROCESS:
execute_command_on_all(POSTPROCESS, servers, jumpbox)
if args.restart_service:
execute_command_on_all("systemctl restart cape-processor; systemctl status cape-processor", servers, jumpbox)

if args.restart_uwsgi:
execute_command_on_all("touch /tmp/capeuwsgireload", servers, jumpbox)

if args.remove_ssh_keys:
for node in SERVERS_STATIC_LIST:
subprocess.run(["ssh-keygen", "-R", node])
53 changes: 25 additions & 28 deletions analyzer/linux/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import pkgutil
import re
import subprocess
import sys
import tempfile
import time
Expand Down Expand Up @@ -100,33 +101,29 @@ def dump_memory(pid):
if pid in DUMPED_LIST:
return # Skip if already dumped
try:
maps_file = open(f"/proc/{pid}/maps", "r")
mem_file = open(f"/proc/{pid}/mem", "rb", 0)
output_file = open(f"{MEM_PATH}/{pid}.dmp", "wb")

for line in maps_file.readlines():
# Reference: https://man7.org/linux/man-pages/man5/proc_pid_maps.5.html
m = re.match(r"^([0-9a-f]+)-([0-9a-f]+) ([-rwxsp]{4}) ([0-9a-f]+) (\d\d:\d\d) (\d+) *(.*)$", line)
if not m:
log.error("Could not parse memory map line for pid %s: %s", pid, line)
continue
perms = m.group(3)
pathname = m.group(7)
if "r" in perms:
# Testing: Uncomment to skip memory regions associated with dynamic libraries
# if pathname and (pathname.endswith('.so') or 'lib' in pathname or '[' in pathname):
# continue
start = int(m.group(1), 16)
end = int(m.group(2), 16)
try:
mem_file.seek(start)
chunk = mem_file.read(end - start)
output_file.write(chunk)
except (OSError, ValueError) as e:
log.error("Could not read memory range %x-%x (%s) (%s): %s", start, end, perms, pathname, e)
maps_file.close()
mem_file.close()
output_file.close()
with open(f"/proc/{pid}/maps", "r") as maps_file, open(f"/proc/{pid}/mem", "rb", 0) as mem_file, open(
f"{MEM_PATH}/{pid}.dmp", "wb"
) as output_file:
for line in maps_file:
# Reference: https://man7.org/linux/man-pages/man5/proc_pid_maps.5.html
m = re.match(r"^([0-9a-f]+)-([0-9a-f]+) ([-rwxsp]{4}) ([0-9a-f]+) (\d\d:\d\d) (\d+) *(.*)$", line)
if not m:
log.error("Could not parse memory map line for pid %s: %s", pid, line)
continue
perms = m.group(3)
pathname = m.group(7)
if "r" in perms:
# Testing: Uncomment to skip memory regions associated with dynamic libraries
# if pathname and (pathname.endswith('.so') or 'lib' in pathname or '[' in pathname):
# continue
start = int(m.group(1), 16)
end = int(m.group(2), 16)
try:
mem_file.seek(start)
chunk = mem_file.read(end - start)
output_file.write(chunk)
except (OSError, ValueError) as e:
log.error("Could not read memory range %x-%x (%s) (%s): %s", start, end, perms, pathname, e)
except FileNotFoundError:
log.error("Process with PID %s not found.", str(pid))
except PermissionError:
Expand Down Expand Up @@ -166,7 +163,7 @@ def prepare(self):
# Set virtual machine clock.
clock = datetime.datetime.strptime(self.config.clock, "%Y%m%dT%H:%M:%S")
# Setting date and time.
os.system(f'date -s "{clock.strftime("%y-%m-%d %H:%M:%S")}"')
subprocess.run(["date", "-s", clock.strftime("%y-%m-%d %H:%M:%S")], check=True)

# We update the target according to its category. If it's a file, then
# we store the path.
Expand Down
6 changes: 5 additions & 1 deletion analyzer/linux/lib/api/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ def get_proc_status(self):
try:
with open(f"/proc/{self.pid}/status") as f:
status = f.readlines()
status_values = dict([tuple(map(str.strip, j.split(':',1))) for j in status])
status_values = {}
for line in status:
if ":" in line:
key, value = line.split(":", 1)
status_values[key.strip()] = value.strip()
return status_values
except Exception:
log.critical("Could not get process status for pid %s", self.pid)
Expand Down
3 changes: 2 additions & 1 deletion analyzer/linux/lib/core/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# of the MIT license. See the LICENSE file for details.

import inspect
import importlib
import logging
import shutil
import subprocess
Expand Down Expand Up @@ -36,7 +37,7 @@ def choose_package_class(file_type=None, file_name="", suggestion=None):
sys.path.append(path.abspath(path.join(path.dirname(__file__), "..", "..")))
# Since we don't know the package class yet, we'll just import everything
# from this module and then try to figure out the required member class
module = __import__(full_name, globals(), locals(), ["*"])
module = importlib.import_module(full_name)
except ImportError:
raise Exception(f'Unable to import package "{name}": it does not exist')
try:
Expand Down
3 changes: 3 additions & 0 deletions conf/default/web.conf.default
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ reprocess_failed_processing = no
url_splitter = ,
# Limit number of files extracted from archive in demux.py
demux_files_limit = 10
public_searches = yes

# ratelimit for anon users
[ratelimit]
Expand Down Expand Up @@ -123,6 +124,8 @@ package = edge
# TLP markings on submission and webgui
[tlp]
enabled = no
# Should TLP: RED tasks be searchable by other users?
public_red = yes

#AMSI dump submission checkbox: can be useful to disable if no Win10+ instances
#(amsidump is enabled by default in the monitor for Win10+)
Expand Down
5 changes: 3 additions & 2 deletions dev_utils/mongo_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def remove_task_references_from_files(task_ids):
"""
mongo_update_many(
FILES_COLL,
{TASK_IDS_KEY: {"$elemMatch": {"$in": task_ids}}},
{TASK_IDS_KEY: {"$in": task_ids}},
{"$pullAll": {TASK_IDS_KEY: task_ids}},
)

Expand Down Expand Up @@ -210,7 +210,8 @@ def delete_unused_file_docs():
referenced by any analysis tasks. This should typically be invoked
via utils/cleaners.py in a cron job.
"""
return mongo_delete_many(FILES_COLL, {TASK_IDS_KEY: {"$size": 0}})
# Using exact empty array match is much faster than $size: 0
return mongo_delete_many(FILES_COLL, {TASK_IDS_KEY: []})


NORMALIZED_FILE_FIELDS = ("target.file", "dropped", "CAPE.payloads", "procdump", "procmemory")
Expand Down
52 changes: 10 additions & 42 deletions dev_utils/mongodb.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import collections
import functools
import itertools
import logging
import time
from typing import Any, Callable, Sequence

from bson import ObjectId
from typing import Callable, Sequence

from lib.cuckoo.common.config import Config

Expand Down Expand Up @@ -161,11 +158,11 @@ def mongo_update_many(collection: str, query, update):


@graceful_auto_reconnect
def mongo_update_one(collection: str, query, update, bypass_document_validation: bool = False):
    """Update a single document in ``collection``.

    Before the update is sent, every hook registered for ``mongo_update_one``
    on this collection is applied to the ``$set`` payload of ``update``.

    Args:
        collection: Name of the collection, resolved as an attribute of ``results_db``.
        query: Filter selecting the document to update.
        update: Update document. Only a dict with a truthy ``"$set"`` entry is
            passed through the hooks; anything else is forwarded untouched.
        bypass_document_validation: Forwarded as-is to ``update_one``.

    Returns:
        Whatever the collection's ``update_one`` call returns.
    """
    # The hooks must look at the update document, not the filter, and must be
    # looked up under this function's own key rather than mongo_find_one.
    if isinstance(update, dict) and update.get("$set"):
        for hook in hooks[mongo_update_one][collection]:
            update["$set"] = hook(update["$set"])
    target = getattr(results_db, collection)
    return target.update_one(query, update, bypass_document_validation=bypass_document_validation)


@graceful_auto_reconnect
Expand Down Expand Up @@ -224,43 +221,14 @@ def mongo_delete_data_range(*, range_start: int = 0, range_end: int = 0) -> None


def mongo_delete_calls(task_ids: Sequence[int] | None) -> None:
"""Delete calls by primary key.

This obtains the call IDs from the analysis collection, which are then used
to delete calls in batches."""
log.info("attempting to delete calls for %d tasks", len(task_ids))

query = {"info.id": {"$in": task_ids}}
projection = {"behavior.processes.calls": 1}
tasks: list[dict[str, Any]] = mongo_find("analysis", query, projection)

if not tasks:
return

delete_target_ids: list[ObjectId] = []

def get_call_ids_from_task(task: dict[str, Any]) -> list[ObjectId]:
"""Get the call IDs from an analysis document."""
processes = task.get("behavior", {}).get("processes", [])
calls = [proc.get("calls", []) for proc in processes]
return list(itertools.chain.from_iterable(calls))

for task in tasks:
delete_target_ids.extend(get_call_ids_from_task(task))

delete_target_ids = list(set(delete_target_ids))
chunk_size = 1000
for idx in range(0, len(delete_target_ids), chunk_size):
mongo_delete_many("calls", {"_id": {"$in": delete_target_ids[idx : idx + chunk_size]}})


def mongo_delete_calls_by_task_id(task_ids: Sequence[int]) -> None:
    """Delete calls by querying the calls collection by the task_id field.

    Note, the task_id field was added to the calls collection in 9999881.
    Documents written before that lack the field, so this query does not
    match them and they will NOT be deleted. Use mongo_delete_calls for
    backwards compatibility.

    Args:
        task_ids: Task IDs whose calls are removed. An empty value is a no-op.
    """
    if not task_ids:
        return
    log.info("attempting to delete calls for %d tasks", len(task_ids))
    mongo_delete_many("calls", {"task_id": {"$in": task_ids}})


Expand Down
Loading
Loading