diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000000..2d19fc766d9 --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +*.html diff --git a/conf/copy_configs.sh b/conf/copy_configs.sh index 62fdf606b33..61276aad7f7 100644 --- a/conf/copy_configs.sh +++ b/conf/copy_configs.sh @@ -1,5 +1,6 @@ #!/bin/bash -for filename in conf/default/*.conf.default; do - cp -vf "./$filename" "./$(echo "$filename" | sed -e 's/.default//g' | sed -e 's/default//g')"; +for filename in conf/default/*.conf.default conf/default/*.env; do + dest="conf/${filename#conf/default/}" + cp -vf "./$filename" "./${dest%.default}" done diff --git a/conf/default/cape-processor.env b/conf/default/cape-processor.env new file mode 100644 index 00000000000..0c8d090426a --- /dev/null +++ b/conf/default/cape-processor.env @@ -0,0 +1,26 @@ +# CAPE Processor Configuration overrides +# Un-comment the variables you want to change + +# ID of the analysis to process (default: auto) +#CAPE_ID=auto + +# Max amount of time spent in processing before we fail a task (default: 300) +#CAPE_PROCESSING_TIMEOUT=900 + +# Number of parallel threads to use (default: 1) +#CAPE_PARALLEL=7 + +# Max children tasks per worker (default: 7) +#CAPE_MAXTASKSPERCHILD=7 + +# Enable debug messages (default: false) +#CAPE_DEBUG=true + +# Reprocess failed processing (default: false) +#CAPE_FAILED_PROCESSING=true + +# Enable logging garbage collection related info (default: false) +#CAPE_MEMORY_DEBUGGING=true + +# Disable memory limit (default: false) +#CAPE_DISABLE_MEMORY_LIMIT=true diff --git a/installer/cape2.sh b/installer/cape2.sh index 963b71abfcb..fc363d3827b 100755 --- a/installer/cape2.sh +++ b/installer/cape2.sh @@ -1399,8 +1399,11 @@ function install_CAPE() { fi cd "$CAPE_ROOT/" || return - # copy *.conf.default to *.conf so we have all properly updated fields, as we can't ignore old configs in repository - for filename in conf/default/*.conf.default; do cp -vf "./$filename" "./$(echo "$filename" | 
sed -e 's/.default//g' | sed -e 's/default//g')"; done + # copy *.conf.default and *.env to their destination so we have all properly updated fields + for filename in conf/default/*.conf.default conf/default/*.env; do + dest="conf/${filename#conf/default/}" + cp -vf "./$filename" "./${dest%.default}" + done sed -i "/connection =/cconnection = postgresql://${USER}:${PASSWD}@localhost:5432/${USER}" conf/cuckoo.conf # sed -i "/tor/{n;s/enabled = no/enabled = yes/g}" conf/routing.conf diff --git a/lib/cuckoo/common/demux.py b/lib/cuckoo/common/demux.py index f890055b17d..91718c29518 100644 --- a/lib/cuckoo/common/demux.py +++ b/lib/cuckoo/common/demux.py @@ -5,7 +5,7 @@ import logging import os import tempfile -from typing import List +from typing import Any, Dict, List, Tuple from lib.cuckoo.common.config import Config from lib.cuckoo.common.exceptions import CuckooDemuxError @@ -185,17 +185,17 @@ def is_valid_package(package: str) -> bool: # ToDo fix return type -def _sf_children(child: sfFile): # -> bytes: - path_to_extract = "" +def _sf_children(child: Any) -> Tuple[bytes, str, str, int]: + path_to_extract = b"" filename_lower = child.filename.lower() # Skip junk files if any(filename_lower.endswith(ext) for ext in JUNK_EXTENSIONS): - return (b"", child.platform, child.magic, child.filesize) + return b"", child.platform, child.magic, child.filesize if any(name in filename_lower for name in JUNK_NAMES): - return (b"", child.platform, child.magic, child.filesize) + return b"", child.platform, child.magic, child.filesize if b".github/" in filename_lower or b".git/" in filename_lower: - return (b"", child.platform, child.magic, child.filesize) + return b"", child.platform, child.magic, child.filesize _, ext = os.path.splitext(child.filename) ext = ext.lower() @@ -213,15 +213,14 @@ def _sf_children(child: sfFile): # -> bytes: tmp_dir = tempfile.mkdtemp(dir=target_path) try: if child.contents: - path_to_extract = os.path.join(tmp_dir, 
sanitize_filename((child.filename).decode())) + path_to_extract = os.path.join(tmp_dir, sanitize_filename((child.filename).decode())).encode() _ = path_write_file(path_to_extract, child.contents) except Exception as e: log.exception(e) - return (path_to_extract.encode(), child.platform, child.magic or "", child.filesize) + return path_to_extract, child.platform, child.magic or "", child.filesize -# ToDo fix typing need to add str as error msg -def demux_sflock(filename: bytes, options: str, check_shellcode: bool = True): # -> List[bytes]: +def demux_sflock(filename: bytes, options: str, check_shellcode: bool = True) -> Tuple[List[Tuple[bytes, str, str, int]], str]: retlist = [] # do not extract from .bin (downloaded from us) if os.path.splitext(filename)[1] == b".bin": @@ -229,6 +228,14 @@ def demux_sflock(filename: bytes, options: str, check_shellcode: bool = True): # ToDo need to introduce error msgs here try: + platform = "" + magic_type = "" + file_size = 0 + + # Before unpacking, ensure the file actually exists and is not empty to avoid IncorrectUsageException + if not path_exists(filename) or os.path.getsize(filename) == 0: + return [(filename, platform, magic_type, file_size)], "file not found or empty" + password = options2passwd(options) or "infected" try: unpacked = unpack(filename, password=password, check_shellcode=check_shellcode) @@ -240,7 +247,7 @@ def demux_sflock(filename: bytes, options: str, check_shellcode: bool = True): magic_type = file.get_type() or "" platform = file.get_platform() file_size = file.get_size() - return [[filename, platform, magic_type, file_size]], "" + return [(filename, platform, magic_type, file_size)], "" if unpacked.package in blacklist_extensions: return [], "blacklisted package" for sf_child in unpacked.children: @@ -267,7 +274,9 @@ def demux_sflock(filename: bytes, options: str, check_shellcode: bool = True): return list(filter(None, retlist)), "" -def demux_sample(filename: bytes, package: str, options: str, 
use_sflock: bool = True, platform: str = ""): # -> tuple[bytes, str]: +def demux_sample( + filename: bytes, package: str, options: str, use_sflock: bool = True, platform: str = "" +) -> Tuple[List[Tuple[bytes, str]], List[Dict[str, str]]]: """ If file is a ZIP, extract its included files and return their file paths If file is an email, extracts its attachments and return their file paths (later we'll also extract URLs) @@ -311,9 +320,7 @@ def demux_sample(filename: bytes, package: str, options: str, use_sflock: bool = return retlist, error_list # handle quarantine files - tmp_path = unquarantine(filename) - if tmp_path: - filename = tmp_path + filename = unquarantine(filename) # don't try to extract from office docs magic = File(filename).get_type() or "" @@ -407,3 +414,4 @@ def demux_sample(filename: bytes, package: str, options: str, use_sflock: bool = new_retlist.append((filename, platform)) return new_retlist[:demux_files_limit], error_list + diff --git a/lib/cuckoo/common/gcp.py b/lib/cuckoo/common/gcp.py index f20ccf9f602..f3e047c96e4 100644 --- a/lib/cuckoo/common/gcp.py +++ b/lib/cuckoo/common/gcp.py @@ -3,11 +3,11 @@ import logging import time import shutil +from typing import Any, Dict, List, Optional, Set from lib.cuckoo.common.config import Config from lib.cuckoo.common.path_utils import path_exists from lib.cuckoo.common.constants import CUCKOO_ROOT - try: from google.api_core.exceptions import Forbidden from google.cloud import compute_v1 @@ -38,7 +38,7 @@ class GCSUploader: """Helper class to upload files to GCS.""" @staticmethod - def parse_custom_string(custom_str): + def parse_custom_string(custom_str: str) -> Dict[str, str]: if not custom_str: return {} @@ -52,7 +52,15 @@ def parse_custom_string(custom_str): data[key] = value return data - def __init__(self, bucket_name=None, auth_by=None, credentials_path=None, exclude_dirs=None, exclude_files=None, mode=None): + def __init__( + self, + bucket_name: Optional[str] = None, + auth_by: Optional[str] 
= None, + credentials_path: Optional[str] = None, + exclude_dirs: Optional[Set[str]] = None, + exclude_files: Optional[Set[str]] = None, + mode: Optional[str] = None, + ): if not HAVE_GCP: raise ImportError("google-cloud-storage library is missing") @@ -89,7 +97,7 @@ def __init__(self, bucket_name=None, auth_by=None, credentials_path=None, exclud self.bucket = self.storage_client.bucket(bucket_name) - def _iter_files_to_upload(self, source_directory): + def _iter_files_to_upload(self, source_directory: str): """Generator that yields files to be uploaded, skipping excluded ones.""" for root, dirs, files in os.walk(source_directory): # Exclude specified directories @@ -105,13 +113,13 @@ def _iter_files_to_upload(self, source_directory): relative_path = os.path.relpath(local_path, source_directory) yield local_path, relative_path - def upload(self, source_directory, analysis_id, tlp=None, metadata=None): + def upload(self, source_directory: str, analysis_id: int, tlp: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None): if self.mode == "zip": self.upload_zip_archive(analysis_id, source_directory, tlp=tlp, metadata=metadata) else: self.upload_files_individually(analysis_id, source_directory, tlp=tlp, metadata=metadata) - def upload_zip_archive(self, analysis_id, source_directory, tlp=None, metadata=None): + def upload_zip_archive(self, analysis_id: int, source_directory: str, tlp: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None): log.debug("Compressing and uploading files for analysis ID %s to GCS", analysis_id) blob_name = f"{analysis_id}_tlp_{tlp}.zip" if tlp else f"{analysis_id}.zip" @@ -130,7 +138,9 @@ def upload_zip_archive(self, analysis_id, source_directory, tlp=None, metadata=N os.unlink(tmp_zip_file_name) log.info("Successfully uploaded archive for analysis %s to GCS.", analysis_id) - def upload_files_individually(self, analysis_id, source_directory, tlp=None, metadata=None): + def upload_files_individually( + self, analysis_id: 
int, source_directory: str, tlp: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None + ): log.debug("Uploading files for analysis ID %s to GCS", analysis_id) folder_name = f"{analysis_id}_tlp_{tlp}" if tlp else str(analysis_id) @@ -144,7 +154,7 @@ def upload_files_individually(self, analysis_id, source_directory, tlp=None, met log.info("Successfully uploaded files for analysis %s to GCS.", analysis_id) - def check_exists(self, analysis_id): + def check_exists(self, analysis_id: int) -> bool: """Check if any blobs exist for the given analysis ID.""" prefix = str(analysis_id) blobs = list(self.storage_client.list_blobs(self.bucket, prefix=prefix, max_results=1)) @@ -165,7 +175,7 @@ def check_exists(self, analysis_id): GCS_ENABLED = False -def download_from_gcs(gcs_uri, destination_path, logger=None, client=None): +def download_from_gcs(gcs_uri: str, destination_path: str, logger: Optional[Any] = None, client: Optional[storage.Client] = None) -> bool: """ Downloads a file from GCS. gcs_uri: gs://bucket_name/object_name @@ -174,7 +184,9 @@ def download_from_gcs(gcs_uri, destination_path, logger=None, client=None): logger = log if not HAVE_GCP: - logger.error("Google Cloud Storage dependencies not installed. Please run `poetry install --extras gcp` or `pip install google-cloud-storage`") + logger.error( + "Google Cloud Storage dependencies not installed. 
Please run `poetry install --extras gcp` or `pip install google-cloud-storage`" + ) return False try: @@ -220,6 +232,7 @@ def download_from_gcs(gcs_uri, destination_path, logger=None, client=None): logger.error("Failed to download from GCS %s: %s", gcs_uri, e) return False + def check_node_up(host: str) -> bool: """Auxiliar function for autodiscovery of instances when cluster autoscale""" try: @@ -252,7 +265,7 @@ def __init__(self) -> None: "Authorization": f"Bearer {self.token}", } - def list_instances(self) -> dict: + def list_instances(self) -> Dict[str, List[str]]: """Auto discovery of new servers""" servers = {} instance_name_pattern = "cape-server" @@ -261,7 +274,9 @@ def list_instances(self) -> dict: if self.token: for zone in self.zones: try: - r = requests.get(f"{self.GCP_BASE_URL}projects/{self.project_id}/zones/{zone}/instances", headers=self.headers) + r = requests.get( + f"{self.GCP_BASE_URL}projects/{self.project_id}/zones/{zone}/instances", headers=self.headers + ) for instance in r.json().get("items", []): if not instance["name"].startswith(instance_name_pattern): continue @@ -329,7 +344,7 @@ def autodiscovery(self): time.sleep(autodiscovery_interval) -def gcs_replay(task_range): +def gcs_replay(task_range: str): if not GCS_ENABLED: log.error("GCS is not enabled in reporting.conf") return @@ -372,7 +387,7 @@ def gcs_replay(task_range): metadata["md5"] = samples[0].sample.md5 metadata["sha1"] = samples[0].sample.sha1 - metadata["task_id"] = task_id + metadata["task_id"] = str(task_id) gcs_upload_report(report_path, task_id, tlp, metadata=metadata) @@ -380,7 +395,7 @@ def gcs_replay(task_range): log.error("Failed to replay GCS upload for task %d: %s", task_id, e) -def gcs_upload_report(report_path, analysis_id, tlp=None, metadata=None): +def gcs_upload_report(report_path: str, analysis_id: int, tlp: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None): if not GCS_ENABLED: return @@ -399,7 +414,7 @@ def gcs_upload_report(report_path, 
analysis_id, tlp=None, metadata=None): log.error("Failed to upload report to GCS for task %d: %s", analysis_id, e) -def gcs_sync(time_range): +def gcs_sync(time_range: str): if not GCS_ENABLED: log.error("GCS is not enabled in reporting.conf") return @@ -450,7 +465,7 @@ def gcs_sync(time_range): gcs_replay(",".join(map(str, sorted(missing_ids)))) -def gcs_refetch_banned(time_range, samples_bucket=None): +def gcs_refetch_banned(time_range: str, samples_bucket: Optional[str] = None): if not HAVE_GCP: log.error("Google Cloud Storage dependencies not installed.") return diff --git a/lib/cuckoo/common/quarantine.py b/lib/cuckoo/common/quarantine.py index 843590e5485..75ca5ddd3e4 100644 --- a/lib/cuckoo/common/quarantine.py +++ b/lib/cuckoo/common/quarantine.py @@ -9,6 +9,7 @@ import struct from binascii import crc32 from pathlib import Path +from typing import Optional, Tuple, Union from Cryptodome.Cipher import ARC4 @@ -24,13 +25,13 @@ print("Missed olefile dependency: poetry run pip install olefile") -def bytearray_xor(data, key): +def bytearray_xor(data: bytearray, key: int) -> bytearray: for i in range(len(data)): data[i] ^= key return data -def read_trend_tag(data, offset): +def read_trend_tag(data: bytes, offset: int) -> Tuple[int, bytes]: """@return a code byte and data tuple""" code, length = struct.unpack(" Tuple[int, int, int, bytes]: """@return a code byte, metalength, metaval, and extra data tuple""" code = struct.unpack("B", data[offset : offset + 1])[0] codeval = 0 - retdata = "" + retdata = b"" length = 0 if code in {1, 10}: @@ -122,7 +123,7 @@ def read_sep_tag(data, offset): return code, length, codeval, retdata -def sep_unquarantine(f): +def sep_unquarantine(f: str) -> Optional[bytes]: qdata = Path(f).read_bytes() data = bytearray(qdata) dataoffset = struct.unpack(" Optional[bytes]: with open(f, "rb") as quarfile: data = bytearray(quarfile.read()) @@ -491,7 +492,7 @@ def mse_unquarantine(f): # the decrypted metadata file from the line beginning with 
"ObjectName:" -def mbam_unquarantine(f): +def mbam_unquarantine(f: str) -> Optional[bytes]: with open(f, "rb") as quarfile: data = bytearray(quarfile.read()) @@ -507,7 +508,7 @@ def mbam_unquarantine(f): # format, partially on reversing qb.ppl -def kav_unquarantine(file): +def kav_unquarantine(file: str) -> Optional[bytes]: with open(file, "rb") as quarfile: data = bytearray(quarfile.read()) @@ -558,7 +559,7 @@ def kav_unquarantine(file): # for the sake of documentation -def trend_unquarantine(f): +def trend_unquarantine(f: str) -> Optional[bytes]: # Read first 10 bytes with open(f, "rb") as fil: qheader = fil.read(10) @@ -630,7 +631,7 @@ def trend_unquarantine(f): return store_temp_file(data[dataoffset:], origname) -def mcafee_unquarantine(f): +def mcafee_unquarantine(f: str) -> Optional[bytes]: if not HAVE_OLEFILE: log.info("Missed olefile dependency: pip3 install olefile") return None @@ -673,7 +674,7 @@ def mcafee_unquarantine(f): return store_temp_file(value, malname) -def xorff_unquarantine(f): +def xorff_unquarantine(f: str) -> Optional[bytes]: """ sentinelone forefront @@ -694,35 +695,43 @@ def xorff_unquarantine(f): } -def unquarantine(f): - f = f.decode() if isinstance(f, bytes) else f - if not path_exists(f): - return f +def unquarantine(f: Union[str, bytes]) -> bytes: + f_bytes = f if isinstance(f, bytes) else f.encode() + f_str = f_bytes.decode() - base = os.path.basename(f) + if not path_exists(f_str): + return f_bytes + + base = os.path.basename(f_str) realbase, ext = os.path.splitext(base) if not HAVE_OLEFILE: log.info("Missed olefile dependency: pip3 install olefile") try: - if ext.lower() == ".bup" or (HAVE_OLEFILE and olefile.isOleFile(f)): + if ext.lower() == ".bup" or (HAVE_OLEFILE and olefile.isOleFile(f_str)): with contextlib.suppress(Exception): - return mcafee_unquarantine(f) + res = mcafee_unquarantine(f_str) + if res: + return res if isinstance(res, bytes) else res.encode() except (FileNotFoundError, PermissionError, IsADirectoryError): 
pass if ext.lower() in func_map: try: - return func_map[ext.lower()](f) + res = func_map[ext.lower()](f_str) + if res: + return res if isinstance(res, bytes) else res.encode() except Exception as e: print(e) for func in (mse_unquarantine, kav_unquarantine, trend_unquarantine, sep_unquarantine): with contextlib.suppress(Exception): - quarfile = func(f) + quarfile = func(f_str) if quarfile: - return quarfile + return quarfile if isinstance(quarfile, bytes) else quarfile.encode() + + return f_bytes if __name__ == "__main__": diff --git a/lib/cuckoo/core/data/tasking.py b/lib/cuckoo/core/data/tasking.py index 10839928cf7..51851d9e186 100644 --- a/lib/cuckoo/core/data/tasking.py +++ b/lib/cuckoo/core/data/tasking.py @@ -472,7 +472,8 @@ def demux_sample_and_add_to_db( if extracted_files and not any(file_path == path for path, _ in extracted_files): parent_sample = self.register_sample(File(file_path), source_url=source_url) if conf.cuckoo.delete_archive: - path_delete(file_path.decode()) + if path_exists(file_path): + path_delete(file_path.decode()) # create tasks for each file in the archive for file, platform in extracted_files: @@ -630,8 +631,9 @@ def add_static( parent_sample = self.register_sample(File(file_path)) if conf.cuckoo.delete_archive: # ToDo keep as info for now - log.info("Deleting archive: %s. conf.cuckoo.delete_archive is enabled. %s", file_path, str(extracted_files)) - path_delete(file_path) + if path_exists(file_path): + log.info("Deleting archive: %s. conf.cuckoo.delete_archive is enabled. 
%s", file_path, str(extracted_files)) + path_delete(file_path.decode()) task_ids = [] # create tasks for each file in the archive diff --git a/modules/reporting/gcs.py b/modules/reporting/gcs.py index 32ff9c83781..3136aabb578 100644 --- a/modules/reporting/gcs.py +++ b/modules/reporting/gcs.py @@ -1,5 +1,6 @@ import os import logging +from typing import Any, Dict from lib.cuckoo.common.constants import CUCKOO_ROOT from lib.cuckoo.common.abstracts import Report from lib.cuckoo.common.exceptions import CuckooReportError @@ -19,7 +20,7 @@ class GCS(Report): # This Report module is not executed by default order = 9999 - def run(self, results): + def run(self, results: Dict[str, Any]): """ Run the Report module. diff --git a/systemd/cape-processor.service b/systemd/cape-processor.service index efb27ece5c2..565a5b7178b 100644 --- a/systemd/cape-processor.service +++ b/systemd/cape-processor.service @@ -5,12 +5,28 @@ Wants=cape.service After=cape-rooter.service [Service] -WorkingDirectory=/opt/CAPEv2/utils/ -ExecStart=/etc/poetry/bin/poetry run python process.py -p7 auto -pt 900 +Type=exec +WorkingDirectory=/opt/CAPEv2 +# Default configuration via environment variables +Environment=CAPE_PROCESSING_TIMEOUT=900 +Environment=CAPE_PARALLEL=7 +Environment=CAPE_MAXTASKSPERCHILD=7 +Environment=CAPE_ID=auto +# Optional environment variables (can be set to 'true' or '1' to enable) +# Environment=CAPE_DEBUG=false +# Environment=CAPE_FAILED_PROCESSING=false +# Environment=CAPE_MEMORY_DEBUGGING=false +# Environment=CAPE_DISABLE_MEMORY_LIMIT=false + +# Allow user overrides via file +EnvironmentFile=-/opt/CAPEv2/conf/default/cape-processor.env +EnvironmentFile=-/opt/CAPEv2/conf/cape-processor.env + +ExecStart=/etc/poetry/bin/poetry run python utils/process.py User=cape Group=cape Restart=always -RestartSec=5m +RestartSec=10 LimitNOFILE=100000 [Install] diff --git a/tests/test_quarantine.py b/tests/test_quarantine.py index 9e10d1d6b8c..d68e1dbcb14 100644 --- a/tests/test_quarantine.py 
+++ b/tests/test_quarantine.py @@ -140,11 +140,11 @@ def test_sep(self, grab_sample): """ def test_ext_err(self, empty_file): - assert unquarantine(empty_file.name) is None + assert unquarantine(empty_file.name) == empty_file.name.encode() def test_trend_unquarantine_normal_file(self, temp_pe32): """Test only the file header (first 10 bytes) is XOR'd for non-quarantined files.""" - # The expected output is None + # The expected output is None for the sub-function direct call expected = None def bytearray_xor_wrapper(data, key): diff --git a/utils/gcp_pubsub_service.py b/utils/gcp_pubsub_service.py index 0e503bbc23c..efad6d28ded 100644 --- a/utils/gcp_pubsub_service.py +++ b/utils/gcp_pubsub_service.py @@ -9,6 +9,7 @@ import sys import tempfile import warnings +from typing import Any, Dict, Tuple # Mute Google Cloud's Python version support warning for Python 3.10 warnings.filterwarnings("ignore", category=FutureWarning, module="google.api_core") @@ -29,7 +30,7 @@ log = logging.getLogger("gcp_pubsub_service") class GCPServiceLogger(logging.LoggerAdapter): - def process(self, msg, kwargs): + def process(self, msg: str, kwargs: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]: correlation_id = self.extra.get("correlation_id") if correlation_id: msg = f"[{correlation_id}] {msg}" @@ -134,7 +135,7 @@ def _init_clients(self): self.subscription_path = self.subscriber.subscription_path(self.project_id, self.subscription_id) - def process_message(self, message): + def process_message(self, message: Any): msg_id = message.message_id with self.ids_lock: if msg_id in self.processing_ids: diff --git a/utils/process.py b/utils/process.py index 8babe538cce..24c7b8790df 100644 --- a/utils/process.py +++ b/utils/process.py @@ -547,6 +547,14 @@ def _load_report(task_id: int): return False +def str_to_bool(v): + if isinstance(v, bool): + return v + if isinstance(v, str): + return v.lower() in ("yes", "true", "t", "y", "1") + return False + + def parse_id(id_string: str): """ Parses a 
string representing a range or list of ranges of IDs and returns a list of tuples. @@ -586,18 +594,43 @@ def main(): "id", type=parse_id, help="ID of the analysis to process (auto for continuous processing of unprocessed tasks). Can be 1 or 1-10 or 1,3,5,7", + default=os.getenv("CAPE_ID") or "auto", + nargs="?", ) parser.add_argument("-c", "--caperesubmit", help="Allow CAPE resubmit processing.", action="store_true", required=False) - parser.add_argument("-d", "--debug", help="Display debug messages", action="store_true", required=False) + parser.add_argument( + "-d", + "--debug", + help="Display debug messages", + action="store_true", + required=False, + default=str_to_bool(os.getenv("CAPE_DEBUG", "false")), + ) parser.add_argument("-r", "--report", help="Re-generate report", action="store_true", required=False) parser.add_argument( - "-p", "--parallel", help="Number of parallel threads to use (auto mode only).", type=int, required=False, default=1 + "-p", + "--parallel", + help="Number of parallel threads to use (auto mode only).", + type=int, + required=False, + default=int(os.getenv("CAPE_PARALLEL") or 1), ) parser.add_argument( - "-fp", "--failed-processing", help="reprocess failed processing", action="store_true", required=False, default=False + "-fp", + "--failed-processing", + help="reprocess failed processing", + action="store_true", + required=False, + default=str_to_bool(os.getenv("CAPE_FAILED_PROCESSING", "false")), ) parser.add_argument( - "-mc", "--maxtasksperchild", help="Max children tasks per worker", action="store", type=int, required=False, default=7 + "-mc", + "--maxtasksperchild", + help="Max children tasks per worker", + action="store", + type=int, + required=False, + default=int(os.getenv("CAPE_MAXTASKSPERCHILD") or 7), ) parser.add_argument( "-md", @@ -605,7 +638,7 @@ def main(): help="Enable logging garbage collection related info", action="store_true", required=False, - default=False, + 
default=str_to_bool(os.getenv("CAPE_MEMORY_DEBUGGING", "false")), ) parser.add_argument( "-pt", @@ -614,7 +647,7 @@ def main(): action="store", type=int, required=False, - default=300, + default=int(os.getenv("CAPE_PROCESSING_TIMEOUT") or 300), ) testing_args = parser.add_argument_group("Signature testing options") testing_args.add_argument( @@ -641,7 +674,13 @@ def main(): default=False, required=False, ) - parser.add_argument("--disable-memory-limit", help="Disable memory limit.", action="store_true", default=False, required=False) + parser.add_argument( + "--disable-memory-limit", + help="Disable memory limit.", + action="store_true", + required=False, + default=str_to_bool(os.getenv("CAPE_DISABLE_MEMORY_LIMIT", "false")), + ) args = parser.parse_args() init_database() diff --git a/utils/submit.py b/utils/submit.py index 61a05fcf4c7..a4a2f87eab4 100644 --- a/utils/submit.py +++ b/utils/submit.py @@ -9,6 +9,7 @@ import os import random import sys +from typing import Any, Dict, List, Optional, Tuple try: import requests @@ -30,25 +31,25 @@ def submit_file( - db, - file_path, - package="", - timeout=0, - options="", - priority=1, - machine="", - platform="", - memory=False, - enforce_timeout=False, - custom="", - tags=None, - route=None, - clock=None, - unique=False, - quiet=False, - category=None, - filename=None, -): + db: Database, + file_path: str, + package: str = "", + timeout: int = 0, + options: str = "", + priority: int = 1, + machine: str = "", + platform: str = "", + memory: bool = False, + enforce_timeout: bool = False, + custom: str = "", + tags: Optional[str] = None, + route: Optional[str] = None, + clock: Optional[str] = None, + unique: bool = False, + quiet: bool = False, + category: Optional[str] = None, + filename: Optional[str] = None, +) -> Tuple[List[int], Dict[str, Any]]: if not File(file_path).get_size(): if not quiet: print((bold(yellow("Empty") + ": sample {0} (skipping file)".format(file_path)))) @@ -68,6 +69,8 @@ def submit_file( l = 
logging.getLogger(__name__) tmp_path = "" + task_ids = [] + extra_details = {} try: # Create a temp file with the correct name for demuxing (if needed) # Some demuxers rely on the filename/extension @@ -112,7 +115,7 @@ def submit_file( l.warning("Failed to delete temp file %s: %s", tmp_path, e) -def main(): +def main() -> Optional[bool]: parser = argparse.ArgumentParser() parser.add_argument("target", help="URL, path to the file or folder to analyze") parser.add_argument("-d", "--debug", action="store_true", help="Enable debug logging") diff --git a/web/apikey/__init__.py b/web/apikey/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/web/apikey/admin.py b/web/apikey/admin.py new file mode 100644 index 00000000000..6653d830209 --- /dev/null +++ b/web/apikey/admin.py @@ -0,0 +1,32 @@ +from django.contrib import admin, messages +from django.utils import timezone + +from .models import ApiKey, _generate_key, hash_key + + +@admin.register(ApiKey) +class ApiKeyAdmin(admin.ModelAdmin): + list_display = ("name", "user", "created_at", "last_used_at", "revoked_at") + list_filter = ("revoked_at",) + search_fields = ("name", "user__username", "user__email") + readonly_fields = ("key", "created_at", "last_used_at") + actions = ("revoke_selected",) + + def save_model(self, request, obj, form, change): + # `key` is readonly with no model default, so a key created through the + # admin add-form would otherwise hit a NOT NULL/unique violation. Mint + # and hash one here, and surface the raw value to the admin exactly once. + if not change and not obj.key: + raw = _generate_key() + obj.key = hash_key(raw) + self.message_user( + request, + f"API key '{obj.name}' created. 
Raw key: {raw} — copy it now; it will not be shown again.", + level=messages.WARNING, + ) + super().save_model(request, obj, form, change) + + @admin.action(description="Revoke selected keys") + def revoke_selected(self, request, queryset): + n = queryset.filter(revoked_at__isnull=True).update(revoked_at=timezone.now()) + self.message_user(request, f"Revoked {n} API key(s).") diff --git a/web/apikey/apps.py b/web/apikey/apps.py new file mode 100644 index 00000000000..38b1160285d --- /dev/null +++ b/web/apikey/apps.py @@ -0,0 +1,13 @@ +from django.apps import AppConfig + + +class ApiKeyConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "apikey" + verbose_name = "API Keys" + + def ready(self): + # Import signal handlers so the disable-cascades-revoke handler + # gets registered. Importing inside ready() avoids the standard + # "models not yet loaded" startup pitfall. + from . import signals # noqa: F401 diff --git a/web/apikey/authentication.py b/web/apikey/authentication.py new file mode 100644 index 00000000000..6b7be6e1701 --- /dev/null +++ b/web/apikey/authentication.py @@ -0,0 +1,78 @@ +"""DRF authentication class backed by the multi-key ``ApiKey`` model. + +Wire-format compatible with DRF's built-in ``TokenAuthentication`` — +both expect ``Authorization: Token ``. Falls through to the legacy +``rest_framework.authentication.TokenAuthentication`` when a key isn't +found in our model, so any tokens previously issued via +``/apiv2/api-token-auth/`` keep working without migration. +""" + +from django.utils import timezone +from rest_framework.authentication import ( + BaseAuthentication, + TokenAuthentication, + get_authorization_header, +) +from rest_framework.exceptions import AuthenticationFailed + +from .models import ApiKey, hash_key + + +class ApiKeyAuthentication(BaseAuthentication): + """Authenticate via ``Authorization: Token ``. + + Lookup order: + 1. ``ApiKey`` model (per-user, labeled, individually revocable) + 2. 
DRF's legacy ``Token`` model (one-token-per-user, kept for back-compat) + + Failures in (1) — invalid key, revoked key, disabled user — return 401 + immediately rather than falling through, so an attacker can't probe + the legacy table by sending a key that happens to look like one of + ours but matches a legacy token. + """ + + keyword = "Token" + + def authenticate(self, request): + auth = get_authorization_header(request).split() + if not auth or auth[0].lower() != self.keyword.lower().encode(): + # Not our header; let other auth classes handle it. + return None + if len(auth) == 1: + raise AuthenticationFailed("Invalid token header. No credentials provided.") + if len(auth) > 2: + raise AuthenticationFailed("Invalid token header. Token string should not contain spaces.") + try: + key = auth[1].decode() + except UnicodeError: + raise AuthenticationFailed("Invalid token header. Token string contains invalid characters.") + + # Try our multi-key model first. We store only the SHA-256 hash of the + # raw key, so hash the presented token before looking it up. + try: + apikey = ApiKey.objects.select_related("user").get(key=hash_key(key)) + except ApiKey.DoesNotExist: + apikey = None + + if apikey is not None: + if apikey.revoked_at is not None: + raise AuthenticationFailed("API key has been revoked.") + if not apikey.user.is_active: + # Defense in depth: even if the disable-cascade signal didn't + # fire (e.g. user deactivated via direct SQL), the runtime + # check still shuts the key down. + raise AuthenticationFailed("User inactive or deleted.") + # Throttle last_used_at writes to at most once per minute. Writing + # on every request causes needless write load and lock contention, + # especially painful on SQLite (CAPE's default web-auth DB). 
+ now = timezone.now() + if apikey.last_used_at is None or (now - apikey.last_used_at).total_seconds() > 60: + ApiKey.objects.filter(pk=apikey.pk).update(last_used_at=now) + return (apikey.user, apikey) + + # Fall through to the legacy DRF Token model. Anyone with an + # existing CAPE-issued token continues to authenticate normally. + return TokenAuthentication().authenticate_credentials(key) + + def authenticate_header(self, request): + return self.keyword diff --git a/web/apikey/context_processors.py b/web/apikey/context_processors.py new file mode 100644 index 00000000000..9d389cbdf52 --- /dev/null +++ b/web/apikey/context_processors.py @@ -0,0 +1,9 @@ +"""Template context processor — surfaces apikey access policy to templates.""" + +from .views import _user_may_manage_keys + + +def apikey_access(request): + """Make `may_manage_apikeys` available to every template, so the user + dropdown can hide the API Keys link for SSO non-staff users.""" + return {"may_manage_apikeys": _user_may_manage_keys(getattr(request, "user", None))} diff --git a/web/apikey/forms.py b/web/apikey/forms.py new file mode 100644 index 00000000000..040bae6f623 --- /dev/null +++ b/web/apikey/forms.py @@ -0,0 +1,23 @@ +from django import forms + +from .models import ApiKey + + +class ApiKeyCreateForm(forms.ModelForm): + class Meta: + model = ApiKey + fields = ("name",) + widgets = { + "name": forms.TextInput(attrs={ + "class": "form-control", + "placeholder": "e.g. 
ci-bot, personal-laptop, automation", + "autofocus": "autofocus", + "maxlength": 100, + }), + } + + def clean_name(self): + name = (self.cleaned_data.get("name") or "").strip() + if not name: + raise forms.ValidationError("Name is required.") + return name diff --git a/web/apikey/management/__init__.py b/web/apikey/management/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/web/apikey/management/commands/__init__.py b/web/apikey/management/commands/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/web/apikey/migrations/0001_initial.py b/web/apikey/migrations/0001_initial.py new file mode 100644 index 00000000000..b11ed25fd6b --- /dev/null +++ b/web/apikey/migrations/0001_initial.py @@ -0,0 +1,50 @@ +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="ApiKey", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("name", models.CharField(help_text="A human-readable label (e.g. 'ci-bot', 'personal-laptop').", max_length=100)), + ("key", models.CharField(db_index=True, max_length=64, unique=True)), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("last_used_at", models.DateTimeField(blank=True, null=True)), + ( + "revoked_at", + models.DateTimeField( + blank=True, + help_text=( + "Set when the key is explicitly revoked OR when the owner is disabled. " + "A non-null value means the key MUST NOT authenticate." 
+ ), + null=True, + ), + ), + ( + "user", + models.ForeignKey( + on_delete=models.deletion.CASCADE, + related_name="api_keys", + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "ordering": ["-created_at"], + }, + ), + migrations.AddIndex( + model_name="apikey", + index=models.Index(fields=["user", "revoked_at"], name="apikey_apik_user_id_42b89d_idx"), + ), + ] diff --git a/web/apikey/migrations/0002_hash_existing_keys.py b/web/apikey/migrations/0002_hash_existing_keys.py new file mode 100644 index 00000000000..e6ced0f2105 --- /dev/null +++ b/web/apikey/migrations/0002_hash_existing_keys.py @@ -0,0 +1,42 @@ +"""Convert any existing plaintext API keys to their SHA-256 hash in place. + +Keys are now stored hashed (see apikey.models.hash_key). Existing rows hold the +raw 43-char token; re-hashing them keeps every already-issued key working — the +client still presents the same raw value, which now hashes to the stored digest. +Idempotent: rows that already look like a SHA-256 hex digest are left alone. +""" + +import hashlib + +from django.db import migrations + +_HEX = set("0123456789abcdef") + + +def _looks_hashed(value: str) -> bool: + return len(value) == 64 and set(value) <= _HEX + + +def hash_existing_keys(apps, schema_editor): + ApiKey = apps.get_model("apikey", "ApiKey") + for row in ApiKey.objects.all().iterator(): + if _looks_hashed(row.key): + continue + row.key = hashlib.sha256(row.key.encode()).hexdigest() + row.save(update_fields=["key"]) + + +def noop_reverse(apps, schema_editor): + # Hashing is one-way; the raw keys cannot be recovered. 
+ pass + + +class Migration(migrations.Migration): + + dependencies = [ + ("apikey", "0001_initial"), + ] + + operations = [ + migrations.RunPython(hash_existing_keys, noop_reverse), + ] diff --git a/web/apikey/migrations/__init__.py b/web/apikey/migrations/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/web/apikey/models.py b/web/apikey/models.py new file mode 100644 index 00000000000..8e64efd7c11 --- /dev/null +++ b/web/apikey/models.py @@ -0,0 +1,72 @@ +"""Per-user API keys for CAPE's REST API. + +Decoupled from DRF's built-in `Token` (which is one-token-per-user) so +each operator / script / CI bot can have its own labeled credential and +revoke any of them independently. Authentication remains the standard +``Authorization: Token <key>`` header for drop-in compatibility with +existing CAPE clients. +""" + +import hashlib +import secrets +from django.conf import settings +from django.db import models + + +def _generate_key() -> str: + """43-char URL-safe raw key with ~256 bits of entropy — shown to the + operator exactly once and never stored. Only its hash (see `hash_key`) + is persisted, so a database leak doesn't expose usable credentials.""" + return secrets.token_urlsafe(32) + + +def hash_key(raw: str) -> str: + """SHA-256 hex digest of a raw key (what we store and look up by). + Raw keys are high-entropy random tokens, so an unsalted SHA-256 is + sufficient — there is no low-entropy secret to brute-force.""" + return hashlib.sha256(raw.encode()).hexdigest() + + +class ApiKey(models.Model): + user = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.CASCADE, + related_name="api_keys", + ) + name = models.CharField( + max_length=100, + help_text="A human-readable label (e.g. 'ci-bot', 'personal-laptop').", + ) + # Stores the SHA-256 hex digest of the raw key (64 chars), never the raw key. 
+ key = models.CharField(max_length=64, unique=True, db_index=True) + created_at = models.DateTimeField(auto_now_add=True) + last_used_at = models.DateTimeField(null=True, blank=True) + revoked_at = models.DateTimeField( + null=True, + blank=True, + help_text="Set when the key is explicitly revoked OR when the owner is disabled. " + "A non-null value means the key MUST NOT authenticate.", + ) + + class Meta: + ordering = ["-created_at"] + indexes = [ + # Fast lookup of a user's active keys for the "my keys" page. + models.Index(fields=["user", "revoked_at"]), + ] + + def __str__(self): + return f"{self.user.username}:{self.name}" + + @property + def is_active(self) -> bool: + return self.revoked_at is None + + @classmethod + def issue(cls, user, name: str) -> tuple["ApiKey", str]: + """Create a new key for `user` with the given label. Returns + ``(obj, raw_key)``: only the hash is stored, so the caller MUST show + the raw key to the operator exactly once — it can never be recovered.""" + raw = _generate_key() + obj = cls.objects.create(user=user, name=name, key=hash_key(raw)) + return obj, raw diff --git a/web/apikey/signals.py b/web/apikey/signals.py new file mode 100644 index 00000000000..74972d8db24 --- /dev/null +++ b/web/apikey/signals.py @@ -0,0 +1,55 @@ +"""Disable-cascades-revoke for API keys. + +When a Django ``User`` flips ``is_active`` from True to False, every +``ApiKey`` that user owns gets its ``revoked_at`` stamped. Combined with +the runtime ``user.is_active`` check in ``ApiKeyAuthentication``, this +gives us two independent barriers — the runtime check is the security +guarantee, the signal-driven revocation is the audit trail. + +Re-enabling a previously-disabled user does NOT auto-restore old keys; +they stay revoked. That's the safer default for the +contractor-offboarded-then-re-onboarded case. New keys must be issued +explicitly after re-enable. 
+""" + +from django.conf import settings +from django.db.models.signals import post_save, pre_save +from django.dispatch import receiver +from django.utils import timezone + + +@receiver(pre_save, sender=settings.AUTH_USER_MODEL) +def _capture_previous_is_active(sender, instance, **kwargs): + """Stash the pre-save ``is_active`` value so the post_save handler + can detect the True→False transition. Skip new users (no pk yet).""" + if not instance.pk: + instance._previous_is_active = True # treat new users as active + return + # Optimization: if update_fields is given and excludes is_active, its value + # can't have changed — skip the extra SELECT. This fires on every login + # (Django saves with update_fields=["last_login"]). + update_fields = kwargs.get("update_fields") + if update_fields is not None and "is_active" not in update_fields: + instance._previous_is_active = instance.is_active + return + try: + previous = sender.objects.only("is_active").get(pk=instance.pk) + instance._previous_is_active = previous.is_active + except sender.DoesNotExist: + instance._previous_is_active = True + + +@receiver(post_save, sender=settings.AUTH_USER_MODEL) +def _revoke_keys_on_user_disable(sender, instance, created, **kwargs): + """If is_active just transitioned from True to False, revoke every + one of this user's API keys. Idempotent — keys already revoked are + left alone (revoked_at is only set if currently null).""" + if created: + return + was_active = getattr(instance, "_previous_is_active", True) + if was_active and not instance.is_active: + # Local import dodges AppRegistryNotReady at import time. 
+ from .models import ApiKey + ApiKey.objects.filter(user=instance, revoked_at__isnull=True).update( + revoked_at=timezone.now() + ) diff --git a/web/apikey/templates/apikey/create.html b/web/apikey/templates/apikey/create.html new file mode 100644 index 00000000000..10a037ef054 --- /dev/null +++ b/web/apikey/templates/apikey/create.html @@ -0,0 +1,18 @@ +{% extends "base.html" %} + +{% block content %} +
+

New API Key

+
+ {% csrf_token %} +
+ + {{ form.name }} +
A label to help you remember what this key is for. Doesn't have to be unique.
+ {% if form.name.errors %}
{{ form.name.errors|join:", " }}
{% endif %} +
+ + Cancel +
+
+{% endblock %} diff --git a/web/apikey/templates/apikey/forbidden.html b/web/apikey/templates/apikey/forbidden.html new file mode 100644 index 00000000000..e3b6093f5e7 --- /dev/null +++ b/web/apikey/templates/apikey/forbidden.html @@ -0,0 +1,19 @@ +{% extends "base.html" %} +{% block content %} +
+
+
+

API key management not available

+

+ Your account is provisioned via single sign-on without programmatic-API + privileges. CAPE issues API keys to local service accounts and + designated administrators only. +

+

+ If you need programmatic access for an integration, please contact a + CAPE administrator. +

+
+
+
+{% endblock %} diff --git a/web/apikey/templates/apikey/list.html b/web/apikey/templates/apikey/list.html new file mode 100644 index 00000000000..21f8d53ca3f --- /dev/null +++ b/web/apikey/templates/apikey/list.html @@ -0,0 +1,72 @@ +{% extends "base.html" %} +{% load static %} + +{% block content %} +
+
+

API Keys

+ New key +
+ + {% if flash_key %} +
+
Key created — copy it now
+

This is the only time you'll see the full value. After you leave this page it can't be recovered — just revoked and re-issued.

+
+ {{ flash_key.name }} + + +
+

Authenticate with: Authorization: Token {{ flash_key.key }}

+
+ {% endif %} + + {% if messages %} + {% for m in messages %} +
{{ m }}
+ {% endfor %} + {% endif %} + + {% if keys %} + + + + + + + + + + + + {% for k in keys %} + + + + + + + + {% endfor %} + +
NameCreatedLast usedStatus
{{ k.name }}{{ k.created_at }}{% if k.last_used_at %}{{ k.last_used_at }}{% else %}never{% endif %} + {% if k.revoked_at %} + revoked + {% else %} + active + {% endif %} + + {% if not k.revoked_at %} +
+ {% csrf_token %} + +
+ {% endif %} +
+ {% else %} +
No API keys yet. Create one to call the REST API as yourself.
+ {% endif %} +
+{% endblock %} diff --git a/web/apikey/urls.py b/web/apikey/urls.py new file mode 100644 index 00000000000..6b33f7482ce --- /dev/null +++ b/web/apikey/urls.py @@ -0,0 +1,11 @@ +from django.urls import path + +from . import views + +app_name = "apikey" + +urlpatterns = [ + path("", views.list_view, name="list"), + path("create/", views.create_view, name="create"), + path("<int:pk>/revoke/", views.revoke_view, name="revoke"), +] diff --git a/web/apikey/views.py b/web/apikey/views.py new file mode 100644 index 00000000000..9064f5045ca --- /dev/null +++ b/web/apikey/views.py @@ -0,0 +1,94 @@ +"""User-facing views for managing API keys. + +All views require an authenticated session — keys are scoped to the +logged-in user. The raw key value is shown EXACTLY ONCE on creation; +afterwards it is never displayed again (only its hash is stored) — the +list page shows each key's name, timestamps and status. + +Authorization model: + - Local users (no SocialAccount link): always permitted to manage + their own keys. Service accounts and break-glass admins live here. + - SSO users with is_staff=True: permitted. The OIDC adapter promotes + members of the configured admin/superadmin groups; everyone else + has no programmatic API access. + - SSO users without staff: denied. Programmatic API access for these + users is issued out-of-band (admin creates a service account on + their behalf, or admin creates a key for them in Django admin). +""" + +from allauth.socialaccount.models import SocialAccount +from django.contrib import messages +from django.contrib.auth.decorators import login_required +from django.shortcuts import get_object_or_404, redirect, render +from django.utils import timezone +from django.views.decorators.http import require_POST + +from .forms import ApiKeyCreateForm +from .models import ApiKey + + +def _user_may_manage_keys(user): + """Return True if `user` is allowed to view/create/revoke their own keys. 
+ Local-only users always pass; SSO-provisioned users must be staff.""" + if not user or not user.is_authenticated: + return False + # Called from the apikey_access context processor on every page load — + # cache the SocialAccount lookup on the user object for the request to + # avoid a redundant query per render. + if not hasattr(user, "_may_manage_keys"): + is_sso = SocialAccount.objects.filter(user=user).exists() + user._may_manage_keys = True if not is_sso else bool(user.is_staff) + return user._may_manage_keys + + +def _forbidden(request): + return render(request, "apikey/forbidden.html", status=403) + + +@login_required +def list_view(request): + if not _user_may_manage_keys(request.user): + return _forbidden(request) + keys = ApiKey.objects.filter(user=request.user).order_by("-created_at") + # `flash_key` lets the create view hand the freshly-issued raw key + # to the list page through the session — we never re-display it + # after the page is reloaded. + flash_key = request.session.pop("apikey_flash", None) + return render(request, "apikey/list.html", {"keys": keys, "flash_key": flash_key}) + + +@login_required +def create_view(request): + if not _user_may_manage_keys(request.user): + return _forbidden(request) + if request.method == "POST": + form = ApiKeyCreateForm(request.POST) + if form.is_valid(): + apikey, raw_key = ApiKey.issue(user=request.user, name=form.cleaned_data["name"]) + # Stash the raw key in the session so we can show it once on the + # list page and then forget it (only its hash is stored, so this is + # the only moment we ever have the raw value). Avoids the "save my + # key" being lost if the user navigates away from the create page. 
+ request.session["apikey_flash"] = { + "name": apikey.name, + "key": raw_key, + } + messages.success(request, f"API key '{apikey.name}' created.") + return redirect("apikey:list") + else: + form = ApiKeyCreateForm() + return render(request, "apikey/create.html", {"form": form}) + + +@login_required +@require_POST +def revoke_view(request, pk): + if not _user_may_manage_keys(request.user): + return _forbidden(request) + apikey = get_object_or_404(ApiKey, pk=pk, user=request.user) + if apikey.revoked_at is None: + apikey.revoked_at = timezone.now() + apikey.save(update_fields=["revoked_at"]) + messages.success(request, f"API key '{apikey.name}' revoked.") + else: + messages.info(request, f"API key '{apikey.name}' was already revoked.") + return redirect("apikey:list") diff --git a/web/static/js/cape-shortcuts.js b/web/static/js/cape-shortcuts.js new file mode 100644 index 00000000000..2b08d945372 --- /dev/null +++ b/web/static/js/cape-shortcuts.js @@ -0,0 +1,117 @@ +/** + * CAPEv2 Global Keyboard Shortcuts + * Enhances analyst workflow by providing keyboard navigation. 
+ */ + +(function() { + 'use strict'; + + document.addEventListener('keydown', function(e) { + // Don't trigger if user is typing in an input or textarea + const activeElement = document.activeElement; + const isInput = activeElement && ( + activeElement.tagName === 'INPUT' || + activeElement.tagName === 'TEXTAREA' || + activeElement.tagName === 'SELECT' || + activeElement.isContentEditable); + + if (isInput && e.key !== 'Escape') { + return; + } + + // Global Key handlers + switch(e.key) { + case '/': + e.preventDefault(); + const globalSearch = document.getElementById('form_search'); + if (globalSearch) { + globalSearch.focus(); + globalSearch.select(); + } + break; + + case 'Escape': + if (isInput) { + activeElement.blur(); + } + // Close any open modals + $('.modal.show').modal('hide'); + break; + + case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + if (e.altKey) { + e.preventDefault(); + const tabIndex = parseInt(e.key) - 1; + const tabs = $('#reportTabs .nav-link, #analysisTabs .nav-link'); + if (tabs[tabIndex]) { + tabs[tabIndex].click(); + } + } + break; + + case 'j': // Next Item + navigateList(1); + break; + + case 'k': // Previous Item + navigateList(-1); + break; + + case 'o': // Open/Expand + const activeRow = $('.table-hover tbody tr.keyboard-active'); + if (activeRow.length) { + const link = activeRow.find('a').first(); + if (link.length) link[0].click(); + } + break; + + case '?': // Show help + const helpModal = bootstrap.Modal.getOrCreateInstance(document.getElementById('shortcutsHelpModal')); + helpModal.show(); + break; + } + }); + + /** + * Helper to navigate tables/lists via J/K + */ + function navigateList(direction) { + const rows = $('.table-hover tbody tr:visible, #diff-table tbody tr:visible'); + if (!rows.length) return; + + let currentIndex = rows.index($('.keyboard-active')); + let nextIndex = currentIndex + direction; + + if (currentIndex === -1 && direction === 1) nextIndex = 0; + if 
(nextIndex < 0) nextIndex = 0; + if (nextIndex >= rows.length) nextIndex = rows.length - 1; + + rows.removeClass('keyboard-active'); + const nextRow = $(rows[nextIndex]); + nextRow.addClass('keyboard-active'); + + // Scroll into view if needed + const rowTop = nextRow.offset().top; + const rowBottom = rowTop + nextRow.height(); + const winTop = $(window).scrollTop() + 100; // Header offset + const winBottom = $(window).scrollTop() + $(window).height(); + + if (rowTop < winTop || rowBottom > winBottom) { + $('html, body').animate({ + scrollTop: rowTop - 150 + }, 50); + } + } + + // Add visual feedback for keyboard navigation + const style = document.createElement('style'); + style.innerHTML = ` + .keyboard-active { + outline: 2px solid #3498db !important; + outline-offset: -2px; + background-color: rgba(52, 152, 219, 0.1) !important; + } + `; + document.head.appendChild(style); + +})(); diff --git a/web/templates/base.html b/web/templates/base.html index 68a60007927..8c58a357a8e 100644 --- a/web/templates/base.html +++ b/web/templates/base.html @@ -41,6 +41,7 @@ + + + {% block extra_scripts %}{% endblock %} diff --git a/web/templates/header.html b/web/templates/header.html index c3e7fdf7a1b..c14426f61f3 100644 --- a/web/templates/header.html +++ b/web/templates/header.html @@ -41,7 +41,12 @@ {% endif %} - + +
diff --git a/web/web/guac_settings.py b/web/web/guac_settings.py index 7d67608b652..62d38265166 100644 --- a/web/web/guac_settings.py +++ b/web/web/guac_settings.py @@ -4,10 +4,11 @@ from pathlib import Path from django.utils.log import DEFAULT_LOGGING - CUCKOO_PATH = os.path.join(Path.cwd(), "..") sys.path.append(CUCKOO_PATH) +from lib.cuckoo.common.config import Config + # Build paths inside the project like this: BASE_DIR / "subdir". BASE_DIR = Path(__file__).resolve().parent.parent @@ -38,6 +39,8 @@ LOGGING_CONFIG = None +WEB_AUTHENTICATION = getattr(Config("web"), "web_auth", {}).get("enabled", False) + ALLOWED_HOSTS = [ "*", ] @@ -55,6 +58,7 @@ MIDDLEWARE = [ "django.contrib.sessions.middleware.SessionMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", "django.middleware.common.CommonMiddleware", "django.middleware.csrf.CsrfViewMiddleware", "django.contrib.messages.middleware.MessageMiddleware", @@ -179,4 +183,3 @@ from lib.cuckoo.core.data.guac_session import GuacSession # noqa: F401 from lib.cuckoo.core.data.db_common import Base Base.metadata.create_all(_db.engine) - diff --git a/web/web/settings.py b/web/web/settings.py index 3c2c492855a..4700988b975 100644 --- a/web/web/settings.py +++ b/web/web/settings.py @@ -193,6 +193,8 @@ "django.template.context_processors.request", "django.contrib.messages.context_processors.messages", "django_settings_export.settings_export", + # Surfaces `may_manage_apikeys` for the API Keys link in the user dropdown. + "apikey.context_processors.apikey_access", ], "loaders": [ "django.template.loaders.filesystem.Loader", @@ -271,6 +273,10 @@ "django_recaptcha", # https://pypi.org/project/django-recaptcha/ "rest_framework", "rest_framework.authtoken", + # Per-user labeled API keys (multi-key, individually revocable). Lives + # alongside DRF's legacy `authtoken` so ApiKeyAuthentication can fall + # back to existing tokens for back-compat. 
+ "apikey", ] AUDIT_FRAMEWORK = web_cfg.audit_framework.get("enabled", False) @@ -278,7 +284,10 @@ if api_cfg.api.token_auth_enabled: REST_FRAMEWORK = { "DEFAULT_AUTHENTICATION_CLASSES": [ - "rest_framework.authentication.TokenAuthentication", + # Per-user labeled API keys; internally falls back to DRF's legacy + # TokenAuthentication so tokens issued via /apiv2/api-token-auth/ + # keep working without migration. + "apikey.authentication.ApiKeyAuthentication", "rest_framework.authentication.SessionAuthentication", ], "DEFAULT_PERMISSION_CLASSES": ("rest_framework.permissions.IsAuthenticated",), diff --git a/web/web/urls.py b/web/web/urls.py index 13fbf771a1c..f05cfe50127 100644 --- a/web/web/urls.py +++ b/web/web/urls.py @@ -36,6 +36,10 @@ urlpatterns = [ re_path(r"^guac/", include("guac.urls")), + # Per-user API key management (list / create / revoke). Mounted under + # /accounts/ so it sits next to allauth's login pages and naturally + # picks up @login_required. Must precede the allauth catch-all below. + path("accounts/api-keys/", include("apikey.urls")), path("accounts/", include("allauth.urls")), path("robots.txt", TemplateView.as_view(template_name="robots.txt", content_type="text/plain")), re_path(r"^$", dashboard_views.index, name="dashboard"),