diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 656f42138..9128c75b5 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -1281,7 +1281,9 @@ Wraps `tokio::process::Child` + PID. Platform-specific `spawn()` methods delegat 1. `setpgid(0, 0)` if non-interactive (create new process group) 2. `setns(fd, CLONE_NEWNET)` to enter network namespace (Linux only) 3. `drop_privileges(policy)`: `initgroups()` -> `setgid()` -> `setuid()` -4. `sandbox::apply(policy, workdir)`: Landlock then seccomp +4. Disable core dumps with `setrlimit(RLIMIT_CORE, 0)` on Unix +5. Set `prctl(PR_SET_DUMPABLE, 0)` on Linux +6. `sandbox::apply(policy, workdir)`: Landlock then seccomp ### `drop_privileges()` @@ -1297,6 +1299,8 @@ The ordering is significant: `initgroups`/`setgid` must happen before `setuid` b Steps 3, 5, and 6 are defense-in-depth post-condition checks (CWE-250 / CERT POS37-C). All three syscalls (`geteuid`, `getegid`, `setuid`) are async-signal-safe, so they are safe to call in the `pre_exec` context. The checks add negligible overhead while guarding against hypothetical kernel-level defects that could cause `setuid`/`setgid` to return success without actually changing the effective IDs. +After the privilege drop, the child process also disables core dumps before Landlock and seccomp are applied. On all Unix targets it sets `RLIMIT_CORE=0`; on Linux it additionally sets `PR_SET_DUMPABLE=0`. This prevents crash artifacts from containing provider credentials, request payloads, or other sensitive in-memory data. + ### `ProcessStatus` Exit code is `code` if the process exited normally, or `128 + signal` if killed by a signal (standard Unix convention). Returns `-1` if neither is available. diff --git a/crates/openshell-sandbox/src/process.rs b/crates/openshell-sandbox/src/process.rs index b903566bd..4e9225929 100644 --- a/crates/openshell-sandbox/src/process.rs +++ b/crates/openshell-sandbox/src/process.rs @@ -34,6 +34,35 @@ fn scrub_sensitive_env(cmd: &mut Command) { cmd.env_remove(SSH_HANDSHAKE_SECRET_ENV); } +#[cfg(unix)] +#[allow(unsafe_code)] +pub(crate) fn harden_child_process() -> Result<()> { + let core_limit = libc::rlimit { + rlim_cur: 0, + rlim_max: 0, + }; + let rc = unsafe { libc::setrlimit(libc::RLIMIT_CORE, &core_limit) }; + if rc != 0 { + return Err(miette::miette!( + "Failed to disable core dumps: {}", + std::io::Error::last_os_error() + )); + } + + #[cfg(target_os = "linux")] + { + let rc = unsafe { libc::prctl(libc::PR_SET_DUMPABLE, 0, 0, 0, 0) }; + if rc != 0 { + return Err(miette::miette!( + "Failed to set PR_SET_DUMPABLE=0: {}", + std::io::Error::last_os_error() + )); + } + } + + Ok(()) +} + /// Handle to a running process. pub struct ProcessHandle { child: Child, @@ -200,6 +229,8 @@ impl ProcessHandle { drop_privileges(&policy) .map_err(|err| std::io::Error::other(err.to_string()))?; + harden_child_process().map_err(|err| std::io::Error::other(err.to_string()))?; + // Phase 2 (as unprivileged user): Enforce the prepared // Landlock ruleset via restrict_self() + apply seccomp. // restrict_self() does not require root. @@ -292,6 +323,8 @@ impl ProcessHandle { drop_privileges(&policy) .map_err(|err| std::io::Error::other(err.to_string()))?; + harden_child_process().map_err(|err| std::io::Error::other(err.to_string()))?; + sandbox::apply(&policy, workdir.as_deref()) .map_err(|err| std::io::Error::other(err.to_string()))?; @@ -543,6 +576,12 @@ mod tests { use crate::policy::{ FilesystemPolicy, LandlockPolicy, NetworkPolicy, ProcessPolicy, SandboxPolicy, }; + #[cfg(unix)] + use nix::sys::wait::{WaitStatus, waitpid}; + #[cfg(unix)] + use nix::unistd::{ForkResult, fork}; + #[cfg(unix)] + use std::mem::size_of; use std::process::Stdio as StdStdio; /// Helper to create a minimal `SandboxPolicy` with the given process policy. @@ -660,6 +699,91 @@ mod tests { ); } + #[cfg(unix)] + #[allow(unsafe_code)] + fn probe_hardened_child(probe: unsafe fn() -> i64) -> i64 { + const HARDEN_FAILED: i64 = -2; + + let mut fds = [0; 2]; + let pipe_rc = unsafe { libc::pipe(fds.as_mut_ptr()) }; + assert_eq!( + pipe_rc, + 0, + "pipe failed: {}", + std::io::Error::last_os_error() + ); + + match unsafe { fork() }.expect("fork should succeed") { + ForkResult::Child => { + unsafe { libc::close(fds[0]) }; + let value = match harden_child_process() { + Ok(()) => unsafe { probe() }, + Err(_) => HARDEN_FAILED, + }; + let bytes = value.to_ne_bytes(); + let written = unsafe { libc::write(fds[1], bytes.as_ptr().cast(), bytes.len()) }; + unsafe { + libc::close(fds[1]); + libc::_exit(if written == bytes.len() as isize { + 0 + } else { + 1 + }); + } + } + ForkResult::Parent { child } => { + unsafe { libc::close(fds[1]) }; + let mut bytes = [0u8; size_of::()]; + let read = unsafe { libc::read(fds[0], bytes.as_mut_ptr().cast(), bytes.len()) }; + unsafe { libc::close(fds[0]) }; + assert_eq!( + read as usize, + bytes.len(), + "expected {} probe bytes, got {}", + bytes.len(), + read + ); + + match waitpid(child, None).expect("waitpid should succeed") { + WaitStatus::Exited(_, 0) => {} + status => panic!("probe child exited unexpectedly: {status:?}"), + } + + i64::from_ne_bytes(bytes) + } + } + } + + #[cfg(unix)] + #[allow(unsafe_code)] + unsafe fn core_dump_limit_is_zero_probe() -> i64 { + let mut limit = std::mem::MaybeUninit::::uninit(); + let rc = unsafe { libc::getrlimit(libc::RLIMIT_CORE, limit.as_mut_ptr()) }; + if rc != 0 { + return -1; + } + let limit = unsafe { limit.assume_init() }; + i64::from(limit.rlim_cur == 0 && limit.rlim_max == 0) + } + + #[test] + #[cfg(unix)] + fn harden_child_process_disables_core_dumps() { + assert_eq!(probe_hardened_child(core_dump_limit_is_zero_probe), 1); + } + + #[cfg(target_os = "linux")] + #[allow(unsafe_code)] + unsafe fn dumpable_flag_probe() -> i64 { + unsafe { libc::prctl(libc::PR_GET_DUMPABLE, 0, 0, 0, 0) as i64 } + } + + #[test] + #[cfg(target_os = "linux")] + fn harden_child_process_marks_process_nondumpable() { + assert_eq!(probe_hardened_child(dumpable_flag_probe), 0); + } + #[tokio::test] async fn scrub_sensitive_env_removes_ssh_handshake_secret() { let mut cmd = Command::new("/usr/bin/env"); diff --git a/crates/openshell-sandbox/src/ssh.rs b/crates/openshell-sandbox/src/ssh.rs index 0bee2e2f2..3c26f8061 100644 --- a/crates/openshell-sandbox/src/ssh.rs +++ b/crates/openshell-sandbox/src/ssh.rs @@ -1259,6 +1259,8 @@ mod unsafe_pty { // Drop privileges. initgroups/setgid/setuid need /etc/group and // /etc/passwd which would be blocked if Landlock were already enforced. drop_privileges(policy).map_err(|err| std::io::Error::other(err.to_string()))?; + crate::process::harden_child_process() + .map_err(|err| std::io::Error::other(err.to_string()))?; // Phase 2: Enforce the prepared Landlock ruleset + seccomp. // restrict_self() does not require root. diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 80dd72059..a84800e4c 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -185,7 +185,7 @@ The sandbox process runs as a non-root user after explicit privilege dropping. | Aspect | Detail | |---|---| -| Default | `run_as_user: sandbox`, `run_as_group: sandbox`. The supervisor calls `setuid()`/`setgid()` with post-condition verification: confirms the effective UID/GID match the target and that `setuid(0)` fails (root cannot be re-acquired). | +| Default | `run_as_user: sandbox`, `run_as_group: sandbox`. The supervisor calls `setuid()`/`setgid()` with post-condition verification, disables core dumps with `RLIMIT_CORE=0`, and on Linux sets `PR_SET_DUMPABLE=0`. | | What you can change | Set `run_as_user` and `run_as_group` in the `process` section. Validation rejects root (`root` or `0`). | | Risk if relaxed | Running as a higher-privilege user increases the impact of container escape vulnerabilities. | | Recommendation | Keep the `sandbox` user. Do not attempt to set root. | @@ -208,8 +208,9 @@ This ordering is intentional: privilege dropping needs `/etc/group` and `/etc/pa 1. Network namespace entry (`setns`). 2. Privilege drop (`initgroups` + `setgid` + `setuid`). -3. Landlock filesystem restrictions. -4. Seccomp socket domain filters. +3. Core-dump hardening (`RLIMIT_CORE=0`, plus `PR_SET_DUMPABLE=0` on Linux). +4. Landlock filesystem restrictions. +5. Seccomp socket domain filters. ## Inference Controls diff --git a/e2e/rust/tests/core_dump_hardening.rs b/e2e/rust/tests/core_dump_hardening.rs new file mode 100644 index 000000000..a400b6c21 --- /dev/null +++ b/e2e/rust/tests/core_dump_hardening.rs @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![cfg(feature = "e2e")] + +use openshell_e2e::harness::sandbox::SandboxGuard; + +#[tokio::test] +async fn sandbox_processes_disable_core_dumps() { + let mut sb = SandboxGuard::create(&[ + "--", + "sh", + "-lc", + "test \"$(ulimit -c)\" = 0 && echo core-limit-ok", + ]) + .await + .expect("sandbox create should succeed"); + + assert!( + sb.create_output.contains("core-limit-ok"), + "expected sandbox output to confirm core dumps are disabled:\n{}", + sb.create_output, + ); + + sb.cleanup().await; +}