Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion architecture/sandbox.md
Original file line number Diff line number Diff line change
Expand Up @@ -1281,7 +1281,9 @@ Wraps `tokio::process::Child` + PID. Platform-specific `spawn()` methods delegat
1. `setpgid(0, 0)` if non-interactive (create new process group)
2. `setns(fd, CLONE_NEWNET)` to enter network namespace (Linux only)
3. `drop_privileges(policy)`: `initgroups()` -> `setgid()` -> `setuid()`
4. `sandbox::apply(policy, workdir)`: Landlock then seccomp
4. Disable core dumps with `setrlimit(RLIMIT_CORE, 0)` on Unix
5. Set `prctl(PR_SET_DUMPABLE, 0)` on Linux
6. `sandbox::apply(policy, workdir)`: Landlock then seccomp

### `drop_privileges()`

Expand All @@ -1297,6 +1299,8 @@ The ordering is significant: `initgroups`/`setgid` must happen before `setuid` b

Steps 3, 5, and 6 are defense-in-depth post-condition checks (CWE-250 / CERT POS37-C). All three syscalls (`geteuid`, `getegid`, `setuid`) are async-signal-safe, so they are safe to call in the `pre_exec` context. The checks add negligible overhead while guarding against hypothetical kernel-level defects that could cause `setuid`/`setgid` to return success without actually changing the effective IDs.

After the privilege drop, the child process also disables core dumps before Landlock and seccomp are applied. On all Unix targets it sets `RLIMIT_CORE=0`; on Linux it additionally sets `PR_SET_DUMPABLE=0`. This prevents crash artifacts from containing provider credentials, request payloads, or other sensitive in-memory data.

### `ProcessStatus`

Exit code is `code` if the process exited normally, or `128 + signal` if killed by a signal (standard Unix convention). Returns `-1` if neither is available.
Expand Down
124 changes: 124 additions & 0 deletions crates/openshell-sandbox/src/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,35 @@ fn scrub_sensitive_env(cmd: &mut Command) {
cmd.env_remove(SSH_HANDSHAKE_SECRET_ENV);
}

#[cfg(unix)]
#[allow(unsafe_code)]
pub(crate) fn harden_child_process() -> Result<()> {
let core_limit = libc::rlimit {
rlim_cur: 0,
rlim_max: 0,
};
let rc = unsafe { libc::setrlimit(libc::RLIMIT_CORE, &core_limit) };
if rc != 0 {
return Err(miette::miette!(
"Failed to disable core dumps: {}",
std::io::Error::last_os_error()
));
}

#[cfg(target_os = "linux")]
{
let rc = unsafe { libc::prctl(libc::PR_SET_DUMPABLE, 0, 0, 0, 0) };
if rc != 0 {
return Err(miette::miette!(
"Failed to set PR_SET_DUMPABLE=0: {}",
std::io::Error::last_os_error()
));
}
}

Ok(())
}

/// Handle to a running process.
pub struct ProcessHandle {
child: Child,
Expand Down Expand Up @@ -200,6 +229,8 @@ impl ProcessHandle {
drop_privileges(&policy)
.map_err(|err| std::io::Error::other(err.to_string()))?;

harden_child_process().map_err(|err| std::io::Error::other(err.to_string()))?;

// Phase 2 (as unprivileged user): Enforce the prepared
// Landlock ruleset via restrict_self() + apply seccomp.
// restrict_self() does not require root.
Expand Down Expand Up @@ -292,6 +323,8 @@ impl ProcessHandle {
drop_privileges(&policy)
.map_err(|err| std::io::Error::other(err.to_string()))?;

harden_child_process().map_err(|err| std::io::Error::other(err.to_string()))?;

sandbox::apply(&policy, workdir.as_deref())
.map_err(|err| std::io::Error::other(err.to_string()))?;

Expand Down Expand Up @@ -543,6 +576,12 @@ mod tests {
use crate::policy::{
FilesystemPolicy, LandlockPolicy, NetworkPolicy, ProcessPolicy, SandboxPolicy,
};
#[cfg(unix)]
use nix::sys::wait::{WaitStatus, waitpid};
#[cfg(unix)]
use nix::unistd::{ForkResult, fork};
#[cfg(unix)]
use std::mem::size_of;
use std::process::Stdio as StdStdio;

/// Helper to create a minimal `SandboxPolicy` with the given process policy.
Expand Down Expand Up @@ -660,6 +699,91 @@ mod tests {
);
}

#[cfg(unix)]
#[allow(unsafe_code)]
fn probe_hardened_child(probe: unsafe fn() -> i64) -> i64 {
const HARDEN_FAILED: i64 = -2;

let mut fds = [0; 2];
let pipe_rc = unsafe { libc::pipe(fds.as_mut_ptr()) };
assert_eq!(
pipe_rc,
0,
"pipe failed: {}",
std::io::Error::last_os_error()
);

match unsafe { fork() }.expect("fork should succeed") {
ForkResult::Child => {
unsafe { libc::close(fds[0]) };
let value = match harden_child_process() {
Ok(()) => unsafe { probe() },
Err(_) => HARDEN_FAILED,
};
let bytes = value.to_ne_bytes();
let written = unsafe { libc::write(fds[1], bytes.as_ptr().cast(), bytes.len()) };
unsafe {
libc::close(fds[1]);
libc::_exit(if written == bytes.len() as isize {
0
} else {
1
});
}
}
ForkResult::Parent { child } => {
unsafe { libc::close(fds[1]) };
let mut bytes = [0u8; size_of::<i64>()];
let read = unsafe { libc::read(fds[0], bytes.as_mut_ptr().cast(), bytes.len()) };
unsafe { libc::close(fds[0]) };
assert_eq!(
read as usize,
bytes.len(),
"expected {} probe bytes, got {}",
bytes.len(),
read
);

match waitpid(child, None).expect("waitpid should succeed") {
WaitStatus::Exited(_, 0) => {}
status => panic!("probe child exited unexpectedly: {status:?}"),
}

i64::from_ne_bytes(bytes)
}
}
}

#[cfg(unix)]
#[allow(unsafe_code)]
unsafe fn core_dump_limit_is_zero_probe() -> i64 {
let mut limit = std::mem::MaybeUninit::<libc::rlimit>::uninit();
let rc = unsafe { libc::getrlimit(libc::RLIMIT_CORE, limit.as_mut_ptr()) };
if rc != 0 {
return -1;
}
let limit = unsafe { limit.assume_init() };
i64::from(limit.rlim_cur == 0 && limit.rlim_max == 0)
}

#[test]
#[cfg(unix)]
fn harden_child_process_disables_core_dumps() {
assert_eq!(probe_hardened_child(core_dump_limit_is_zero_probe), 1);
}

#[cfg(target_os = "linux")]
#[allow(unsafe_code)]
unsafe fn dumpable_flag_probe() -> i64 {
unsafe { libc::prctl(libc::PR_GET_DUMPABLE, 0, 0, 0, 0) as i64 }
}

#[test]
#[cfg(target_os = "linux")]
fn harden_child_process_marks_process_nondumpable() {
assert_eq!(probe_hardened_child(dumpable_flag_probe), 0);
}

#[tokio::test]
async fn scrub_sensitive_env_removes_ssh_handshake_secret() {
let mut cmd = Command::new("/usr/bin/env");
Expand Down
2 changes: 2 additions & 0 deletions crates/openshell-sandbox/src/ssh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1259,6 +1259,8 @@ mod unsafe_pty {
// Drop privileges. initgroups/setgid/setuid need /etc/group and
// /etc/passwd which would be blocked if Landlock were already enforced.
drop_privileges(policy).map_err(|err| std::io::Error::other(err.to_string()))?;
crate::process::harden_child_process()
.map_err(|err| std::io::Error::other(err.to_string()))?;

// Phase 2: Enforce the prepared Landlock ruleset + seccomp.
// restrict_self() does not require root.
Expand Down
7 changes: 4 additions & 3 deletions docs/security/best-practices.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ The sandbox process runs as a non-root user after explicit privilege dropping.

| Aspect | Detail |
|---|---|
| Default | `run_as_user: sandbox`, `run_as_group: sandbox`. The supervisor calls `setuid()`/`setgid()` with post-condition verification: confirms the effective UID/GID match the target and that `setuid(0)` fails (root cannot be re-acquired). |
| Default | `run_as_user: sandbox`, `run_as_group: sandbox`. The supervisor calls `setuid()`/`setgid()` with post-condition verification, disables core dumps with `RLIMIT_CORE=0`, and on Linux sets `PR_SET_DUMPABLE=0`. |
| What you can change | Set `run_as_user` and `run_as_group` in the `process` section. Validation rejects root (`root` or `0`). |
| Risk if relaxed | Running as a higher-privilege user increases the impact of container escape vulnerabilities. |
| Recommendation | Keep the `sandbox` user. Do not attempt to set root. |
Expand All @@ -208,8 +208,9 @@ This ordering is intentional: privilege dropping needs `/etc/group` and `/etc/pa

1. Network namespace entry (`setns`).
2. Privilege drop (`initgroups` + `setgid` + `setuid`).
3. Landlock filesystem restrictions.
4. Seccomp socket domain filters.
3. Core-dump hardening (`RLIMIT_CORE=0`, plus `PR_SET_DUMPABLE=0` on Linux).
4. Landlock filesystem restrictions.
5. Seccomp socket domain filters.

## Inference Controls

Expand Down
26 changes: 26 additions & 0 deletions e2e/rust/tests/core_dump_hardening.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

#![cfg(feature = "e2e")]

use openshell_e2e::harness::sandbox::SandboxGuard;

#[tokio::test]
async fn sandbox_processes_disable_core_dumps() {
let mut sb = SandboxGuard::create(&[
"--",
"sh",
"-lc",
"test \"$(ulimit -c)\" = 0 && echo core-limit-ok",
])
.await
.expect("sandbox create should succeed");

assert!(
sb.create_output.contains("core-limit-ok"),
"expected sandbox output to confirm core dumps are disabled:\n{}",
sb.create_output,
);

sb.cleanup().await;
}
Loading