Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 95 additions & 37 deletions crates/bashkit/src/interpreter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4946,7 +4946,7 @@ impl Interpreter {
break;
}
let expanded = self.expand_word(word).await?;
for field in self.ifs_split_limited(&expanded, remaining) {
for field in self.ifs_split_limited(&expanded, remaining)? {
next_arr.insert(idx, field);
idx += 1;
}
Expand Down Expand Up @@ -9104,7 +9104,7 @@ impl Interpreter {
// $@ unquoted: each param is subject to further IFS splitting
let mut fields = Vec::new();
for p in &positional {
fields.extend(self.ifs_split(p));
fields.extend(self.ifs_split(p)?);
}
return Ok(fields);
}
Expand All @@ -9130,7 +9130,7 @@ impl Interpreter {
// $* unquoted: each param is subject to IFS splitting
let mut fields = Vec::new();
for p in &positional {
fields.extend(self.ifs_split(p));
fields.extend(self.ifs_split(p)?);
}
return Ok(fields);
}
Expand Down Expand Up @@ -9210,7 +9210,7 @@ impl Interpreter {
};

if part_has_expansion && !part_is_quoted {
let split = self.ifs_split(&value);
let split = self.ifs_split(&value)?;
if let Some((first, rest)) = split.split_first() {
if let Some(current) = fields.last_mut() {
current.push_str(first);
Expand Down Expand Up @@ -9259,7 +9259,7 @@ impl Interpreter {
});

if has_expansion {
Ok(self.ifs_split(&expanded))
self.ifs_split(&expanded)
} else {
Ok(vec![Self::strip_quote_markers(&expanded)])
}
Expand Down Expand Up @@ -9403,14 +9403,17 @@ impl Interpreter {
/// an empty field between them.
/// - `<ws><nws><ws>` = single delimiter (ws absorbed into the nws delimiter).
/// - Empty IFS → no splitting. Unset IFS → default " \t\n".
fn ifs_split(&self, s: &str) -> Vec<String> {
self.ifs_split_limited(s, usize::MAX)
fn ifs_split(&self, s: &str) -> Result<Vec<String>> {
self.ifs_split_limited(s, self.limits.max_word_split_fields)
}

/// Split a string on IFS characters, returning at most `limit` fields.
fn ifs_split_limited(&self, s: &str, limit: usize) -> Vec<String> {
/// Split a string on IFS characters, returning an error if resource caps are exceeded.
fn ifs_split_limited(&self, s: &str, limit: usize) -> Result<Vec<String>> {
// Clamp so callers passing a larger value (e.g. remaining array capacity)
// cannot bypass the configured max_word_split_fields cap.
let limit = limit.min(self.limits.max_word_split_fields);
if limit == 0 {
return Vec::new();
return Ok(Vec::new());
}
Comment thread
chaliy marked this conversation as resolved.

let ifs = self
Expand All @@ -9420,7 +9423,9 @@ impl Interpreter {
.unwrap_or_else(|| " \t\n".to_string());

if ifs.is_empty() {
return vec![Self::strip_quote_markers(s)];
let field = Self::strip_quote_markers(s);
let bytes = field.len();
return self.push_ifs_field(Vec::new(), field, limit, bytes);
}
Comment thread
chaliy marked this conversation as resolved.

let is_ifs = |c: char, quoted: bool| !quoted && ifs.contains(c);
Expand All @@ -9433,27 +9438,33 @@ impl Interpreter {
// IFS is only whitespace: split on unquoted runs, elide empties.
let mut fields = Vec::new();
let mut current = String::new();
let mut bytes = 0usize;
for &(c, quoted) in &chars {
if is_ifs(c, quoted) {
if !current.is_empty() {
fields.push(std::mem::take(&mut current));
if fields.len() >= limit {
return fields;
}
bytes = bytes.saturating_add(current.len());
fields = self.push_ifs_field(
fields,
std::mem::take(&mut current),
limit,
bytes,
)?;
}
} else {
current.push(c);
}
}
if !current.is_empty() && fields.len() < limit {
fields.push(current);
if !current.is_empty() {
bytes = bytes.saturating_add(current.len());
fields = self.push_ifs_field(fields, current, limit, bytes)?;
}
return fields;
return Ok(fields);
}

// Mixed or pure non-whitespace IFS.
let mut fields: Vec<String> = Vec::new();
let mut current = String::new();
let mut bytes = 0usize;
let mut i = 0;

// Skip leading IFS whitespace
Expand All @@ -9462,10 +9473,7 @@ impl Interpreter {
}
// Leading non-whitespace IFS produces an empty first field
if i < chars.len() && is_ifs_nws(chars[i].0, chars[i].1) {
fields.push(String::new());
if fields.len() >= limit {
return fields;
}
fields = self.push_ifs_field(fields, String::new(), limit, bytes)?;
i += 1;
while i < chars.len() && is_ifs_ws(chars[i].0, chars[i].1) {
i += 1;
Expand All @@ -9476,10 +9484,9 @@ impl Interpreter {
let (c, quoted) = chars[i];
if is_ifs_nws(c, quoted) {
// Non-whitespace IFS delimiter: finalize current field
fields.push(std::mem::take(&mut current));
if fields.len() >= limit {
return fields;
}
let field = std::mem::take(&mut current);
bytes = bytes.saturating_add(field.len());
fields = self.push_ifs_field(fields, field, limit, bytes)?;
i += 1;
// Consume trailing IFS whitespace
while i < chars.len() && is_ifs_ws(chars[i].0, chars[i].1) {
Expand All @@ -9492,20 +9499,18 @@ impl Interpreter {
}
if i < chars.len() && is_ifs_nws(chars[i].0, chars[i].1) {
// <ws><nws> = single delimiter. Push current field.
fields.push(std::mem::take(&mut current));
if fields.len() >= limit {
return fields;
}
let field = std::mem::take(&mut current);
bytes = bytes.saturating_add(field.len());
fields = self.push_ifs_field(fields, field, limit, bytes)?;
i += 1; // consume the nws char
while i < chars.len() && is_ifs_ws(chars[i].0, chars[i].1) {
i += 1;
}
} else if i < chars.len() {
// ws alone as delimiter (no nws follows)
fields.push(std::mem::take(&mut current));
if fields.len() >= limit {
return fields;
}
let field = std::mem::take(&mut current);
bytes = bytes.saturating_add(field.len());
fields = self.push_ifs_field(fields, field, limit, bytes)?;
}
// trailing ws at end → ignore (don't push empty field)
} else {
Expand All @@ -9514,11 +9519,36 @@ impl Interpreter {
}
}

if !current.is_empty() && fields.len() < limit {
fields.push(current);
if !current.is_empty() {
bytes = bytes.saturating_add(current.len());
fields = self.push_ifs_field(fields, current, limit, bytes)?;
}

fields
Ok(fields)
}

fn push_ifs_field(
&self,
mut fields: Vec<String>,
field: String,
limit: usize,
bytes: usize,
) -> Result<Vec<String>> {
if fields.len() >= limit {
return Err(crate::limits::LimitExceeded::Memory(format!(
"word split field limit ({limit}) exceeded"
))
.into());
}
if bytes > self.limits.max_word_split_bytes {
return Err(crate::limits::LimitExceeded::Memory(format!(
"word split byte limit ({}) exceeded",
self.limits.max_word_split_bytes
))
.into());
}
fields.push(field);
Ok(fields)
}

/// Expand an operand string from a parameter expansion (sync, lazy).
Expand Down Expand Up @@ -12560,6 +12590,34 @@ mod tests {
interp.execute(&ast).await.unwrap()
}

#[tokio::test]
async fn test_ifs_split_field_limit_rejects_exploding_command_substitution() {
let fs: Arc<dyn FileSystem> = Arc::new(InMemoryFs::new());
let mut interp = Interpreter::new(Arc::clone(&fs));
interp.set_limits(ExecutionLimits::default().max_word_split_fields(3));
let parser = Parser::new("IFS=,; for x in $(echo a,b,c,d); do :; done");
let ast = parser.parse().unwrap();
let err = interp.execute(&ast).await.unwrap_err();
assert!(
err.to_string()
.contains("word split field limit (3) exceeded")
);
}

#[tokio::test]
async fn test_ifs_split_byte_limit_rejects_large_materialized_field() {
let fs: Arc<dyn FileSystem> = Arc::new(InMemoryFs::new());
let mut interp = Interpreter::new(Arc::clone(&fs));
interp.set_limits(ExecutionLimits::default().max_word_split_bytes(5));
let parser = Parser::new("v=abcdef; echo $v");
let ast = parser.parse().unwrap();
let err = interp.execute(&ast).await.unwrap_err();
assert!(
err.to_string()
.contains("word split byte limit (5) exceeded")
);
}

#[tokio::test]
async fn test_colon_null_utility() {
// POSIX : (colon) - null utility, should return success
Expand Down
41 changes: 39 additions & 2 deletions crates/bashkit/src/limits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@ pub struct ExecutionLimits {
/// Default: 1MB (1,048,576 bytes)
pub max_history_output_bytes: usize,

/// Maximum fields produced by one IFS word-splitting operation.
/// Default: 100,000
pub max_word_split_fields: usize,

/// Maximum total bytes copied into fields by one IFS word-splitting operation.
/// Default: 10MB (10,000,000 bytes)
pub max_word_split_bytes: usize,

/// Whether to capture the final environment state in ExecResult.
/// Default: false (opt-in to avoid cloning cost when not needed)
pub capture_final_env: bool,
Expand All @@ -143,6 +151,8 @@ impl Default for ExecutionLimits {
max_history_entries: 1_000,
max_history_bytes: 1_048_576, // 1MB
max_history_output_bytes: 1_048_576, // 1MB
max_word_split_fields: 100_000,
max_word_split_bytes: 10_000_000,
capture_final_env: false,
}
}
Expand Down Expand Up @@ -293,6 +303,24 @@ impl ExecutionLimits {
self
}

/// Set maximum fields produced by one IFS word-splitting operation.
/// Passing 0 is treated as "use default" (no-op) to prevent misconfiguration.
pub fn max_word_split_fields(mut self, count: usize) -> Self {
if count > 0 {
self.max_word_split_fields = count;
}
self
}

/// Set maximum total bytes copied by one IFS word-splitting operation.
/// Passing 0 is treated as "use default" (no-op) to prevent misconfiguration.
pub fn max_word_split_bytes(mut self, bytes: usize) -> Self {
if bytes > 0 {
self.max_word_split_bytes = bytes;
}
self
}

/// Enable capturing final environment state in ExecResult
pub fn capture_final_env(mut self, capture: bool) -> Self {
self.capture_final_env = capture;
Expand Down Expand Up @@ -1153,8 +1181,11 @@ mod tests {
.max_stderr_bytes(0)
.max_subst_depth(0)
.max_subshell_depth(0)
.max_file_descriptors(0);
.max_file_descriptors(0)
.max_word_split_fields(0)
.max_word_split_bytes(0);

let defaults = ExecutionLimits::default();
assert_eq!(limits.max_commands, 0);
assert_eq!(limits.max_loop_iterations, 0);
assert_eq!(limits.max_total_loop_iterations, 0);
Expand All @@ -1167,6 +1198,8 @@ mod tests {
assert_eq!(limits.max_subst_depth, 0);
assert_eq!(limits.max_subshell_depth, 0);
assert_eq!(limits.max_file_descriptors, 0);
assert_eq!(limits.max_word_split_fields, defaults.max_word_split_fields);
assert_eq!(limits.max_word_split_bytes, defaults.max_word_split_bytes);
}

#[test]
Expand All @@ -1183,7 +1216,9 @@ mod tests {
.max_stderr_bytes(4096)
.max_subst_depth(8)
.max_subshell_depth(6)
.max_file_descriptors(16);
.max_file_descriptors(16)
.max_word_split_fields(17)
.max_word_split_bytes(18);

assert_eq!(limits.max_commands, 5);
assert_eq!(limits.max_loop_iterations, 7);
Expand All @@ -1197,6 +1232,8 @@ mod tests {
assert_eq!(limits.max_subst_depth, 8);
assert_eq!(limits.max_subshell_depth, 6);
assert_eq!(limits.max_file_descriptors, 16);
assert_eq!(limits.max_word_split_fields, 17);
assert_eq!(limits.max_word_split_bytes, 18);
}

#[test]
Expand Down
21 changes: 15 additions & 6 deletions crates/bashkit/tests/integration/threat_model_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4772,6 +4772,8 @@ echo ${#arr[@]}

/// TM-DOS-060: Unquoted expansion in array assignment must still respect
/// max_array_entries after IFS word splitting (arr=($x)).
/// Word-split array assignment that exceeds max_array_entries now errors
/// instead of silently truncating (ifs_split_limited returns Err).
#[tokio::test]
async fn array_assignment_word_split_respects_array_entry_limit() {
let mem = MemoryLimits::new().max_array_entries(100);
Expand All @@ -4792,12 +4794,19 @@ done
arr=($parts)
echo ${#arr[@]}
"#;
let result = bash.exec(script).await.unwrap();
assert_eq!(result.exit_code, 0);
let count: usize = result.stdout.trim().parse().unwrap_or(0);
assert_eq!(
count, 100,
"Word-split array assignment should stop at max_array_entries"
// Word-splitting that exceeds max_word_split_fields propagates as Err.
let result = bash.exec(script).await;
let err_msg = match &result {
Err(e) => e.to_string(),
Ok(r) => r.stderr.clone(),
};
assert!(
result.is_err(),
"should error on word-split limit, got: {err_msg}"
);
assert!(
err_msg.contains("word split") || err_msg.contains("limit"),
"error should mention word-split limit, got: {err_msg}"
);
}

Expand Down
Loading