From 6719c41c61d80c331bbba0e7f469897dfa2fb770 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 12 Jun 2026 09:57:23 +0000 Subject: [PATCH 1/2] fix(interpreter): add IFS word-split field and byte resource caps --- crates/bashkit/src/interpreter/mod.rs | 128 +++++++++++++----- crates/bashkit/src/limits.rs | 41 +++++- .../tests/integration/threat_model_tests.rs | 14 +- 3 files changed, 138 insertions(+), 45 deletions(-) diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index 53203dc9..20e332f1 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -4946,7 +4946,7 @@ impl Interpreter { break; } let expanded = self.expand_word(word).await?; - for field in self.ifs_split_limited(&expanded, remaining) { + for field in self.ifs_split_limited(&expanded, remaining)? { next_arr.insert(idx, field); idx += 1; } @@ -9104,7 +9104,7 @@ impl Interpreter { // $@ unquoted: each param is subject to further IFS splitting let mut fields = Vec::new(); for p in &positional { - fields.extend(self.ifs_split(p)); + fields.extend(self.ifs_split(p)?); } return Ok(fields); } @@ -9130,7 +9130,7 @@ impl Interpreter { // $* unquoted: each param is subject to IFS splitting let mut fields = Vec::new(); for p in &positional { - fields.extend(self.ifs_split(p)); + fields.extend(self.ifs_split(p)?); } return Ok(fields); } @@ -9210,7 +9210,7 @@ impl Interpreter { }; if part_has_expansion && !part_is_quoted { - let split = self.ifs_split(&value); + let split = self.ifs_split(&value)?; if let Some((first, rest)) = split.split_first() { if let Some(current) = fields.last_mut() { current.push_str(first); @@ -9259,7 +9259,7 @@ impl Interpreter { }); if has_expansion { - Ok(self.ifs_split(&expanded)) + self.ifs_split(&expanded) } else { Ok(vec![Self::strip_quote_markers(&expanded)]) } @@ -9403,14 +9403,14 @@ impl Interpreter { /// an empty field between them. /// - `` = single delimiter (ws absorbed into the nws delimiter). /// - Empty IFS → no splitting. Unset IFS → default " \t\n". - fn ifs_split(&self, s: &str) -> Vec { - self.ifs_split_limited(s, usize::MAX) + fn ifs_split(&self, s: &str) -> Result> { + self.ifs_split_limited(s, self.limits.max_word_split_fields) } - /// Split a string on IFS characters, returning at most `limit` fields. - fn ifs_split_limited(&self, s: &str, limit: usize) -> Vec { + /// Split a string on IFS characters, returning an error if resource caps are exceeded. + fn ifs_split_limited(&self, s: &str, limit: usize) -> Result> { if limit == 0 { - return Vec::new(); + return Ok(Vec::new()); } let ifs = self @@ -9420,7 +9420,8 @@ impl Interpreter { .unwrap_or_else(|| " \t\n".to_string()); if ifs.is_empty() { - return vec![Self::strip_quote_markers(s)]; + let field = Self::strip_quote_markers(s); + return self.push_ifs_field(Vec::new(), field, limit, s.len()); } let is_ifs = |c: char, quoted: bool| !quoted && ifs.contains(c); @@ -9433,27 +9434,33 @@ impl Interpreter { // IFS is only whitespace: split on unquoted runs, elide empties. let mut fields = Vec::new(); let mut current = String::new(); + let mut bytes = 0usize; for &(c, quoted) in &chars { if is_ifs(c, quoted) { if !current.is_empty() { - fields.push(std::mem::take(&mut current)); - if fields.len() >= limit { - return fields; - } + bytes = bytes.saturating_add(current.len()); + fields = self.push_ifs_field( + fields, + std::mem::take(&mut current), + limit, + bytes, + )?; } } else { current.push(c); } } - if !current.is_empty() && fields.len() < limit { - fields.push(current); + if !current.is_empty() { + bytes = bytes.saturating_add(current.len()); + fields = self.push_ifs_field(fields, current, limit, bytes)?; } - return fields; + return Ok(fields); } // Mixed or pure non-whitespace IFS. let mut fields: Vec = Vec::new(); let mut current = String::new(); + let mut bytes = 0usize; let mut i = 0; // Skip leading IFS whitespace @@ -9462,10 +9469,7 @@ impl Interpreter { } // Leading non-whitespace IFS produces an empty first field if i < chars.len() && is_ifs_nws(chars[i].0, chars[i].1) { - fields.push(String::new()); - if fields.len() >= limit { - return fields; - } + fields = self.push_ifs_field(fields, String::new(), limit, bytes)?; i += 1; while i < chars.len() && is_ifs_ws(chars[i].0, chars[i].1) { i += 1; @@ -9476,10 +9480,9 @@ impl Interpreter { let (c, quoted) = chars[i]; if is_ifs_nws(c, quoted) { // Non-whitespace IFS delimiter: finalize current field - fields.push(std::mem::take(&mut current)); - if fields.len() >= limit { - return fields; - } + let field = std::mem::take(&mut current); + bytes = bytes.saturating_add(field.len()); + fields = self.push_ifs_field(fields, field, limit, bytes)?; i += 1; // Consume trailing IFS whitespace while i < chars.len() && is_ifs_ws(chars[i].0, chars[i].1) { @@ -9492,20 +9495,18 @@ impl Interpreter { } if i < chars.len() && is_ifs_nws(chars[i].0, chars[i].1) { // = single delimiter. Push current field. - fields.push(std::mem::take(&mut current)); - if fields.len() >= limit { - return fields; - } + let field = std::mem::take(&mut current); + bytes = bytes.saturating_add(field.len()); + fields = self.push_ifs_field(fields, field, limit, bytes)?; i += 1; // consume the nws char while i < chars.len() && is_ifs_ws(chars[i].0, chars[i].1) { i += 1; } } else if i < chars.len() { // ws alone as delimiter (no nws follows) - fields.push(std::mem::take(&mut current)); - if fields.len() >= limit { - return fields; - } + let field = std::mem::take(&mut current); + bytes = bytes.saturating_add(field.len()); + fields = self.push_ifs_field(fields, field, limit, bytes)?; } // trailing ws at end → ignore (don't push empty field) } else { @@ -9514,11 +9515,36 @@ impl Interpreter { } } - if !current.is_empty() && fields.len() < limit { - fields.push(current); + if !current.is_empty() { + bytes = bytes.saturating_add(current.len()); + fields = self.push_ifs_field(fields, current, limit, bytes)?; } - fields + Ok(fields) + } + + fn push_ifs_field( + &self, + mut fields: Vec, + field: String, + limit: usize, + bytes: usize, + ) -> Result> { + if fields.len() >= limit { + return Err(crate::limits::LimitExceeded::Memory(format!( + "word split field limit ({limit}) exceeded" + )) + .into()); + } + if bytes > self.limits.max_word_split_bytes { + return Err(crate::limits::LimitExceeded::Memory(format!( + "word split byte limit ({}) exceeded", + self.limits.max_word_split_bytes + )) + .into()); + } + fields.push(field); + Ok(fields) } /// Expand an operand string from a parameter expansion (sync, lazy). @@ -12560,6 +12586,34 @@ mod tests { interp.execute(&ast).await.unwrap() } + #[tokio::test] + async fn test_ifs_split_field_limit_rejects_exploding_command_substitution() { + let fs: Arc = Arc::new(InMemoryFs::new()); + let mut interp = Interpreter::new(Arc::clone(&fs)); + interp.set_limits(ExecutionLimits::default().max_word_split_fields(3)); + let parser = Parser::new("IFS=,; for x in $(echo a,b,c,d); do :; done"); + let ast = parser.parse().unwrap(); + let err = interp.execute(&ast).await.unwrap_err(); + assert!( + err.to_string() + .contains("word split field limit (3) exceeded") + ); + } + + #[tokio::test] + async fn test_ifs_split_byte_limit_rejects_large_materialized_field() { + let fs: Arc = Arc::new(InMemoryFs::new()); + let mut interp = Interpreter::new(Arc::clone(&fs)); + interp.set_limits(ExecutionLimits::default().max_word_split_bytes(5)); + let parser = Parser::new("v=abcdef; echo $v"); + let ast = parser.parse().unwrap(); + let err = interp.execute(&ast).await.unwrap_err(); + assert!( + err.to_string() + .contains("word split byte limit (5) exceeded") + ); + } + #[tokio::test] async fn test_colon_null_utility() { // POSIX : (colon) - null utility, should return success diff --git a/crates/bashkit/src/limits.rs b/crates/bashkit/src/limits.rs index ad07264b..7659623b 100644 --- a/crates/bashkit/src/limits.rs +++ b/crates/bashkit/src/limits.rs @@ -118,6 +118,14 @@ pub struct ExecutionLimits { /// Default: 1MB (1,048,576 bytes) pub max_history_output_bytes: usize, + /// Maximum fields produced by one IFS word-splitting operation. + /// Default: 100,000 + pub max_word_split_fields: usize, + + /// Maximum total bytes copied into fields by one IFS word-splitting operation. + /// Default: 10MB (10,000,000 bytes) + pub max_word_split_bytes: usize, + /// Whether to capture the final environment state in ExecResult. /// Default: false (opt-in to avoid cloning cost when not needed) pub capture_final_env: bool, @@ -143,6 +151,8 @@ impl Default for ExecutionLimits { max_history_entries: 1_000, max_history_bytes: 1_048_576, // 1MB max_history_output_bytes: 1_048_576, // 1MB + max_word_split_fields: 100_000, + max_word_split_bytes: 10_000_000, capture_final_env: false, } } @@ -293,6 +303,24 @@ impl ExecutionLimits { self } + /// Set maximum fields produced by one IFS word-splitting operation. + /// Passing 0 is treated as "use default" (no-op) to prevent misconfiguration. + pub fn max_word_split_fields(mut self, count: usize) -> Self { + if count > 0 { + self.max_word_split_fields = count; + } + self + } + + /// Set maximum total bytes copied by one IFS word-splitting operation. + /// Passing 0 is treated as "use default" (no-op) to prevent misconfiguration. + pub fn max_word_split_bytes(mut self, bytes: usize) -> Self { + if bytes > 0 { + self.max_word_split_bytes = bytes; + } + self + } + /// Enable capturing final environment state in ExecResult pub fn capture_final_env(mut self, capture: bool) -> Self { self.capture_final_env = capture; @@ -1153,8 +1181,11 @@ mod tests { .max_stderr_bytes(0) .max_subst_depth(0) .max_subshell_depth(0) - .max_file_descriptors(0); + .max_file_descriptors(0) + .max_word_split_fields(0) + .max_word_split_bytes(0); + let defaults = ExecutionLimits::default(); assert_eq!(limits.max_commands, 0); assert_eq!(limits.max_loop_iterations, 0); assert_eq!(limits.max_total_loop_iterations, 0); @@ -1167,6 +1198,8 @@ mod tests { assert_eq!(limits.max_subst_depth, 0); assert_eq!(limits.max_subshell_depth, 0); assert_eq!(limits.max_file_descriptors, 0); + assert_eq!(limits.max_word_split_fields, defaults.max_word_split_fields); + assert_eq!(limits.max_word_split_bytes, defaults.max_word_split_bytes); } #[test] @@ -1183,7 +1216,9 @@ mod tests { .max_stderr_bytes(4096) .max_subst_depth(8) .max_subshell_depth(6) - .max_file_descriptors(16); + .max_file_descriptors(16) + .max_word_split_fields(17) + .max_word_split_bytes(18); assert_eq!(limits.max_commands, 5); assert_eq!(limits.max_loop_iterations, 7); @@ -1197,6 +1232,8 @@ mod tests { assert_eq!(limits.max_subst_depth, 8); assert_eq!(limits.max_subshell_depth, 6); assert_eq!(limits.max_file_descriptors, 16); + assert_eq!(limits.max_word_split_fields, 17); + assert_eq!(limits.max_word_split_bytes, 18); } #[test] diff --git a/crates/bashkit/tests/integration/threat_model_tests.rs b/crates/bashkit/tests/integration/threat_model_tests.rs index 74603744..b3155775 100644 --- a/crates/bashkit/tests/integration/threat_model_tests.rs +++ b/crates/bashkit/tests/integration/threat_model_tests.rs @@ -4772,6 +4772,8 @@ echo ${#arr[@]} /// TM-DOS-060: Unquoted expansion in array assignment must still respect /// max_array_entries after IFS word splitting (arr=($x)). + /// Word-split array assignment that exceeds max_array_entries now errors + /// instead of silently truncating (ifs_split_limited returns Err). #[tokio::test] async fn array_assignment_word_split_respects_array_entry_limit() { let mem = MemoryLimits::new().max_array_entries(100); @@ -4792,12 +4794,12 @@ done arr=($parts) echo ${#arr[@]} "#; - let result = bash.exec(script).await.unwrap(); - assert_eq!(result.exit_code, 0); - let count: usize = result.stdout.trim().parse().unwrap_or(0); - assert_eq!( - count, 100, - "Word-split array assignment should stop at max_array_entries" + // Word-splitting that exceeds max_array_entries now propagates as an error + // rather than silently truncating, so exec returns Err. + let result = bash.exec(script).await; + assert!( + result.is_err() || result.as_ref().map(|r| r.exit_code).unwrap_or(0) != 0, + "Word-split array assignment exceeding max_array_entries should fail" ); } From ebc7c4d4a441dfae00dab9bc7bba138c8ccbf02e Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 12 Jun 2026 10:12:19 +0000 Subject: [PATCH 2/2] fix(interpreter): address Copilot review on IFS word-split caps - ifs_split_limited: clamp effective limit to max_word_split_fields so callers passing larger values (e.g. remaining array capacity) cannot bypass the configured cap - empty-IFS branch: use stripped field length for byte accounting instead of raw s.len() which includes quote markers and overstates size - threat-model test: assert result.is_err() with limit-message check instead of loose is_err()||exit_code!=0 that masked unrelated failures --- crates/bashkit/src/interpreter/mod.rs | 6 +++++- .../tests/integration/threat_model_tests.rs | 15 +++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index 20e332f1..672333fd 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -9409,6 +9409,9 @@ impl Interpreter { /// Split a string on IFS characters, returning an error if resource caps are exceeded. fn ifs_split_limited(&self, s: &str, limit: usize) -> Result> { + // Clamp so callers passing a larger value (e.g. remaining array capacity) + // cannot bypass the configured max_word_split_fields cap. + let limit = limit.min(self.limits.max_word_split_fields); if limit == 0 { return Ok(Vec::new()); } @@ -9421,7 +9424,8 @@ impl Interpreter { if ifs.is_empty() { let field = Self::strip_quote_markers(s); - return self.push_ifs_field(Vec::new(), field, limit, s.len()); + let bytes = field.len(); + return self.push_ifs_field(Vec::new(), field, limit, bytes); } let is_ifs = |c: char, quoted: bool| !quoted && ifs.contains(c); diff --git a/crates/bashkit/tests/integration/threat_model_tests.rs b/crates/bashkit/tests/integration/threat_model_tests.rs index b3155775..9b4d622a 100644 --- a/crates/bashkit/tests/integration/threat_model_tests.rs +++ b/crates/bashkit/tests/integration/threat_model_tests.rs @@ -4794,12 +4794,19 @@ done arr=($parts) echo ${#arr[@]} "#; - // Word-splitting that exceeds max_array_entries now propagates as an error - // rather than silently truncating, so exec returns Err. + // Word-splitting that exceeds max_word_split_fields propagates as Err. let result = bash.exec(script).await; + let err_msg = match &result { + Err(e) => e.to_string(), + Ok(r) => r.stderr.clone(), + }; assert!( - result.is_err() || result.as_ref().map(|r| r.exit_code).unwrap_or(0) != 0, - "Word-split array assignment exceeding max_array_entries should fail" + result.is_err(), + "should error on word-split limit, got: {err_msg}" + ); + assert!( + err_msg.contains("word split") || err_msg.contains("limit"), + "error should mention word-split limit, got: {err_msg}" ); }