From 6719c41c61d80c331bbba0e7f469897dfa2fb770 Mon Sep 17 00:00:00 2001
From: Mykhailo Chalyi <mike@chaliy.name>
Date: Fri, 12 Jun 2026 09:57:23 +0000
Subject: [PATCH 1/2] fix(interpreter): add IFS word-split field and byte
 resource caps

---
 crates/bashkit/src/interpreter/mod.rs         | 128 +++++++++++++-----
 crates/bashkit/src/limits.rs                  |  41 +++++-
 .../tests/integration/threat_model_tests.rs   |  14 +-
 3 files changed, 138 insertions(+), 45 deletions(-)
diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs
index 53203dc9..20e332f1 100644
--- a/crates/bashkit/src/interpreter/mod.rs
+++ b/crates/bashkit/src/interpreter/mod.rs
@@ -4946,7 +4946,7 @@ impl Interpreter {
                                 break;
                             }
                             let expanded = self.expand_word(word).await?;
-                            for field in self.ifs_split_limited(&expanded, remaining) {
+                            for field in self.ifs_split_limited(&expanded, remaining)? {
                                 next_arr.insert(idx, field);
                                 idx += 1;
                             }
@@ -9104,7 +9104,7 @@ impl Interpreter {
                         // $@ unquoted: each param is subject to further IFS splitting
                         let mut fields = Vec::new();
                         for p in &positional {
-                            fields.extend(self.ifs_split(p));
+                            fields.extend(self.ifs_split(p)?);
                         }
                         return Ok(fields);
                     }
@@ -9130,7 +9130,7 @@ impl Interpreter {
                         // $* unquoted: each param is subject to IFS splitting
                         let mut fields = Vec::new();
                         for p in &positional {
-                            fields.extend(self.ifs_split(p));
+                            fields.extend(self.ifs_split(p)?);
                         }
                         return Ok(fields);
                     }
@@ -9210,7 +9210,7 @@ impl Interpreter {
                     };
 
                     if part_has_expansion && !part_is_quoted {
-                        let split = self.ifs_split(&value);
+                        let split = self.ifs_split(&value)?;
                         if let Some((first, rest)) = split.split_first() {
                             if let Some(current) = fields.last_mut() {
                                 current.push_str(first);
@@ -9259,7 +9259,7 @@ impl Interpreter {
                 });
 
             if has_expansion {
-                Ok(self.ifs_split(&expanded))
+                self.ifs_split(&expanded)
             } else {
                 Ok(vec![Self::strip_quote_markers(&expanded)])
             }
@@ -9403,14 +9403,14 @@ impl Interpreter {
     ///   an empty field between them.
     /// - `<ws><nws><ws>` = single delimiter (ws absorbed into the nws delimiter).
     /// - Empty IFS → no splitting. Unset IFS → default " \t\n".
-    fn ifs_split(&self, s: &str) -> Vec<String> {
-        self.ifs_split_limited(s, usize::MAX)
+    fn ifs_split(&self, s: &str) -> Result<Vec<String>> {
+        self.ifs_split_limited(s, self.limits.max_word_split_fields)
     }
 
-    /// Split a string on IFS characters, returning at most `limit` fields.
-    fn ifs_split_limited(&self, s: &str, limit: usize) -> Vec<String> {
+    /// Split a string on IFS characters, returning an error if resource caps are exceeded.
+    fn ifs_split_limited(&self, s: &str, limit: usize) -> Result<Vec<String>> {
         if limit == 0 {
-            return Vec::new();
+            return Ok(Vec::new());
         }
 
         let ifs = self
@@ -9420,7 +9420,8 @@ impl Interpreter {
             .unwrap_or_else(|| " \t\n".to_string());
 
         if ifs.is_empty() {
-            return vec![Self::strip_quote_markers(s)];
+            let field = Self::strip_quote_markers(s);
+            return self.push_ifs_field(Vec::new(), field, limit, s.len());
         }
 
         let is_ifs = |c: char, quoted: bool| !quoted && ifs.contains(c);
@@ -9433,27 +9434,33 @@ impl Interpreter {
             // IFS is only whitespace: split on unquoted runs, elide empties.
             let mut fields = Vec::new();
             let mut current = String::new();
+            let mut bytes = 0usize;
             for &(c, quoted) in &chars {
                 if is_ifs(c, quoted) {
                     if !current.is_empty() {
-                        fields.push(std::mem::take(&mut current));
-                        if fields.len() >= limit {
-                            return fields;
-                        }
+                        bytes = bytes.saturating_add(current.len());
+                        fields = self.push_ifs_field(
+                            fields,
+                            std::mem::take(&mut current),
+                            limit,
+                            bytes,
+                        )?;
                     }
                 } else {
                     current.push(c);
                 }
             }
-            if !current.is_empty() && fields.len() < limit {
-                fields.push(current);
+            if !current.is_empty() {
+                bytes = bytes.saturating_add(current.len());
+                fields = self.push_ifs_field(fields, current, limit, bytes)?;
             }
-            return fields;
+            return Ok(fields);
         }
 
         // Mixed or pure non-whitespace IFS.
         let mut fields: Vec<String> = Vec::new();
         let mut current = String::new();
+        let mut bytes = 0usize;
         let mut i = 0;
 
         // Skip leading IFS whitespace
@@ -9462,10 +9469,7 @@ impl Interpreter {
         }
         // Leading non-whitespace IFS produces an empty first field
         if i < chars.len() && is_ifs_nws(chars[i].0, chars[i].1) {
-            fields.push(String::new());
-            if fields.len() >= limit {
-                return fields;
-            }
+            fields = self.push_ifs_field(fields, String::new(), limit, bytes)?;
             i += 1;
             while i < chars.len() && is_ifs_ws(chars[i].0, chars[i].1) {
                 i += 1;
@@ -9476,10 +9480,9 @@ impl Interpreter {
             let (c, quoted) = chars[i];
             if is_ifs_nws(c, quoted) {
                 // Non-whitespace IFS delimiter: finalize current field
-                fields.push(std::mem::take(&mut current));
-                if fields.len() >= limit {
-                    return fields;
-                }
+                let field = std::mem::take(&mut current);
+                bytes = bytes.saturating_add(field.len());
+                fields = self.push_ifs_field(fields, field, limit, bytes)?;
                 i += 1;
                 // Consume trailing IFS whitespace
                 while i < chars.len() && is_ifs_ws(chars[i].0, chars[i].1) {
@@ -9492,20 +9495,18 @@ impl Interpreter {
                 }
                 if i < chars.len() && is_ifs_nws(chars[i].0, chars[i].1) {
                     // <ws><nws> = single delimiter. Push current field.
-                    fields.push(std::mem::take(&mut current));
-                    if fields.len() >= limit {
-                        return fields;
-                    }
+                    let field = std::mem::take(&mut current);
+                    bytes = bytes.saturating_add(field.len());
+                    fields = self.push_ifs_field(fields, field, limit, bytes)?;
                     i += 1; // consume the nws char
                     while i < chars.len() && is_ifs_ws(chars[i].0, chars[i].1) {
                         i += 1;
                     }
                 } else if i < chars.len() {
                     // ws alone as delimiter (no nws follows)
-                    fields.push(std::mem::take(&mut current));
-                    if fields.len() >= limit {
-                        return fields;
-                    }
+                    let field = std::mem::take(&mut current);
+                    bytes = bytes.saturating_add(field.len());
+                    fields = self.push_ifs_field(fields, field, limit, bytes)?;
                 }
                 // trailing ws at end → ignore (don't push empty field)
             } else {
@@ -9514,11 +9515,36 @@ impl Interpreter {
             }
         }
 
-        if !current.is_empty() && fields.len() < limit {
-            fields.push(current);
+        if !current.is_empty() {
+            bytes = bytes.saturating_add(current.len());
+            fields = self.push_ifs_field(fields, current, limit, bytes)?;
         }
 
-        fields
+        Ok(fields)
+    }
+
+    fn push_ifs_field(
+        &self,
+        mut fields: Vec<String>,
+        field: String,
+        limit: usize,
+        bytes: usize,
+    ) -> Result<Vec<String>> {
+        if fields.len() >= limit {
+            return Err(crate::limits::LimitExceeded::Memory(format!(
+                "word split field limit ({limit}) exceeded"
+            ))
+            .into());
+        }
+        if bytes > self.limits.max_word_split_bytes {
+            return Err(crate::limits::LimitExceeded::Memory(format!(
+                "word split byte limit ({}) exceeded",
+                self.limits.max_word_split_bytes
+            ))
+            .into());
+        }
+        fields.push(field);
+        Ok(fields)
     }
 
     /// Expand an operand string from a parameter expansion (sync, lazy).
@@ -12560,6 +12586,34 @@ mod tests {
         interp.execute(&ast).await.unwrap()
     }
 
+    #[tokio::test]
+    async fn test_ifs_split_field_limit_rejects_exploding_command_substitution() {
+        let fs: Arc<dyn FileSystem> = Arc::new(InMemoryFs::new());
+        let mut interp = Interpreter::new(Arc::clone(&fs));
+        interp.set_limits(ExecutionLimits::default().max_word_split_fields(3));
+        let parser = Parser::new("IFS=,; for x in $(echo a,b,c,d); do :; done");
+        let ast = parser.parse().unwrap();
+        let err = interp.execute(&ast).await.unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("word split field limit (3) exceeded")
+        );
+    }
+
+    #[tokio::test]
+    async fn test_ifs_split_byte_limit_rejects_large_materialized_field() {
+        let fs: Arc<dyn FileSystem> = Arc::new(InMemoryFs::new());
+        let mut interp = Interpreter::new(Arc::clone(&fs));
+        interp.set_limits(ExecutionLimits::default().max_word_split_bytes(5));
+        let parser = Parser::new("v=abcdef; echo $v");
+        let ast = parser.parse().unwrap();
+        let err = interp.execute(&ast).await.unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("word split byte limit (5) exceeded")
+        );
+    }
+
     #[tokio::test]
     async fn test_colon_null_utility() {
         // POSIX : (colon) - null utility, should return success
diff --git a/crates/bashkit/src/limits.rs b/crates/bashkit/src/limits.rs
index ad07264b..7659623b 100644
--- a/crates/bashkit/src/limits.rs
+++ b/crates/bashkit/src/limits.rs
@@ -118,6 +118,14 @@ pub struct ExecutionLimits {
     /// Default: 1MB (1,048,576 bytes)
     pub max_history_output_bytes: usize,
 
+    /// Maximum fields produced by one IFS word-splitting operation.
+    /// Default: 100,000
+    pub max_word_split_fields: usize,
+
+    /// Maximum total bytes copied into fields by one IFS word-splitting operation.
+    /// Default: 10MB (10,000,000 bytes)
+    pub max_word_split_bytes: usize,
+
     /// Whether to capture the final environment state in ExecResult.
     /// Default: false (opt-in to avoid cloning cost when not needed)
     pub capture_final_env: bool,
@@ -143,6 +151,8 @@ impl Default for ExecutionLimits {
             max_history_entries: 1_000,
             max_history_bytes: 1_048_576,        // 1MB
             max_history_output_bytes: 1_048_576, // 1MB
+            max_word_split_fields: 100_000,
+            max_word_split_bytes: 10_000_000,
             capture_final_env: false,
         }
     }
@@ -293,6 +303,24 @@ impl ExecutionLimits {
         self
     }
 
+    /// Set maximum fields produced by one IFS word-splitting operation.
+    /// Passing 0 is treated as "use default" (no-op) to prevent misconfiguration.
+    pub fn max_word_split_fields(mut self, count: usize) -> Self {
+        if count > 0 {
+            self.max_word_split_fields = count;
+        }
+        self
+    }
+
+    /// Set maximum total bytes copied by one IFS word-splitting operation.
+    /// Passing 0 is treated as "use default" (no-op) to prevent misconfiguration.
+    pub fn max_word_split_bytes(mut self, bytes: usize) -> Self {
+        if bytes > 0 {
+            self.max_word_split_bytes = bytes;
+        }
+        self
+    }
+
     /// Enable capturing final environment state in ExecResult
     pub fn capture_final_env(mut self, capture: bool) -> Self {
         self.capture_final_env = capture;
@@ -1153,8 +1181,11 @@ mod tests {
             .max_stderr_bytes(0)
             .max_subst_depth(0)
             .max_subshell_depth(0)
-            .max_file_descriptors(0);
+            .max_file_descriptors(0)
+            .max_word_split_fields(0)
+            .max_word_split_bytes(0);
 
+        let defaults = ExecutionLimits::default();
         assert_eq!(limits.max_commands, 0);
         assert_eq!(limits.max_loop_iterations, 0);
         assert_eq!(limits.max_total_loop_iterations, 0);
@@ -1167,6 +1198,8 @@ mod tests {
         assert_eq!(limits.max_subst_depth, 0);
         assert_eq!(limits.max_subshell_depth, 0);
         assert_eq!(limits.max_file_descriptors, 0);
+        assert_eq!(limits.max_word_split_fields, defaults.max_word_split_fields);
+        assert_eq!(limits.max_word_split_bytes, defaults.max_word_split_bytes);
     }
 
     #[test]
@@ -1183,7 +1216,9 @@ mod tests {
             .max_stderr_bytes(4096)
             .max_subst_depth(8)
             .max_subshell_depth(6)
-            .max_file_descriptors(16);
+            .max_file_descriptors(16)
+            .max_word_split_fields(17)
+            .max_word_split_bytes(18);
 
         assert_eq!(limits.max_commands, 5);
         assert_eq!(limits.max_loop_iterations, 7);
@@ -1197,6 +1232,8 @@ mod tests {
         assert_eq!(limits.max_subst_depth, 8);
         assert_eq!(limits.max_subshell_depth, 6);
         assert_eq!(limits.max_file_descriptors, 16);
+        assert_eq!(limits.max_word_split_fields, 17);
+        assert_eq!(limits.max_word_split_bytes, 18);
     }
 
     #[test]
diff --git a/crates/bashkit/tests/integration/threat_model_tests.rs b/crates/bashkit/tests/integration/threat_model_tests.rs
index 74603744..b3155775 100644
--- a/crates/bashkit/tests/integration/threat_model_tests.rs
+++ b/crates/bashkit/tests/integration/threat_model_tests.rs
@@ -4772,6 +4772,8 @@ echo ${#arr[@]}
 
     /// TM-DOS-060: Unquoted expansion in array assignment must still respect
     /// max_array_entries after IFS word splitting (arr=($x)).
+    /// Word-split array assignment that exceeds max_array_entries now errors
+    /// instead of silently truncating (ifs_split_limited returns Err).
     #[tokio::test]
     async fn array_assignment_word_split_respects_array_entry_limit() {
         let mem = MemoryLimits::new().max_array_entries(100);
@@ -4792,12 +4794,12 @@ done
 arr=($parts)
 echo ${#arr[@]}
 "#;
-        let result = bash.exec(script).await.unwrap();
-        assert_eq!(result.exit_code, 0);
-        let count: usize = result.stdout.trim().parse().unwrap_or(0);
-        assert_eq!(
-            count, 100,
-            "Word-split array assignment should stop at max_array_entries"
+        // Word-splitting that exceeds max_array_entries now propagates as an error
+        // rather than silently truncating, so exec returns Err.
+        let result = bash.exec(script).await;
+        assert!(
+            result.is_err() || result.as_ref().map(|r| r.exit_code).unwrap_or(0) != 0,
+            "Word-split array assignment exceeding max_array_entries should fail"
         );
     }
 

From ebc7c4d4a441dfae00dab9bc7bba138c8ccbf02e Mon Sep 17 00:00:00 2001
From: Mykhailo Chalyi <mike@chaliy.name>
Date: Fri, 12 Jun 2026 10:12:19 +0000
Subject: [PATCH 2/2] fix(interpreter): address Copilot review on IFS
 word-split caps

- ifs_split_limited: clamp effective limit to max_word_split_fields so
  callers passing larger values (e.g. remaining array capacity) cannot
  bypass the configured cap
- empty-IFS branch: use stripped field length for byte accounting instead
  of raw s.len() which includes quote markers and overstates size
- threat-model test: assert result.is_err() with limit-message check
  instead of loose is_err()||exit_code!=0 that masked unrelated failures
---
 crates/bashkit/src/interpreter/mod.rs             |  6 +++++-
 .../tests/integration/threat_model_tests.rs       | 15 +++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs
index 20e332f1..672333fd 100644
--- a/crates/bashkit/src/interpreter/mod.rs
+++ b/crates/bashkit/src/interpreter/mod.rs
@@ -9409,6 +9409,9 @@ impl Interpreter {
 
     /// Split a string on IFS characters, returning an error if resource caps are exceeded.
     fn ifs_split_limited(&self, s: &str, limit: usize) -> Result<Vec<String>> {
+        // Clamp so callers passing a larger value (e.g. remaining array capacity)
+        // cannot bypass the configured max_word_split_fields cap.
+        let limit = limit.min(self.limits.max_word_split_fields);
         if limit == 0 {
             return Ok(Vec::new());
         }
@@ -9421,7 +9424,8 @@ impl Interpreter {
 
         if ifs.is_empty() {
             let field = Self::strip_quote_markers(s);
-            return self.push_ifs_field(Vec::new(), field, limit, s.len());
+            let bytes = field.len();
+            return self.push_ifs_field(Vec::new(), field, limit, bytes);
         }
 
         let is_ifs = |c: char, quoted: bool| !quoted && ifs.contains(c);
diff --git a/crates/bashkit/tests/integration/threat_model_tests.rs b/crates/bashkit/tests/integration/threat_model_tests.rs
index b3155775..9b4d622a 100644
--- a/crates/bashkit/tests/integration/threat_model_tests.rs
+++ b/crates/bashkit/tests/integration/threat_model_tests.rs
@@ -4794,12 +4794,19 @@ done
 arr=($parts)
 echo ${#arr[@]}
 "#;
-        // Word-splitting that exceeds max_array_entries now propagates as an error
-        // rather than silently truncating, so exec returns Err.
+        // Word-splitting that exceeds max_word_split_fields propagates as Err.
         let result = bash.exec(script).await;
+        let err_msg = match &result {
+            Err(e) => e.to_string(),
+            Ok(r) => r.stderr.clone(),
+        };
         assert!(
-            result.is_err() || result.as_ref().map(|r| r.exit_code).unwrap_or(0) != 0,
-            "Word-split array assignment exceeding max_array_entries should fail"
+            result.is_err(),
+            "should error on word-split limit, got: {err_msg}"
+        );
+        assert!(
+            err_msg.contains("word split") || err_msg.contains("limit"),
+            "error should mention word-split limit, got: {err_msg}"
         );
     }