From b61ae9d21585a5cc24ad38da6736057870d9f16d Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 12 Jun 2026 09:22:25 +0000 Subject: [PATCH 1/3] fix(awk): cap multi-subscript arrays --- crates/bashkit/src/builtins/awk/parser.rs | 14 ++++++++++++-- crates/bashkit/src/builtins/awk/tests.rs | 11 +++++++++++ crates/bashkit/src/builtins/limits.rs | 2 ++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/crates/bashkit/src/builtins/awk/parser.rs b/crates/bashkit/src/builtins/awk/parser.rs index 8d98b70f2..58a1815dc 100644 --- a/crates/bashkit/src/builtins/awk/parser.rs +++ b/crates/bashkit/src/builtins/awk/parser.rs @@ -3,7 +3,10 @@ use std::collections::HashMap; use super::{AwkAction, AwkExpr, AwkFunctionDef, AwkOutputTarget, AwkPattern, AwkProgram, AwkRule}; -use crate::builtins::limits::AWK_MAX_PARSER_DEPTH as MAX_AWK_PARSER_DEPTH; +use crate::builtins::limits::{ + AWK_MAX_MULTI_SUBSCRIPTS as MAX_AWK_MULTI_SUBSCRIPTS, + AWK_MAX_PARSER_DEPTH as MAX_AWK_PARSER_DEPTH, +}; use crate::builtins::search_common::build_regex; use crate::error::{Error, Result}; @@ -1484,8 +1487,15 @@ impl<'a> AwkParser<'a> { self.pos += 1; // consume '[' let mut subscripts = vec![self.parse_expression()?]; self.skip_whitespace(); - // Handle multi-subscript: arr[e1, e2, ...] joined by SUBSEP + // THREAT[TM-DOS-027]: SUBSEP_CONCAT still evaluates recursively, so cap + // attacker-controlled comma lists before folding them into a left-deep AST. 
while self.pos < self.input.len() && self.current_char().unwrap() == ',' { + if subscripts.len() >= MAX_AWK_MULTI_SUBSCRIPTS { + return Err(Error::Execution(format!( + "awk: too many array subscripts (max {})", + MAX_AWK_MULTI_SUBSCRIPTS + ))); + } self.pos += 1; // consume ',' self.skip_whitespace(); subscripts.push(self.parse_expression()?); diff --git a/crates/bashkit/src/builtins/awk/tests.rs b/crates/bashkit/src/builtins/awk/tests.rs index 08bb00e7a..b780888e0 100644 --- a/crates/bashkit/src/builtins/awk/tests.rs +++ b/crates/bashkit/src/builtins/awk/tests.rs @@ -593,6 +593,17 @@ async fn test_awk_multi_subscript() { assert_eq!(result.stdout.trim(), "1"); } +#[tokio::test] +async fn test_awk_rejects_too_many_multi_subscripts() { + // Regression: unbounded multi-subscript lists built a recursive SUBSEP_CONCAT AST. + let subscripts = std::iter::repeat_n("1", 101).collect::<Vec<_>>().join(","); + let program = format!("BEGIN {{ a[{subscripts}] = 1 }}"); + + let err = run_awk(&[&program], Some("")).await.unwrap_err(); + + assert!(err.to_string().contains("too many array subscripts")); +} + #[tokio::test] async fn test_awk_subsep_defined() { // Issue #396.3: SUBSEP should be defined as \034 diff --git a/crates/bashkit/src/builtins/limits.rs b/crates/bashkit/src/builtins/limits.rs index 134e49e2f..f18814957 100644 --- a/crates/bashkit/src/builtins/limits.rs +++ b/crates/bashkit/src/builtins/limits.rs @@ -19,6 +19,8 @@ pub(crate) const ARCHIVE_MAX_DECOMPRESSION_RATIO: usize = 100; /// awk: max parser recursion depth. pub(crate) const AWK_MAX_PARSER_DEPTH: usize = 100; +/// awk: max comma-separated subscripts in one array key. +pub(crate) const AWK_MAX_MULTI_SUBSCRIPTS: usize = 100; /// awk: max user-function call depth at runtime. pub(crate) const AWK_MAX_CALL_DEPTH: usize = 64; /// awk: total output byte cap per invocation. 
From 4808eb56d3a46c980c6fc5431c0643f42d492483 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 12 Jun 2026 10:04:22 +0000 Subject: [PATCH 2/3] test(awk): use AWK_MAX_MULTI_SUBSCRIPTS constant in subscript cap test Avoids hardcoded 101 drifting from the limit. --- crates/bashkit/src/builtins/awk/tests.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/bashkit/src/builtins/awk/tests.rs b/crates/bashkit/src/builtins/awk/tests.rs index b780888e0..aeb1d66f8 100644 --- a/crates/bashkit/src/builtins/awk/tests.rs +++ b/crates/bashkit/src/builtins/awk/tests.rs @@ -7,6 +7,7 @@ use crate::builtins::limits::{ AWK_MAX_GETLINE_CACHE_BYTES as MAX_GETLINE_CACHE_BYTES, AWK_MAX_GETLINE_CACHED_FILES as MAX_GETLINE_CACHED_FILES, AWK_MAX_GETLINE_FILE_BYTES as MAX_GETLINE_FILE_BYTES, + AWK_MAX_MULTI_SUBSCRIPTS, AWK_MAX_OUTPUT_BYTES as MAX_OUTPUT_BYTES, AWK_MAX_OUTPUT_TARGETS as MAX_OUTPUT_TARGETS, }; @@ -596,7 +597,7 @@ async fn test_awk_multi_subscript() { #[tokio::test] async fn test_awk_rejects_too_many_multi_subscripts() { // Regression: unbounded multi-subscript lists built a recursive SUBSEP_CONCAT AST. 
- let subscripts = std::iter::repeat_n("1", 101).collect::<Vec<_>>().join(","); + let subscripts = std::iter::repeat_n("1", AWK_MAX_MULTI_SUBSCRIPTS + 1).collect::<Vec<_>>().join(","); let program = format!("BEGIN {{ a[{subscripts}] = 1 }}"); let err = run_awk(&[&program], Some("")).await.unwrap_err(); From 2776e759f5211d6f4a639f1c9e7dc473ca976d50 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 12 Jun 2026 10:07:49 +0000 Subject: [PATCH 3/3] style(awk): format multi-subscript test to satisfy cargo fmt --- crates/bashkit/src/builtins/awk/tests.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/bashkit/src/builtins/awk/tests.rs b/crates/bashkit/src/builtins/awk/tests.rs index aeb1d66f8..bddcefec7 100644 --- a/crates/bashkit/src/builtins/awk/tests.rs +++ b/crates/bashkit/src/builtins/awk/tests.rs @@ -597,7 +597,9 @@ async fn test_awk_multi_subscript() { #[tokio::test] async fn test_awk_rejects_too_many_multi_subscripts() { // Regression: unbounded multi-subscript lists built a recursive SUBSEP_CONCAT AST. - let subscripts = std::iter::repeat_n("1", AWK_MAX_MULTI_SUBSCRIPTS + 1).collect::<Vec<_>>().join(","); + let subscripts = std::iter::repeat_n("1", AWK_MAX_MULTI_SUBSCRIPTS + 1) + .collect::<Vec<_>>() + .join(","); let program = format!("BEGIN {{ a[{subscripts}] = 1 }}"); let err = run_awk(&[&program], Some("")).await.unwrap_err();