From 9cd76fbda8d163edbccbd7bd93601fbbfd089ea0 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Thu, 11 Jun 2026 20:28:27 -0500 Subject: [PATCH] fix(awk): cap multi-subscript arrays --- crates/bashkit/src/builtins/awk/parser.rs | 14 ++++++++++++-- crates/bashkit/src/builtins/awk/tests.rs | 11 +++++++++++ crates/bashkit/src/builtins/limits.rs | 2 ++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/crates/bashkit/src/builtins/awk/parser.rs b/crates/bashkit/src/builtins/awk/parser.rs index 8d98b70f2..58a1815dc 100644 --- a/crates/bashkit/src/builtins/awk/parser.rs +++ b/crates/bashkit/src/builtins/awk/parser.rs @@ -3,7 +3,10 @@ use std::collections::HashMap; use super::{AwkAction, AwkExpr, AwkFunctionDef, AwkOutputTarget, AwkPattern, AwkProgram, AwkRule}; -use crate::builtins::limits::AWK_MAX_PARSER_DEPTH as MAX_AWK_PARSER_DEPTH; +use crate::builtins::limits::{ + AWK_MAX_MULTI_SUBSCRIPTS as MAX_AWK_MULTI_SUBSCRIPTS, + AWK_MAX_PARSER_DEPTH as MAX_AWK_PARSER_DEPTH, +}; use crate::builtins::search_common::build_regex; use crate::error::{Error, Result}; @@ -1484,8 +1487,15 @@ impl<'a> AwkParser<'a> { self.pos += 1; // consume '[' let mut subscripts = vec![self.parse_expression()?]; self.skip_whitespace(); - // Handle multi-subscript: arr[e1, e2, ...] joined by SUBSEP + // THREAT[TM-DOS-027]: SUBSEP_CONCAT still evaluates recursively, so cap + // attacker-controlled comma lists before folding them into a left-deep AST. 
while self.pos < self.input.len() && self.current_char().unwrap() == ',' { + if subscripts.len() >= MAX_AWK_MULTI_SUBSCRIPTS { + return Err(Error::Execution(format!( + "awk: too many array subscripts (max {})", + MAX_AWK_MULTI_SUBSCRIPTS + ))); + } self.pos += 1; // consume ',' self.skip_whitespace(); subscripts.push(self.parse_expression()?); diff --git a/crates/bashkit/src/builtins/awk/tests.rs b/crates/bashkit/src/builtins/awk/tests.rs index 76b2d91ec..e25a1c8f0 100644 --- a/crates/bashkit/src/builtins/awk/tests.rs +++ b/crates/bashkit/src/builtins/awk/tests.rs @@ -586,6 +586,17 @@ async fn test_awk_multi_subscript() { assert_eq!(result.stdout.trim(), "1"); } +#[tokio::test] +async fn test_awk_rejects_too_many_multi_subscripts() { + // Regression: unbounded multi-subscript lists built a recursive SUBSEP_CONCAT AST. + let subscripts = std::iter::repeat_n("1", 101).collect::<Vec<_>>().join(","); + let program = format!("BEGIN {{ a[{subscripts}] = 1 }}"); + + let err = run_awk(&[&program], Some("")).await.unwrap_err(); + + assert!(err.to_string().contains("too many array subscripts")); +} + #[tokio::test] async fn test_awk_subsep_defined() { // Issue #396.3: SUBSEP should be defined as \034 diff --git a/crates/bashkit/src/builtins/limits.rs b/crates/bashkit/src/builtins/limits.rs index fab92dd71..d71c25da6 100644 --- a/crates/bashkit/src/builtins/limits.rs +++ b/crates/bashkit/src/builtins/limits.rs @@ -19,6 +19,8 @@ pub(crate) const ARCHIVE_MAX_DECOMPRESSION_RATIO: usize = 100; /// awk: max parser recursion depth. pub(crate) const AWK_MAX_PARSER_DEPTH: usize = 100; +/// awk: max comma-separated subscripts in one array key. +pub(crate) const AWK_MAX_MULTI_SUBSCRIPTS: usize = 100; /// awk: max user-function call depth at runtime. pub(crate) const AWK_MAX_CALL_DEPTH: usize = 64; /// awk: total output byte cap per invocation.