From 5cb18b37e733a7184b448123ad7a7767edc5aecd Mon Sep 17 00:00:00 2001
From: Brent Gardner <bgardner@squarelabs.net>
Date: Wed, 3 Jun 2026 11:30:15 -0600
Subject: [PATCH 1/2] slt: GroupValuesRows::emit untracked decode-buffer
 allocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`GroupedHashAggregateStream`'s spill path emits via
`GroupValuesRows::emit` → `RowConverter::convert_rows` → `decode_column`,
which allocates per-column buffers (`arrow_row::list::decode` for
List keys, `decode_binary`/`decode_string` for Utf8 keys) without
`MemoryReservation::try_grow`.

Surfaced by apache/datafusion#22626.
---
 .../test_files/group_by_spill_row_decode.slt  | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 datafusion/sqllogictest/test_files/group_by_spill_row_decode.slt
diff --git a/datafusion/sqllogictest/test_files/group_by_spill_row_decode.slt b/datafusion/sqllogictest/test_files/group_by_spill_row_decode.slt
new file mode 100644
index 0000000000000..db63ad33543f5
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/group_by_spill_row_decode.slt
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# `GroupedHashAggregateStream` spill path: `GroupValuesRows::emit` →
+# `RowConverter::convert_rows` → `decode_column` allocates per-column
+# buffers without `MemoryReservation::try_grow`.
+#
+# A `List<Utf8>` key forces the row-encoded `GroupValuesRows` impl
+# (single-column `Utf8` routes through `GroupValuesBytes` instead).
+# Pool=1M is in the goldilocks zone: above DF's tracked-alloc floor
+# (agg enters spill), below the framework's slack ceiling on the
+# decode buffer.
+#
+# Surfaced by apache/datafusion#22626. Sibling: nested_loop_join_spill.slt.
+
+statement ok
+SET datafusion.execution.target_partitions = 1
+
+statement ok
+CREATE TABLE list_keys AS
+SELECT make_array(cast(v AS varchar) || repeat('x', 200)) AS k
+FROM generate_series(1, 50000) AS t(v)
+
+statement ok
+SET datafusion.runtime.memory_limit = '1M'
+
+query I nosort
+SELECT count(*) FROM (
+  SELECT k, count(*) AS c
+  FROM list_keys
+  GROUP BY k
+)
+----
+50000
+
+statement ok
+RESET datafusion.runtime.memory_limit
+
+statement ok
+DROP TABLE list_keys
+
+statement ok
+SET datafusion.execution.target_partitions = 4

From 95b6e1a4e2b238dde5ac40f83481f7ef726aab87 Mon Sep 17 00:00:00 2001
From: Brent Gardner <bgardner@squarelabs.net>
Date: Wed, 3 Jun 2026 11:55:40 -0600
Subject: [PATCH 2/2] Surface GroupValuesRows::emit untracked decode allocation

Tighten HEADROOM_FACTOR 8.0 -> 5.0 and update the SLT key shape
(Utf8 + List<Int> schema poisoner) so the test routes through
GroupValuesRows on upstream main. The framework then catches
GroupedHashAggregateStream::emit -> GroupValuesRows::emit ->
RowConverter::convert_rows -> decode_column allocating a
MutableBuffer::with_capacity without MemoryReservation::try_grow.

Overdraft observed: ~1.3 MB. Same operator and emit path that
caused a 79-pod OOM cascade at one DataFusion-based log analytics
deployment on 2026-05-20.
---
 .../sqllogictest/src/accounting_pool.rs       |  2 +-
 .../test_files/group_by_spill_row_decode.slt  | 28 ++++++++++++-------
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/datafusion/sqllogictest/src/accounting_pool.rs b/datafusion/sqllogictest/src/accounting_pool.rs
index a9d2db9f12261..308044f79adb0 100644
--- a/datafusion/sqllogictest/src/accounting_pool.rs
+++ b/datafusion/sqllogictest/src/accounting_pool.rs
@@ -40,7 +40,7 @@ use std::sync::Arc;
 /// untracked allocation — by definition, since DF's pool didn't see it.
 ///
 /// 800% high, but that's what it takes to pass the SLT suite right now. Goal should be ~10%
-const HEADROOM_FACTOR: f64 = 8.0;
+const HEADROOM_FACTOR: f64 = 5.0;
 
 pub struct AccountingMemoryPool {
     inner: Arc<dyn MemoryPool>,
diff --git a/datafusion/sqllogictest/test_files/group_by_spill_row_decode.slt b/datafusion/sqllogictest/test_files/group_by_spill_row_decode.slt
index db63ad33543f5..1117f2d67c070 100644
--- a/datafusion/sqllogictest/test_files/group_by_spill_row_decode.slt
+++ b/datafusion/sqllogictest/test_files/group_by_spill_row_decode.slt
@@ -16,11 +16,18 @@
 # under the License.
 
 # `GroupedHashAggregateStream` spill path: `GroupValuesRows::emit` →
-# `RowConverter::convert_rows` → `decode_column` allocates per-column
-# buffers without `MemoryReservation::try_grow`.
+# `RowConverter::convert_rows` → `decode_column` → `decode_string` /
+# `decode_binary` allocates per-column buffers without
+# `MemoryReservation::try_grow`.
+#
+# Plain single-column `Utf8` routes to `GroupValuesBytes`, not
+# `GroupValuesRows`. Pairing the wide Utf8 key with a small `List<Int>`
+# column ("schema poisoner") makes the schema fall outside
+# `multi_group_by::supported_type`, forcing `GroupValuesRows`. The wide
+# Utf8 column carries the data; its decode hits `decode_string` →
+# `decode_binary` — the exact path the production OOM at one
+# DataFusion-based log analytics deployment took on 2026-05-20.
 #
-# A `List<Utf8>` key forces the row-encoded `GroupValuesRows` impl
-# (single-column `Utf8` routes through `GroupValuesBytes` instead).
 # Pool=1M is in the goldilocks zone: above DF's tracked-alloc floor
 # (agg enters spill), below the framework's slack ceiling on the
 # decode buffer.
@@ -31,8 +38,9 @@ statement ok
 SET datafusion.execution.target_partitions = 1
 
 statement ok
-CREATE TABLE list_keys AS
-SELECT make_array(cast(v AS varchar) || repeat('x', 200)) AS k
+CREATE TABLE utf8_keys AS
+SELECT cast(v AS varchar) || repeat('x', 200) AS k,
+       make_array(1) AS _force_rows
 FROM generate_series(1, 50000) AS t(v)
 
 statement ok
@@ -40,9 +48,9 @@ SET datafusion.runtime.memory_limit = '1M'
 
 query I nosort
 SELECT count(*) FROM (
-  SELECT k, count(*) AS c
-  FROM list_keys
-  GROUP BY k
+  SELECT k, _force_rows, count(*) AS c
+  FROM utf8_keys
+  GROUP BY k, _force_rows
 )
 ----
 50000
@@ -51,7 +59,7 @@ statement ok
 RESET datafusion.runtime.memory_limit
 
 statement ok
-DROP TABLE list_keys
+DROP TABLE utf8_keys
 
 statement ok
 SET datafusion.execution.target_partitions = 4