From 58857df6adde4d50e1e1b388dd72f6b674768af0 Mon Sep 17 00:00:00 2001
From: Peter Toth
Date: Wed, 27 May 2026 12:20:57 +0200
Subject: [PATCH] GH-3596: Add RowRanges.Builder for incremental construction
 from selected row indices

### Rationale for this change

Opening up APIs needed by a later materialization feature in Spark. External
readers need to assemble a RowRanges incrementally from a stream of selected
row indices (e.g. produced by a downstream filter or join) without having to
know page boundaries ahead of time.

### What changes are included in this PR?

Adds a Builder to RowRanges that takes a strictly-increasing sequence of
non-negative selected row indices via addSelected(long) and coalesces
consecutive indices into Range entries. Out-of-order, duplicate, or negative
indices throw IllegalArgumentException. build() returns a snapshot and leaves
the builder untouched, so a builder can keep accepting rows after build()
without affecting a RowRanges it has already produced.

### Are these changes tested?

Yes. TestRowRanges covers single/multiple/coalesced ranges, the empty builder
case, the out-of-order/duplicate/negative rejection paths, and continued use
of a builder after build().

### Are there any user-facing changes?

No.

Closes #3596

Co-authored-by: Matt Butrovich
---
 .../filter2/columnindex/RowRanges.java        |  83 ++++++++++++
 .../filter2/columnindex/TestRowRanges.java    | 131 +++++++++++++++++++
 2 files changed, 214 insertions(+)

diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java
index 0b2257a6bc..eb3b2abdd2 100644
--- a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java
@@ -316,4 +316,87 @@ public List<Range> getRanges() {
   public String toString() {
     return ranges.toString();
   }
+
+  /**
+   * @return a new {@link Builder} for constructing a {@link RowRanges} from a sequence of
+   *     selected row indices.
+   */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * Constructs a {@link RowRanges} by appending selected row indices in strictly increasing
+   * order. Consecutive indices are coalesced into a single {@link Range}; gaps close the
+   * current run and start a new one.
+   *
+   * <p>Row indices must be non-negative. {@link #build()} returns a snapshot and leaves the
+   * builder's state untouched, so it may be called more than once and interleaved with further
+   * {@link #addSelected(long)} calls.
+   *
+   * <p>Usage:
+   * <pre>{@code
+   * RowRanges.Builder builder = RowRanges.builder();
+   * for (long row : selectedRowsInOrder) {
+   *   builder.addSelected(row);
+   * }
+   * RowRanges ranges = builder.build();
+   * }</pre>
+   */
+  public static class Builder {
+    private final List<Range> ranges = new ArrayList<>(); // closed runs, in ascending order
+    private long runStart = -1; // -1 = no active run
+    private long runEnd = -1; // valid iff runStart >= 0
+
+    /**
+     * Marks {@code blockRow} as selected. Must be called in strictly increasing order; calling
+     * with a value less than or equal to the previous call's value throws
+     * {@link IllegalArgumentException}.
+     *
+     * @param blockRow the non-negative row index to mark selected (must be {@code >} the last
+     *     value passed)
+     * @return this builder for chaining
+     * @throws IllegalArgumentException if {@code blockRow} is negative or is not greater than
+     *     the previously added row index
+     */
+    public Builder addSelected(long blockRow) {
+      // A negative index would be mistaken for the "no active run" sentinel and silently lost.
+      if (blockRow < 0) {
+        throw new IllegalArgumentException("addSelected requires a non-negative row index; got " + blockRow);
+      }
+      if (runStart < 0) {
+        runStart = blockRow;
+        runEnd = blockRow;
+      } else if (blockRow <= runEnd) {
+        throw new IllegalArgumentException(
+            "addSelected requires strictly increasing row indices; got " + blockRow + " after " + runEnd);
+      } else if (blockRow == runEnd + 1) {
+        // blockRow > runEnd was established above, so runEnd + 1 cannot overflow here.
+        runEnd = blockRow;
+      } else {
+        ranges.add(new Range(runStart, runEnd));
+        runStart = blockRow;
+        runEnd = blockRow;
+      }
+      return this;
+    }
+
+    /**
+     * Snapshots the rows selected so far without modifying this builder.
+     *
+     * @return the constructed {@link RowRanges}, or {@link RowRanges#EMPTY} when no rows were
+     *     selected.
+     */
+    public RowRanges build() {
+      if (runStart < 0) {
+        // A run is opened by the first addSelected call and never cleared, so nothing was added.
+        return RowRanges.EMPTY;
+      }
+      // Copy into a fresh list so the result never aliases the builder's internal list.
+      List<Range> snapshot = new ArrayList<>(ranges.size() + 1);
+      snapshot.addAll(ranges);
+      snapshot.add(new Range(runStart, runEnd));
+      return new RowRanges(snapshot);
+    }
+  }
 }
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java
index 9c6b9f737c..34cd0f8203 100644
--- a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java
@@ -152,4 +152,135 @@ public void testIntersection() {
     assertAllRowsEqual(intersection(empty, ranges2).iterator());
     assertAllRowsEqual(intersection(empty, empty).iterator());
   }
+
+  @Test
+  public void testBuilderBasic() {
+    // Select rows 2, 3, 4, 5 (one contiguous run)
+    RowRanges ranges = RowRanges.builder()
+        .addSelected(2)
+        .addSelected(3)
+        .addSelected(4)
+        .addSelected(5)
+        .build();
+    assertAllRowsEqual(ranges.iterator(), 2, 3, 4, 5);
+    assertEquals(4, ranges.rowCount());
+  }
+
+  @Test
+  public void testBuilderMultipleRanges() {
+    // Two runs: 1-2 and 5-7
+    RowRanges ranges = RowRanges.builder()
+        .addSelected(1)
+        .addSelected(2)
+        .addSelected(5)
+        .addSelected(6)
+        .addSelected(7)
+        .build();
+    assertAllRowsEqual(ranges.iterator(), 1, 2, 5, 6, 7);
+    assertEquals(5, ranges.rowCount());
+    assertTrue(ranges.isOverlapping(1, 2));
+    assertTrue(ranges.isOverlapping(5, 7));
+    assertFalse(ranges.isOverlapping(3, 4));
+  }
+
+  @Test
+  public void testBuilderEmpty() {
+    // No rows selected
+    RowRanges ranges = RowRanges.builder().build();
+    assertEquals(RowRanges.EMPTY, ranges);
+    assertEquals(0, ranges.rowCount());
+    assertAllRowsEqual(ranges.iterator());
+  }
+
+  @Test
+  public void testBuilderAllSelected() {
+    // Five contiguous rows starting at 0
+    RowRanges.Builder builder = RowRanges.builder();
+    for (long i = 0; i < 5; i++) {
+      builder.addSelected(i);
+    }
+    RowRanges ranges = builder.build();
+    assertAllRowsEqual(ranges.iterator(), 0, 1, 2, 3, 4);
+    assertEquals(5, ranges.rowCount());
+  }
+
+  @Test
+  public void testBuilderSingleRow() {
+    RowRanges ranges = RowRanges.builder().addSelected(3).build();
+    assertAllRowsEqual(ranges.iterator(), 3);
+    assertEquals(1, ranges.rowCount());
+    assertTrue(ranges.isOverlapping(3, 3));
+    assertFalse(ranges.isOverlapping(0, 2));
+    assertFalse(ranges.isOverlapping(4, 10));
+  }
+
+  @Test
+  public void testBuilderAlternating() {
+    // Every other row selected: 0, 2, 4, 6, 8 — five singleton runs.
+    RowRanges.Builder builder = RowRanges.builder();
+    for (long i = 0; i < 10; i += 2) {
+      builder.addSelected(i);
+    }
+    RowRanges ranges = builder.build();
+    assertAllRowsEqual(ranges.iterator(), 0, 2, 4, 6, 8);
+    assertEquals(5, ranges.rowCount());
+  }
+
+  @Test
+  public void testBuilderFirstAndLast() {
+    RowRanges ranges = RowRanges.builder().addSelected(0).addSelected(99).build();
+    assertAllRowsEqual(ranges.iterator(), 0, 99);
+    assertEquals(2, ranges.rowCount());
+  }
+
+  @Test
+  public void testBuilderRejectsOutOfOrder() {
+    RowRanges.Builder builder = RowRanges.builder().addSelected(5).addSelected(7);
+    try {
+      builder.addSelected(6);
+      org.junit.Assert.fail("expected IllegalArgumentException for out-of-order index");
+    } catch (IllegalArgumentException expected) {
+      // expected
+    }
+  }
+
+  @Test
+  public void testBuilderRejectsDuplicate() {
+    RowRanges.Builder builder = RowRanges.builder().addSelected(3);
+    try {
+      builder.addSelected(3);
+      org.junit.Assert.fail("expected IllegalArgumentException for duplicate index");
+    } catch (IllegalArgumentException expected) {
+      // expected
+    }
+  }
+
+  @Test
+  public void testBuilderRejectsNegative() {
+    RowRanges.Builder builder = RowRanges.builder();
+    try {
+      builder.addSelected(-1);
+      org.junit.Assert.fail("expected IllegalArgumentException for negative index");
+    } catch (IllegalArgumentException expected) {
+      // expected
+    }
+  }
+
+  @Test
+  public void testBuilderReusableAfterBuild() {
+    RowRanges.Builder builder = RowRanges.builder().addSelected(1).addSelected(2);
+    RowRanges first = builder.build();
+    // Continuing after build() must extend the builder only, not the already-built result.
+    RowRanges second = builder.addSelected(3).addSelected(7).build();
+    assertAllRowsEqual(first.iterator(), 1, 2);
+    assertEquals(2, first.rowCount());
+    assertAllRowsEqual(second.iterator(), 1, 2, 3, 7);
+    assertEquals(4, second.rowCount());
+    try {
+      builder.addSelected(7);
+      org.junit.Assert.fail("expected IllegalArgumentException for duplicate index after build()");
+    } catch (IllegalArgumentException expected) {
+      // expected
+    }
+  }
 }