Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions benchmarks/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,9 @@ main() {
sort_pushdown|sort_pushdown_sorted)
data_sort_pushdown
;;
sort_pushdown_inexact)
data_sort_pushdown_inexact
;;
sort_tpch)
# same data as for tpch
data_tpch "1" "parquet"
Expand Down Expand Up @@ -522,6 +525,9 @@ main() {
sort_pushdown_sorted)
run_sort_pushdown_sorted
;;
sort_pushdown_inexact)
run_sort_pushdown_inexact
;;
sort_tpch)
run_sort_tpch "1"
;;
Expand Down Expand Up @@ -1137,6 +1143,77 @@ run_sort_pushdown_sorted() {
debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --sorted --iterations 5 --path "${SORT_PUSHDOWN_DIR}" --queries-path "${SCRIPT_DIR}/queries/sort_pushdown" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
}

# Generates data for the sort pushdown Inexact benchmark.
#
# Unlike sort_pushdown (Exact path), this benchmark targets the Inexact path
# where row group reorder by statistics is beneficial. It produces a single
# large lineitem parquet file where row groups have NON-OVERLAPPING but
# OUT-OF-ORDER l_orderkey ranges (each RG internally sorted, RGs shuffled).
#
# This simulates append-heavy workloads where data is written in batches at
# different times, producing segments with tight value ranges but in arbitrary
# row-group order.
#
# Reads (set by the surrounding script): DATA_DIR, SCRIPT_DIR.
# Side effects: generates sort_pushdown source data if missing, then writes
# ${DATA_DIR}/sort_pushdown_inexact/lineitem/shuffled.parquet.
data_sort_pushdown_inexact() {
    INEXACT_DIR="${DATA_DIR}/sort_pushdown_inexact/lineitem"
    # Idempotency check: skip generation if any parquet file already exists.
    # Test the glob via `ls` exit status on a quoted directory instead of
    # word-splitting unquoted output — robust against spaces in DATA_DIR.
    if [ -d "${INEXACT_DIR}" ] && ls "${INEXACT_DIR}"/*.parquet >/dev/null 2>&1; then
        echo "Sort pushdown Inexact data already exists at ${INEXACT_DIR}"
        return
    fi

    echo "Generating sort pushdown Inexact benchmark data (single file, shuffled RGs)..."

    # Re-use the sort_pushdown data as the source (generate if missing)
    data_sort_pushdown

    mkdir -p "${INEXACT_DIR}"
    SRC_DIR="${DATA_DIR}/sort_pushdown/lineitem"

    # Use datafusion-cli to:
    # 1. Read the source files as one table
    # 2. Split the l_orderkey RANGE into 64 contiguous chunks, scramble the
    #    chunk order, but sort within each chunk. This produces ~64 segments
    #    where each segment has a tight orderkey range but the segments appear
    #    in scrambled (non-sorted) order in the file.
    # 3. Write a single parquet file with a small max_row_group_size so we get
    #    many RGs per file.
    #
    # Chunk index: (l_orderkey * 64) / (MAX(l_orderkey) + 1) maps each key to a
    # range chunk in [0, 63]. NOTE: bucketing the raw key with `% 64` would be
    # wrong here — that groups by residue class, so every bucket spans the full
    # key range (stride-64 interleave) and RG min/max ranges overlap completely.
    #
    # Chunk scrambler: (chunk * 1664525 + 1013904223) % 64 is a linear
    # congruential step; 1664525 is odd (coprime to 64), so this is a
    # deterministic bijection on [0, 63] — effectively random but reproducible.
    (cd "${SCRIPT_DIR}/.." && cargo run --release -p datafusion-cli -- -c "
CREATE EXTERNAL TABLE src
STORED AS PARQUET
LOCATION '${SRC_DIR}';

COPY (
  SELECT * FROM src
  ORDER BY
    (((l_orderkey * 64) / ((SELECT MAX(l_orderkey) FROM src) + 1)) * 1664525 + 1013904223) % 64,
    l_orderkey
)
TO '${INEXACT_DIR}/shuffled.parquet'
STORED AS PARQUET
OPTIONS ('format.max_row_group_size' '100000');
")

    echo "Sort pushdown Inexact data generated at ${INEXACT_DIR}"
    ls -la "${INEXACT_DIR}"
}

# Runs the sort pushdown Inexact benchmark (tests RG reorder by statistics).
#
# Expects data generated by data_sort_pushdown_inexact. Reads script-global
# variables: DATA_DIR, RESULTS_DIR, SCRIPT_DIR, CARGO_COMMAND, QUERY_ARG,
# LATENCY_ARG. Writes results to ${RESULTS_DIR}/sort_pushdown_inexact.json.
run_sort_pushdown_inexact() {
INEXACT_DIR="${DATA_DIR}/sort_pushdown_inexact"
RESULTS_FILE="${RESULTS_DIR}/sort_pushdown_inexact.json"
echo "Running sort pushdown Inexact benchmark (row group reorder by statistics)..."
# NOTE(review): `--sorted` declares the data as ordered on the sort key even
# though the row groups are deliberately shuffled — presumably intentional so
# the scan takes the Inexact ordering path instead of a full sort; confirm
# against the dfbench sort-pushdown flag semantics.
# QUERY_ARG and LATENCY_ARG are intentionally unquoted: they may be empty or
# expand to multiple words.
debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --sorted --iterations 5 --path "${INEXACT_DIR}" --queries-path "${SCRIPT_DIR}/queries/sort_pushdown_inexact" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
}

# Runs the sort integration benchmark
run_sort_tpch() {
SCALE_FACTOR=$1
Expand Down
8 changes: 8 additions & 0 deletions benchmarks/queries/sort_pushdown_inexact/q1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Inexact path: small TopK (LIMIT 100) in DESC order on a file declared ASC.
-- Reordering row groups by statistics lets the scan visit the RG with the
-- largest max value first, so the TopK threshold tightens right away and the
-- dynamic filter pushed into the reader prunes the remaining RGs efficiently.
SELECT l.l_orderkey, l.l_partkey, l.l_suppkey
FROM lineitem AS l
ORDER BY l.l_orderkey DESC
LIMIT 100
7 changes: 7 additions & 0 deletions benchmarks/queries/sort_pushdown_inexact/q2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Inexact path: same DESC TopK as q1 but with a larger fetch (LIMIT 1000).
-- A bigger LIMIT means more heap replacements in TopK; visiting the
-- highest-max row group first tightens the threshold sooner and reduces the
-- total number of replacements.
SELECT l.l_orderkey, l.l_partkey, l.l_suppkey
FROM lineitem AS l
ORDER BY l.l_orderkey DESC
LIMIT 1000
8 changes: 8 additions & 0 deletions benchmarks/queries/sort_pushdown_inexact/q3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Inexact path: select every column with a DESC TopK (LIMIT 100).
-- Exercises the row-level filter benefit: once the first row group yields a
-- tight threshold, later row groups can skip decoding the non-sort columns
-- for filtered-out rows — the wider the table, the bigger the win.
SELECT l.*
FROM lineitem AS l
ORDER BY l.l_orderkey DESC
LIMIT 100
7 changes: 7 additions & 0 deletions benchmarks/queries/sort_pushdown_inexact/q4.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Inexact path: all columns plus the larger fetch (LIMIT 1000).
-- Stacks the wide-row late-materialization benefit of q3 on top of the
-- larger-LIMIT effect of q2 to show the cumulative gain from reordering
-- row groups by statistics.
SELECT l.*
FROM lineitem AS l
ORDER BY l.l_orderkey DESC
LIMIT 1000
9 changes: 8 additions & 1 deletion benchmarks/src/sort_pushdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,14 @@ impl RunOpt {
async fn benchmark_query(&self, query_id: usize) -> Result<Vec<QueryResult>> {
let sql = self.load_query(query_id)?;

let config = self.common.config()?;
let mut config = self.common.config()?;
// Enable parquet filter pushdown + late materialization. This is
// essential for the Inexact sort pushdown path: TopK's dynamic
// filter is pushed down to the parquet reader, so only the sort
// column is decoded to evaluate the filter, and decoding of the
// non-sort columns is skipped for rows that fail it — this is
// where RG reorder's tight-threshold-first strategy pays off for
// wide-row queries.
config.options_mut().execution.parquet.pushdown_filters = true;
let rt = self.common.build_runtime()?;
let state = SessionStateBuilder::new()
.with_config(config)
Expand Down
Loading
Loading