Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions contrib/pax_storage/expected/delete_bloom_stats.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
-- Regression test for issue #1749:
-- PAX DELETE crashes with SIGSEGV when bloomfilter_columns are not a
-- subset of minmax_columns. The stats refresh inside
-- DeleteWithVisibilityMap must project every column it reads.
-- Case 1: bloomfilter column (payload) is NOT in minmax_columns.
-- Pre-fix: segment crashed on DELETE.
drop table if exists pax_delete_bloom_crash;
NOTICE: table "pax_delete_bloom_crash" does not exist, skipping
create table pax_delete_bloom_crash (id int, k int, payload text)
using pax
with (minmax_columns = 'id', bloomfilter_columns = 'payload');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into pax_delete_bloom_crash
select i, i % 10, 'payload-' || i::text
from generate_series(1, 10000) as i;
delete from pax_delete_bloom_crash where id between 1 and 100;
select count(*) from pax_delete_bloom_crash;
count
-------
9900
(1 row)

drop table pax_delete_bloom_crash;
-- Case 2: bloomfilter only, no minmax columns.
drop table if exists pax_delete_bf_only;
NOTICE: table "pax_delete_bf_only" does not exist, skipping
create table pax_delete_bf_only (id int, payload text)
using pax
with (bloomfilter_columns = 'payload');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into pax_delete_bf_only
select i, 'payload-' || i::text from generate_series(1, 5000) as i;
delete from pax_delete_bf_only where id between 1 and 50;
select count(*) from pax_delete_bf_only;
count
-------
4950
(1 row)

drop table pax_delete_bf_only;
-- Case 3: minmax and bloomfilter columns overlap but neither is a subset.
drop table if exists pax_delete_mm_bf_mixed;
NOTICE: table "pax_delete_mm_bf_mixed" does not exist, skipping
create table pax_delete_mm_bf_mixed (id int, k int, payload text)
using pax
with (minmax_columns = 'id,payload', bloomfilter_columns = 'k,payload');
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into pax_delete_mm_bf_mixed
select i, i % 7, 'p-' || i::text from generate_series(1, 5000) as i;
delete from pax_delete_mm_bf_mixed where id between 1 and 50;
select count(*) from pax_delete_mm_bf_mixed;
count
-------
4950
(1 row)

drop table pax_delete_mm_bf_mixed;
1 change: 1 addition & 0 deletions contrib/pax_storage/pax_schedule
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ test: alter_distributed
test: toast toast_failed detoast ddl numeric types filter
test: update
test: statistics_bloom_filter
test: delete_bloom_stats

test: filter_tree filter_tree_arithmetic
test: filter_tree_root_quals
Expand Down
48 changes: 48 additions & 0 deletions contrib/pax_storage/sql/delete_bloom_stats.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
-- Regression test for issue #1749:
-- PAX DELETE crashes with SIGSEGV when bloomfilter_columns are not a
-- subset of minmax_columns. The stats refresh inside
-- DeleteWithVisibilityMap must project every column it reads.

-- Case 1: bloomfilter column (payload) is NOT in minmax_columns.
-- Pre-fix: segment crashed on DELETE.
drop table if exists pax_delete_bloom_crash;
create table pax_delete_bloom_crash (id int, k int, payload text)
using pax
with (minmax_columns = 'id', bloomfilter_columns = 'payload');

insert into pax_delete_bloom_crash
select i, i % 10, 'payload-' || i::text
from generate_series(1, 10000) as i;

delete from pax_delete_bloom_crash where id between 1 and 100;
select count(*) from pax_delete_bloom_crash;

drop table pax_delete_bloom_crash;

-- Case 2: bloomfilter only, no minmax columns.
drop table if exists pax_delete_bf_only;
create table pax_delete_bf_only (id int, payload text)
using pax
with (bloomfilter_columns = 'payload');

insert into pax_delete_bf_only
select i, 'payload-' || i::text from generate_series(1, 5000) as i;

delete from pax_delete_bf_only where id between 1 and 50;
select count(*) from pax_delete_bf_only;

drop table pax_delete_bf_only;

-- Case 3: minmax and bloomfilter columns overlap but neither is a subset.
drop table if exists pax_delete_mm_bf_mixed;
create table pax_delete_mm_bf_mixed (id int, k int, payload text)
using pax
with (minmax_columns = 'id,payload', bloomfilter_columns = 'k,payload');

insert into pax_delete_mm_bf_mixed
select i, i % 7, 'p-' || i::text from generate_series(1, 5000) as i;

delete from pax_delete_mm_bf_mixed where id between 1 and 50;
select count(*) from pax_delete_mm_bf_mixed;

drop table pax_delete_mm_bf_mixed;
16 changes: 14 additions & 2 deletions contrib/pax_storage/src/cpp/storage/pax.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@

#include "storage/pax.h"

#include <algorithm>
#include <iterator>
#include <map>
#include <utility>
#include <vector>

#include "access/pax_visimap.h"
#include "access/paxc_rel_options.h"
Expand Down Expand Up @@ -595,14 +598,23 @@ void TableDeleter::DeleteWithVisibilityMap(
return;
}
std::vector<int> min_max_col_idxs;
std::vector<int> bf_col_idxs;
std::vector<int> stats_proj_col_idxs;
auto stats_updater_projection = std::make_shared<PaxFilter>();

std::unique_ptr<Bitmap8> visi_bitmap;
auto catalog_update = pax::PaxCatalogUpdater::Begin(rel_);
auto rel_path = cbdb::BuildPaxDirectoryPath(rel_->rd_node, rel_->rd_backend);

min_max_col_idxs = cbdb::GetMinMaxColumnIndexes(rel_);
stats_updater_projection->SetColumnProjection(min_max_col_idxs,
bf_col_idxs = cbdb::GetBloomFilterColumnIndexes(rel_);

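// Projection must cover minmax ∪ bloomfilter columns; otherwise
// AddRow reads uninitialized slot values for bf columns (issue #1749).
// NOTE(review): std::set_union requires both input ranges to be sorted
// ascending — confirm GetMinMaxColumnIndexes / GetBloomFilterColumnIndexes
// return sorted indexes, or sort both vectors before this call.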
std::set_union(min_max_col_idxs.begin(), min_max_col_idxs.end(),
bf_col_idxs.begin(), bf_col_idxs.end(),
std::back_inserter(stats_proj_col_idxs));
stats_updater_projection->SetColumnProjection(stats_proj_col_idxs,
rel_->rd_att->natts);
do {
auto it = iterator->Next();
Expand Down Expand Up @@ -675,7 +687,7 @@ void TableDeleter::DeleteWithVisibilityMap(
UpdateStatsInAuxTable(
catalog_update, micro_partition_metadata,
std::make_shared<Bitmap8>(visi_bitmap->Raw()), min_max_col_idxs,
cbdb::GetBloomFilterColumnIndexes(rel_), stats_updater_projection);
bf_col_idxs, stats_updater_projection);

// write pg_pax_blocks_oid
catalog_update.UpdateVisimap(block_id, visimap_file_name);
Expand Down
Loading